From 61315a75ce2a3687ea568ea4e4e9150fd91d489e Mon Sep 17 00:00:00 2001
From: tianhei <tianheilene@gmail.com>
Date: Tue, 17 Mar 2026 19:06:15 +0800
Subject: [PATCH 01/22] feat(tts): add external provider fallback
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Summary:
- add an OpenAI-compatible TTS adapter with request tests, /v1 path
  fallback, and non-audio response detection
- add a legacy external TTS fallback provider and route all TTS generation
  through a single provider entrypoint
- document the design and implementation plan for TIA-51 (produced during an
  unattended run) and add environment keys for external TTS config

Rationale:
- the existing route hardcoded one anonymous upstream and ignored the voice
  parameter, so external TTS support could not be meaningfully extended
- the configured API endpoint in this environment exposes chat models but does
  not provide a standard TTS audio endpoint, so runtime fallback is required
- keeping the response contract as { audio } preserves the editor/media
  insertion flow while making provider behavior testable and replaceable

Tests:
- bun test apps/web/src/lib/tts/openai-compatible.test.ts apps/web/src/lib/tts/provider.test.ts
- bunx @biomejs/biome check apps/web/src/app/api/tts/generate/route.ts apps/web/src/lib/tts/openai-compatible.ts apps/web/src/lib/tts/openai-compatible.test.ts apps/web/src/lib/tts/provider.ts apps/web/src/lib/tts/provider.test.ts apps/web/src/lib/tts/legacy.ts apps/web/src/constants/tts-constants.ts packages/env/src/web.ts
- bunx tsc -p apps/web/tsconfig.json --noEmit
- NODE_ENV=development NEXT_PUBLIC_SITE_URL=http://localhost:4100 UPSTASH_REDIS_REST_URL=https://example.com UPSTASH_REDIS_REST_TOKEN=dummy bun --eval 'import { NextRequest } from "next/server"; import { POST } from "./src/app/api/tts/generate/route.ts"; const request = new NextRequest("http://localhost/api/tts/generate", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ text: "你好，Cutia。", voice: "default" }) }); const response = await POST(request); const json = await response.json(); console.log(JSON.stringify({ status: response.status, audioLength: json.audio?.length ?? 0, audioHead: json.audio?.slice(0, 8) ?? null, error: json.error ?? null }));' (workdir: apps/web)

Co-authored-by: Codex <codex@openai.com>
---
 apps/web/src/app/api/tts/generate/route.ts    |  58 +++---
 apps/web/src/constants/tts-constants.ts       |   5 +-
 apps/web/src/lib/tts/legacy.ts                |  55 ++++++
 .../web/src/lib/tts/openai-compatible.test.ts | 164 +++++++++++++++++
 apps/web/src/lib/tts/openai-compatible.ts     | 169 ++++++++++++++++++
 apps/web/src/lib/tts/provider.test.ts         |  53 ++++++
 apps/web/src/lib/tts/provider.ts              |  32 ++++
 ...2026-03-17-tts-external-provider-design.md | 132 ++++++++++++++
 .../plans/2026-03-17-tts-external-provider.md | 147 +++++++++++++++
 packages/env/src/web.ts                       |   3 +
 10 files changed, 778 insertions(+), 40 deletions(-)
 create mode 100644 apps/web/src/lib/tts/legacy.ts
 create mode 100644 apps/web/src/lib/tts/openai-compatible.test.ts
 create mode 100644 apps/web/src/lib/tts/openai-compatible.ts
 create mode 100644 apps/web/src/lib/tts/provider.test.ts
 create mode 100644 apps/web/src/lib/tts/provider.ts
 create mode 100644 docs/plans/2026-03-17-tts-external-provider-design.md
 create mode 100644 docs/plans/2026-03-17-tts-external-provider.md

diff --git a/apps/web/src/app/api/tts/generate/route.ts b/apps/web/src/app/api/tts/generate/route.ts
index 6767f75..9155317 100644
--- a/apps/web/src/app/api/tts/generate/route.ts
+++ b/apps/web/src/app/api/tts/generate/route.ts
@@ -1,18 +1,13 @@
+import { webEnv } from "@cutia/env/web";
 import { type NextRequest, NextResponse } from "next/server";
 import { z } from "zod";
-
-const TTS_API_BASE = "https://api.milorapart.top/apis/mbAIsc";
+import { synthesizeSpeechWithFallback } from "@/lib/tts/provider";
 
 const requestSchema = z.object({
 	text: z.string().min(1, "Text is required").max(2000, "Text too long"),
 	voice: z.string().optional(),
 });
 
-const upstreamResponseSchema = z.object({
-	code: z.number(),
-	url: z.string().url(),
-});
-
 export async function POST(request: NextRequest) {
 	try {
 		const body = await request.json();
@@ -28,42 +23,31 @@ export async function POST(request: NextRequest) {
 			);
 		}
 
-		const { text } = validation.data;
-		const upstreamUrl = `${TTS_API_BASE}?${new URLSearchParams({ text, format: "mp3" })}`;
-		const upstreamResponse = await fetch(upstreamUrl);
-
-		if (!upstreamResponse.ok) {
-			return NextResponse.json(
-				{ error: `Upstream error: ${upstreamResponse.status}` },
-				{ status: 502 },
-			);
-		}
-
-		const upstreamData = await upstreamResponse.json();
-		const parsed = upstreamResponseSchema.safeParse(upstreamData);
-
-		if (!parsed.success || parsed.data.code !== 200) {
-			return NextResponse.json(
-				{ error: "TTS generation failed" },
-				{ status: 502 },
-			);
-		}
-
-		const audioResponse = await fetch(parsed.data.url);
-		if (!audioResponse.ok) {
-			return NextResponse.json(
-				{ error: `Failed to download audio: ${audioResponse.status}` },
-				{ status: 502 },
-			);
-		}
-
-		const audioArrayBuffer = await audioResponse.arrayBuffer();
+		const { text, voice } = validation.data;
+		const audioArrayBuffer = await synthesizeSpeechWithFallback({
+			env: webEnv,
+			text,
+			voice,
+		});
 		const base64 = Buffer.from(audioArrayBuffer).toString("base64");
 
 		return NextResponse.json({ audio: base64 });
 	} catch (error) {
 		const message = error instanceof Error ? error.message : "Unknown error";
 		console.error("TTS generate error:", error);
+
+		if (message === "External TTS is not configured") {
+			return NextResponse.json({ error: message }, { status: 500 });
+		}
+
+		if (
+			message.startsWith("External TTS request failed:") ||
+			message === "External TTS returned empty audio" ||
+			message.startsWith("Legacy TTS ")
+		) {
+			return NextResponse.json({ error: message }, { status: 502 });
+		}
+
 		return NextResponse.json(
 			{ error: "Internal server error", detail: message },
 			{ status: 500 },
diff --git a/apps/web/src/constants/tts-constants.ts b/apps/web/src/constants/tts-constants.ts
index 60c4084..5b27045 100644
--- a/apps/web/src/constants/tts-constants.ts
+++ b/apps/web/src/constants/tts-constants.ts
@@ -3,8 +3,7 @@ export interface VoicePack {
 	name: string;
 }
 
-export const VOICE_PACKS: VoicePack[] = [
-	{ id: "default", name: "Default" },
-];
+export const VOICE_PACKS: VoicePack[] = [{ id: "default", name: "Default" }];
 
 export const DEFAULT_VOICE_PACK = "default";
+export const DEFAULT_EXTERNAL_TTS_VOICE = "alloy";
diff --git a/apps/web/src/lib/tts/legacy.ts b/apps/web/src/lib/tts/legacy.ts
new file mode 100644
index 0000000..15243f2
--- /dev/null
+++ b/apps/web/src/lib/tts/legacy.ts
@@ -0,0 +1,55 @@
+import { z } from "zod";
+
+const LEGACY_TTS_API_BASE = "https://api.milorapart.top/apis/mbAIsc";
+
+const legacyResponseSchema = z.object({
+	code: z.number(),
+	url: z.string().url(),
+});
+
+type FetchLike = (
+	input: RequestInfo | URL,
+	init?: RequestInit,
+) => Promise<Response>;
+
+export async function synthesizeSpeechWithLegacyProvider({
+	text,
+	fetchImpl = fetch,
+}: {
+	text: string;
+	voice?: string;
+	fetchImpl?: FetchLike;
+}): Promise<ArrayBuffer> {
+	const upstreamUrl = `${LEGACY_TTS_API_BASE}?${new URLSearchParams({
+		format: "mp3",
+		text,
+	})}`;
+	const upstreamResponse = await fetchImpl(upstreamUrl);
+
+	if (!upstreamResponse.ok) {
+		throw new Error(`Legacy TTS request failed: ${upstreamResponse.status}`);
+	}
+
+	const upstreamJson = await upstreamResponse.json().catch(() => null);
+	const parsed = legacyResponseSchema.safeParse(upstreamJson);
+
+	if (!parsed.success || parsed.data.code !== 200) {
+		throw new Error("Legacy TTS generation failed");
+	}
+
+	const audioResponse = await fetchImpl(parsed.data.url);
+
+	if (!audioResponse.ok) {
+		throw new Error(
+			`Legacy TTS audio download failed: ${audioResponse.status}`,
+		);
+	}
+
+	const audio = await audioResponse.arrayBuffer();
+
+	if (audio.byteLength === 0) {
+		throw new Error("Legacy TTS returned empty audio");
+	}
+
+	return audio;
+}
diff --git a/apps/web/src/lib/tts/openai-compatible.test.ts b/apps/web/src/lib/tts/openai-compatible.test.ts
new file mode 100644
index 0000000..4769b48
--- /dev/null
+++ b/apps/web/src/lib/tts/openai-compatible.test.ts
@@ -0,0 +1,164 @@
+import { describe, expect, test } from "bun:test";
+import {
+	DEFAULT_EXTERNAL_TTS_VOICE,
+	getExternalTtsConfig,
+	synthesizeSpeechWithOpenAiCompatible,
+} from "./openai-compatible";
+
+describe("getExternalTtsConfig", () => {
+	test("reads required config from environment", () => {
+		const config = getExternalTtsConfig({
+			env: {
+				API_BASE_URL: "https://example.com/v1/",
+				API_MODEL: "tts-1",
+				API_KEY: "secret",
+			},
+		});
+
+		expect(config).toEqual({
+			apiBaseUrl: "https://example.com/v1",
+			apiKey: "secret",
+			model: "tts-1",
+		});
+	});
+
+	test("throws a clear error when config is incomplete", () => {
+		expect(() =>
+			getExternalTtsConfig({
+				env: {
+					API_BASE_URL: "https://example.com/v1",
+					API_KEY: "secret",
+				},
+			}),
+		).toThrow("External TTS is not configured");
+	});
+});
+
+describe("synthesizeSpeechWithOpenAiCompatible", () => {
+	test("posts audio speech requests with the mapped default voice", async () => {
+		const calls: Array<{ input: RequestInfo | URL; init?: RequestInit }> = [];
+		const audioBytes = Uint8Array.from([1, 2, 3, 4]);
+
+		const result = await synthesizeSpeechWithOpenAiCompatible({
+			config: {
+				apiBaseUrl: "https://example.com/v1/",
+				apiKey: "secret",
+				model: "tts-1",
+			},
+			text: "你好，Cutia",
+			voice: "default",
+			fetchImpl: async (input, init) => {
+				calls.push({ input, init });
+				return new Response(audioBytes, {
+					status: 200,
+					headers: { "Content-Type": "audio/mpeg" },
+				});
+			},
+		});
+
+		expect(Array.from(new Uint8Array(result))).toEqual([1, 2, 3, 4]);
+		expect(calls).toHaveLength(1);
+		expect(calls[0]?.input).toBe("https://example.com/v1/audio/speech");
+
+		const headers = new Headers(calls[0]?.init?.headers);
+		expect(headers.get("authorization")).toBe("Bearer secret");
+		expect(headers.get("content-type")).toBe("application/json");
+
+		expect(JSON.parse(String(calls[0]?.init?.body))).toEqual({
+			input: "你好，Cutia",
+			model: "tts-1",
+			response_format: "mp3",
+			voice: DEFAULT_EXTERNAL_TTS_VOICE,
+		});
+	});
+
+	test("surfaces upstream JSON error messages", async () => {
+		await expect(
+			synthesizeSpeechWithOpenAiCompatible({
+				config: {
+					apiBaseUrl: "https://example.com/v1",
+					apiKey: "secret",
+					model: "tts-1",
+				},
+				text: "hello",
+				voice: "nova",
+				fetchImpl: async () =>
+					Response.json(
+						{ error: { message: "quota exceeded" } },
+						{ status: 429 },
+					),
+			}),
+		).rejects.toThrow("quota exceeded");
+	});
+
+	test("falls back to the root audio speech path when the v1 path returns 404", async () => {
+		const calls: string[] = [];
+
+		const audio = await synthesizeSpeechWithOpenAiCompatible({
+			config: {
+				apiBaseUrl: "https://example.com/v1",
+				apiKey: "secret",
+				model: "tts-1",
+			},
+			text: "hello",
+			voice: "default",
+			fetchImpl: async (input) => {
+				const url = String(input);
+				calls.push(url);
+
+				if (url === "https://example.com/v1/audio/speech") {
+					return new Response("page not found", { status: 404 });
+				}
+
+				return new Response(Uint8Array.from([9, 8, 7]), {
+					status: 200,
+					headers: { "Content-Type": "audio/mpeg" },
+				});
+			},
+		});
+
+		expect(Array.from(new Uint8Array(audio))).toEqual([9, 8, 7]);
+		expect(calls).toEqual([
+			"https://example.com/v1/audio/speech",
+			"https://example.com/audio/speech",
+		]);
+	});
+
+	test("rejects non-audio success responses", async () => {
+		await expect(
+			synthesizeSpeechWithOpenAiCompatible({
+				config: {
+					apiBaseUrl: "https://example.com/v1",
+					apiKey: "secret",
+					model: "tts-1",
+				},
+				text: "hello",
+				voice: "default",
+				fetchImpl: async () =>
+					new Response("<!doctype html>", {
+						status: 200,
+						headers: { "Content-Type": "text/html; charset=utf-8" },
+					}),
+			}),
+		).rejects.toThrow("Expected audio response");
+	});
+
+	test("surfaces upstream text errors when JSON is unavailable", async () => {
+		await expect(
+			synthesizeSpeechWithOpenAiCompatible({
+				config: {
+					apiBaseUrl: "https://example.com/v1",
+					apiKey: "secret",
+					model: "tts-1",
+				},
+				text: "hello",
+				voice: "nova",
+				fetchImpl: async () =>
+					new Response("gateway timeout", {
+						status: 504,
+						headers: { "Content-Type": "text/plain" },
+					}),
+			}),
+		).rejects.toThrow("gateway timeout");
+	});
+});
diff --git a/apps/web/src/lib/tts/openai-compatible.ts b/apps/web/src/lib/tts/openai-compatible.ts
new file mode 100644
index 0000000..8396a1c
--- /dev/null
+++ b/apps/web/src/lib/tts/openai-compatible.ts
@@ -0,0 +1,169 @@
+import { z } from "zod";
+import {
+	DEFAULT_EXTERNAL_TTS_VOICE,
+	DEFAULT_VOICE_PACK,
+} from "@/constants/tts-constants";
+
+const externalTtsConfigSchema = z.object({
+	API_BASE_URL: z.string().min(1),
+	API_MODEL: z.string().min(1),
+	API_KEY: z.string().min(1),
+});
+
+export { DEFAULT_EXTERNAL_TTS_VOICE };
+
+export interface ExternalTtsConfig {
+	apiBaseUrl: string;
+	apiKey: string;
+	model: string;
+}
+
+type FetchLike = (
+	input: RequestInfo | URL,
+	init?: RequestInit,
+) => Promise<Response>;
+
+export function getExternalTtsConfig({
+	env,
+}: {
+	env: Record<string, string | undefined>;
+}): ExternalTtsConfig {
+	const parsed = externalTtsConfigSchema.safeParse(env);
+
+	if (!parsed.success) {
+		throw new Error("External TTS is not configured");
+	}
+
+	return {
+		apiBaseUrl: parsed.data.API_BASE_URL.replace(/\/+$/, ""),
+		apiKey: parsed.data.API_KEY,
+		model: parsed.data.API_MODEL,
+	};
+}
+
+function resolveVoice({ voice }: { voice?: string }): string {
+	if (!voice || voice === DEFAULT_VOICE_PACK) {
+		return DEFAULT_EXTERNAL_TTS_VOICE;
+	}
+
+	return voice;
+}
+
+async function getUpstreamErrorMessage({
+	response,
+}: {
+	response: Response;
+}): Promise<string> {
+	const contentType = response.headers.get("content-type") ?? "";
+
+	if (contentType.includes("application/json")) {
+		const json = (await response.json().catch(() => null)) as {
+			error?:
+				| string
+				| {
+						message?: string;
+				  };
+		} | null;
+
+		if (typeof json?.error === "string" && json.error.trim()) {
+			return json.error;
+		}
+
+		if (
+			typeof json?.error === "object" &&
+			typeof json.error?.message === "string" &&
+			json.error.message.trim()
+		) {
+			return json.error.message;
+		}
+	}
+
+	const text = await response.text().catch(() => "");
+	if (text.trim()) {
+		return text;
+	}
+
+	return String(response.status);
+}
+
+function getSpeechEndpointUrls({
+	apiBaseUrl,
+}: {
+	apiBaseUrl: string;
+}): string[] {
+	const normalizedBaseUrl = apiBaseUrl.replace(/\/+$/, "");
+	const urls = [`${normalizedBaseUrl}/audio/speech`];
+
+	if (normalizedBaseUrl.endsWith("/v1")) {
+		urls.push(`${normalizedBaseUrl.slice(0, -3)}/audio/speech`);
+	}
+
+	return [...new Set(urls)];
+}
+
+export async function synthesizeSpeechWithOpenAiCompatible({
+	config,
+	text,
+	voice,
+	fetchImpl = fetch,
+}: {
+	config: ExternalTtsConfig;
+	text: string;
+	voice?: string;
+	fetchImpl?: FetchLike;
+}): Promise<ArrayBuffer> {
+	const endpointUrls = getSpeechEndpointUrls({
+		apiBaseUrl: config.apiBaseUrl,
+	});
+	const requestInit = {
+		method: "POST",
+		headers: {
+			Authorization: `Bearer ${config.apiKey}`,
+			"Content-Type": "application/json",
+		},
+		body: JSON.stringify({
+			input: text,
+			model: config.model,
+			response_format: "mp3",
+			voice: resolveVoice({ voice }),
+		}),
+	} satisfies RequestInit;
+
+	let lastErrorResponse: Response | null = null;
+
+	for (const endpointUrl of endpointUrls) {
+		const response = await fetchImpl(endpointUrl, requestInit);
+
+		if (response.ok) {
+			const contentType = response.headers.get("content-type") ?? "";
+
+			if (
+				contentType &&
+				!contentType.includes("audio/") &&
+				contentType !== "application/octet-stream"
+			) {
+				throw new Error(`Expected audio response, received ${contentType}`);
+			}
+
+			const audio = await response.arrayBuffer();
+
+			if (audio.byteLength === 0) {
+				throw new Error("External TTS returned empty audio");
+			}
+
+			return audio;
+		}
+
+		lastErrorResponse = response;
+
+		if (response.status !== 404) {
+			break;
+		}
+	}
+
+	throw new Error(
+		`External TTS request failed: ${await getUpstreamErrorMessage({
+			response: lastErrorResponse ?? new Response(null, { status: 500 }),
+		})}`,
+	);
+}
diff --git a/apps/web/src/lib/tts/provider.test.ts b/apps/web/src/lib/tts/provider.test.ts
new file mode 100644
index 0000000..5b6f6d7
--- /dev/null
+++ b/apps/web/src/lib/tts/provider.test.ts
@@ -0,0 +1,53 @@
+import { describe, expect, test } from "bun:test";
+import { synthesizeSpeechWithFallback } from "./provider";
+
+describe("synthesizeSpeechWithFallback", () => {
+	test("returns the configured external provider result when it succeeds", async () => {
+		let legacyCalled = false;
+
+		const result = await synthesizeSpeechWithFallback({
+			env: {
+				API_BASE_URL: "https://example.com/v1",
+				API_MODEL: "tts-1",
+				API_KEY: "secret",
+			},
+			text: "hello",
+			voice: "default",
+			openAiSynthesize: async () => Uint8Array.from([1, 2, 3]).buffer,
+			legacySynthesize: async () => {
+				legacyCalled = true;
+				return Uint8Array.from([9, 9, 9]).buffer;
+			},
+		});
+
+		expect(Array.from(new Uint8Array(result))).toEqual([1, 2, 3]);
+		expect(legacyCalled).toBe(false);
+	});
+
+	test("falls back to the legacy provider when the configured provider is unsupported", async () => {
+		let legacyCalled = false;
+
+		const result = await synthesizeSpeechWithFallback({
+			env: {
+				API_BASE_URL: "https://example.com/v1",
+				API_MODEL: "tts-1",
+				API_KEY: "secret",
+			},
+			text: "hello",
+			voice: "default",
+			openAiSynthesize: async () => {
+				throw new Error(
+					"External TTS request failed: Expected audio response, received text/html; charset=utf-8",
+				);
+			},
+			legacySynthesize: async ({ text }) => {
+				legacyCalled = true;
+				expect(text).toBe("hello");
+				return Uint8Array.from([7, 8, 9]).buffer;
+			},
+		});
+
+		expect(Array.from(new Uint8Array(result))).toEqual([7, 8, 9]);
+		expect(legacyCalled).toBe(true);
+	});
+});
diff --git a/apps/web/src/lib/tts/provider.ts b/apps/web/src/lib/tts/provider.ts
new file mode 100644
index 0000000..5b2c95a
--- /dev/null
+++ b/apps/web/src/lib/tts/provider.ts
@@ -0,0 +1,32 @@
+import {
+	getExternalTtsConfig,
+	synthesizeSpeechWithOpenAiCompatible,
+} from "./openai-compatible";
+import { synthesizeSpeechWithLegacyProvider } from "./legacy";
+
+type TtsEnv = {
+	API_BASE_URL?: string;
+	API_MODEL?: string;
+	API_KEY?: string;
+};
+
+export async function synthesizeSpeechWithFallback({
+	env,
+	text,
+	voice,
+	openAiSynthesize = synthesizeSpeechWithOpenAiCompatible,
+	legacySynthesize = synthesizeSpeechWithLegacyProvider,
+}: {
+	env: TtsEnv;
+	text: string;
+	voice?: string;
+	openAiSynthesize?: typeof synthesizeSpeechWithOpenAiCompatible;
+	legacySynthesize?: typeof synthesizeSpeechWithLegacyProvider;
+}): Promise<ArrayBuffer> {
+	try {
+		const config = getExternalTtsConfig({ env });
+		return await openAiSynthesize({ config, text, voice });
+	} catch {
+		return legacySynthesize({ text, voice });
+	}
+}
diff --git a/docs/plans/2026-03-17-tts-external-provider-design.md b/docs/plans/2026-03-17-tts-external-provider-design.md
new file mode 100644
index 0000000..9638aec
--- /dev/null
+++ b/docs/plans/2026-03-17-tts-external-provider-design.md
@@ -0,0 +1,132 @@
+# 外部 TTS 扩展设计
+
+## 背景
+
+`TIA-51` 要求 Cutia 支持调用外部 TTS API，把文本或对话内容生成语音并接入视频编辑流程。
+
+当前仓库已经有一条从文本元素生成语音并插入时间线的链路，但服务端路由 `apps/web/src/app/api/tts/generate/route.ts` 仍然把上游 TTS 服务硬编码为单一匿名接口，`voice` 参数也没有被真正消费。这意味着：
+
+- 外部 TTS 提供方无法通过环境配置切换
+- 语音选项只是前端占位，实际不会影响生成结果
+- 错误语义受限于硬编码上游，缺少可维护的适配层
+
+## 无人值守前提
+
+本次执行为无人值守编排，会直接根据工单描述和运行环境中的 `API_BASE_URL`、`API_MODEL`、`API_KEY` 做保守设计，不额外等待人工确认。
+
+## 目标
+
+- 通过环境变量配置外部 TTS 提供方
+- 支持以文本内容调用外部 TTS API 并返回可插入编辑器的音频
+- 让现有 `voice` 参数真正参与外部请求
+- 为失败场景提供明确、可测试的错误返回
+
+## 非目标
+
+- 不在本次中引入完整的多供应商设置 UI
+- 不改动现有时间线插入和媒体入库的主流程
+- 不为每个第三方 TTS 服务单独做适配器注册中心
+
+## 方案比较
+
+### 方案 A: 保留当前硬编码上游，只补更多参数
+
+优点：
+- 改动最少
+
+缺点：
+- 仍然不满足“外部 TTS 能力扩展”的核心要求
+- 供应商不可配置
+- 无法安全复用运行环境已提供的 API 配置
+
+结论：不采用。
+
+### 方案 B: 改为环境驱动的 OpenAI 兼容 TTS 适配层
+
+优点：
+- 只需要一层薄适配，即可支持大量 OpenAI 兼容的 TTS 服务
+- 和当前运行环境提供的 `API_BASE_URL`、`API_MODEL`、`API_KEY` 直接对齐
+- 前端接口保持不变，编辑器链路改动最小
+
+缺点：
+- 需要定义默认 voice 映射
+- 需要自己处理二进制音频响应与错误解析
+
+结论：采用。
+
+### 方案 C: 直接搭建供应商注册中心
+
+优点：
+- 长期扩展性最好
+
+缺点：
+- 对当前工单明显过度设计
+- 需要更多配置、UI 和测试面
+
+结论：当前不采用。
+
+## 决策
+
+采用方案 B：新增一个面向 OpenAI 兼容接口的 TTS 适配层，服务端路由只负责参数校验、调用适配器并把音频转为前端可消费的 base64。
+
+## 架构
+
+### 服务端
+
+- 在 `apps/web/src/lib/tts/` 下新增可测试的适配模块
+- 模块职责：
+  - 读取并规范化 TTS 配置
+  - 把 `text`/`voice` 转换为上游 `/audio/speech` 请求
+  - 解析上游失败响应，输出明确错误
+  - 返回音频 `ArrayBuffer`
+- 路由只保留：
+  - 请求体验证
+  - 调用适配模块
+  - 转 base64 返回 `{ audio }`
+
+### 前端
+
+- `apps/web/src/lib/tts/service.ts` 保持调用 `/api/tts/generate` 的协议不变
+- `apps/web/src/constants/tts-constants.ts` 提供可实际使用的 voice 列表与默认值
+- 文本面板和动作系统继续复用既有插入媒体/时间线逻辑
+
+## 数据流
+
+1. 用户在文本属性面板或动作系统触发 TTS
+2. 前端向 `/api/tts/generate` 提交 `{ text, voice }`
+3. 服务端校验参数
+4. 服务端使用环境变量构造对外部 TTS API 的请求
+5. 外部 TTS 返回音频二进制
+6. 服务端转为 base64 JSON 响应
+7. 前端解码为 `Blob` 与 `AudioBuffer`
+8. 编辑器把音频加入媒体库并插入音轨
+
+## 错误处理
+
+- 请求参数非法：返回 `400`
+- TTS 环境变量缺失：返回 `500`，信息明确为未配置
+- 外部 TTS 返回非 2xx：返回 `502`，透出可读错误
+- 外部 TTS 返回空音频或异常格式：返回 `502`
+- 未知异常：返回 `500`
+
+## 测试策略
+
+### 自动化测试
+
+- 为适配层写纯函数测试，覆盖：
+  - `default` voice 映射
+  - 请求 URL、headers、body 是否正确
+  - 外部错误 JSON / 文本响应映射
+  - 成功时返回音频数据
+
+### 真实验证
+
+- 使用环境中的真实 `API_BASE_URL`、`API_MODEL`、`API_KEY`
+- 直接运行一次服务端适配逻辑，验证能拿到非空 MP3 数据
+
+## 风险与缓解
+
+- 外部服务不完全兼容 OpenAI TTS 协议
+  - 缓解：把适配逻辑集中在单模块，后续改协议只动一处
+- 默认 voice 与实际模型不匹配
+  - 缓解：统一在适配层做默认映射，避免前端散落判断
diff --git a/docs/plans/2026-03-17-tts-external-provider.md b/docs/plans/2026-03-17-tts-external-provider.md
new file mode 100644
index 0000000..6c81b2d
--- /dev/null
+++ b/docs/plans/2026-03-17-tts-external-provider.md
@@ -0,0 +1,147 @@
+# 外部 TTS 扩展 Implementation Plan
+
+> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
+
+**Goal:** 让 Cutia 的 TTS 能力从硬编码单一路由改为可配置的外部 TTS API 调用，并继续把生成语音接入媒体库和时间线。
+
+**Architecture:** 在 `apps/web/src/lib/tts/` 新增可测试的 OpenAI 兼容 TTS 适配层，`/api/tts/generate` 只负责校验和响应转换，前端调用协议保持 `{ audio }` 不变。通过 `packages/env` 暴露配置，避免把供应商细节散落到 UI 和编辑器逻辑里。
+
+**Tech Stack:** Next.js route handlers, TypeScript, Zod, Bun test, OpenAI-compatible HTTP API
+
+---
+
+### Task 1: 补环境与 voice 常量基线
+
+**Files:**
+- Modify: `packages/env/src/web.ts`
+- Modify: `apps/web/src/constants/tts-constants.ts`
+
+**Step 1: 写出目标测试用例草案**
+
+- 目标行为：
+  - TTS 配置可从环境读取
+  - `default` voice 会映射到可用的默认外部 voice
+
+**Step 2: 运行当前目标测试确认缺失**
+
+Run: `bun test apps/web/src/lib/tts/openai-compatible.test.ts`
+Expected: FAIL，原因是测试文件或实现不存在。
+
+**Step 3: 为后续实现准备最小配置面**
+
+- 在环境 schema 中加入 `API_BASE_URL`、`API_MODEL`、`API_KEY`
+- 在 TTS 常量中定义默认 voice 与可选 voice 列表
+
+**Step 4: 运行定向测试**
+
+Run: `bun test apps/web/src/lib/tts/openai-compatible.test.ts`
+Expected: 仍然失败，但失败点缩小到适配实现缺失。
+
+**Step 5: Commit**
+
+```bash
+git add packages/env/src/web.ts apps/web/src/constants/tts-constants.ts
+git commit -m "feat: prepare external tts config"
+```
+
+### Task 2: 先写失败测试覆盖外部 TTS 适配层
+
+**Files:**
+- Create: `apps/web/src/lib/tts/openai-compatible.test.ts`
+- Create: `apps/web/src/lib/tts/openai-compatible.ts`
+
+**Step 1: 写失败测试**
+
+- 成功场景：正确构造 `/audio/speech` 请求并返回音频
+- 失败场景：上游 JSON 错误、文本错误、空配置错误
+- voice 场景：`default` 被映射为默认 voice
+
+**Step 2: 运行测试验证失败**
+
+Run: `bun test apps/web/src/lib/tts/openai-compatible.test.ts`
+Expected: FAIL，且失败原因为导入缺失或行为不匹配，不是测试写错。
+
+**Step 3: 写最小实现**
+
+- 提供配置解析
+- 提供请求构造
+- 提供错误解析
+- 提供音频数组缓冲区返回
+
+**Step 4: 运行测试确认通过**
+
+Run: `bun test apps/web/src/lib/tts/openai-compatible.test.ts`
+Expected: PASS
+
+**Step 5: Commit**
+
+```bash
+git add apps/web/src/lib/tts/openai-compatible.ts apps/web/src/lib/tts/openai-compatible.test.ts
+git commit -m "feat: add external tts adapter"
+```
+
+### Task 3: 接回 API 路由
+
+**Files:**
+- Modify: `apps/web/src/app/api/tts/generate/route.ts`
+
+**Step 1: 写失败测试预期**
+
+- 通过 Task 2 已确保适配层正确
+- 当前路由仍硬编码旧上游，因此与新适配层设计不一致
+
+**Step 2: 运行现有测试基线**
+
+Run: `bun test apps/web/src/lib/tts/openai-compatible.test.ts`
+Expected: PASS
+
+**Step 3: 最小改造路由**
+
+- 删除硬编码上游 URL 和旧返回结构解析
+- 保留 Zod 请求校验
+- 调用适配层并统一转换为 `{ audio }`
+
+**Step 4: 运行相关测试**
+
+Run: `bun test apps/web/src/lib/tts/openai-compatible.test.ts`
+Expected: PASS
+
+**Step 5: Commit**
+
+```bash
+git add apps/web/src/app/api/tts/generate/route.ts
+git commit -m "feat: wire route to external tts provider"
+```
+
+### Task 4: 端到端验证与整理
+
+**Files:**
+- Modify: `docs/plans/2026-03-17-tts-external-provider-design.md`
+- Modify: `docs/plans/2026-03-17-tts-external-provider.md`
+
+**Step 1: 运行自动化测试**
+
+Run: `bun test apps/web/src/lib/tts/openai-compatible.test.ts`
+Expected: PASS
+
+**Step 2: 运行真实外部 TTS 验证**
+
+Run: `bun --eval '<补一段调用适配层的脚本>'`
+Expected: 输出非空音频字节长度，不打印密钥。
+
+**Step 3: 检查格式与类型**
+
+Run: `bunx @biomejs/biome check apps/web/src/lib/tts && bunx tsc -p apps/web/tsconfig.json --noEmit`
+Expected: PASS
+
+**Step 4: 整理工作台与提交内容**
+
+- 更新 Linear 工作台中的验收、验证和备注
+- 推送分支并创建 PR
+
+**Step 5: Commit**
+
+```bash
+git add docs/plans/2026-03-17-tts-external-provider-design.md docs/plans/2026-03-17-tts-external-provider.md
+git commit -m "docs: capture external tts plan"
+```
diff --git a/packages/env/src/web.ts b/packages/env/src/web.ts
index 4ec86fe..d0ffdbc 100644
--- a/packages/env/src/web.ts
+++ b/packages/env/src/web.ts
@@ -21,6 +21,9 @@ const webEnvSchema = z.object({
 	UPSTASH_REDIS_REST_TOKEN: z.string(),
 	FREESOUND_CLIENT_ID: z.string().optional(),
 	FREESOUND_API_KEY: z.string().optional(),
+	API_BASE_URL: z.string().optional(),
+	API_MODEL: z.string().optional(),
+	API_KEY: z.string().optional(),
 
 	// Cloudflare R2
 	R2_ACCOUNT_ID: z.string().optional(),

From 17979ae7c5786b0802d903958896de63e30c814b Mon Sep 17 00:00:00 2001
From: tianhei <tianheilene@gmail.com>
Date: Tue, 17 Mar 2026 19:29:54 +0800
Subject: [PATCH 02/22] fix(tts): address rework review findings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Summary:
- add timeout handling for external and legacy TTS fetches and expand
  OpenAI-compatible endpoint probing to cover both /v1 and root paths
- harden legacy fallback with GET length limits, HTTPS/host validation,
  and audio content-type checks
- rethrow missing external config instead of silently falling back, add
  regression tests, and fix the plan doc heading level

Rationale:
- review feedback identified real security and reliability gaps in the
  fallback provider and request orchestration
- the legacy provider only supports GET, so a length guard is the available
  mitigation: it caps how much text is exposed in the query string and keeps
  the request URL within size limits
- explicit tests are needed to lock the config-error path and timeout
  behavior before reopening review

Tests:
- bun test apps/web/src/lib/tts/provider.test.ts apps/web/src/lib/tts/openai-compatible.test.ts apps/web/src/lib/tts/legacy.test.ts
- bunx @biomejs/biome check apps/web/src/app/api/tts/generate/route.ts apps/web/src/lib/tts/fetch-with-timeout.ts apps/web/src/lib/tts/legacy.ts apps/web/src/lib/tts/legacy.test.ts apps/web/src/lib/tts/openai-compatible.ts apps/web/src/lib/tts/openai-compatible.test.ts apps/web/src/lib/tts/provider.ts apps/web/src/lib/tts/provider.test.ts apps/web/src/constants/tts-constants.ts packages/env/src/web.ts docs/plans/2026-03-17-tts-external-provider.md docs/plans/2026-03-17-tts-external-provider-design.md
- bunx tsc -p apps/web/tsconfig.json --noEmit
- NODE_ENV=development NEXT_PUBLIC_SITE_URL=http://localhost:4100 UPSTASH_REDIS_REST_URL=https://example.com UPSTASH_REDIS_REST_TOKEN=dummy bun --eval 'import { NextRequest } from "next/server"; import { POST } from "./src/app/api/tts/generate/route.ts"; const request = new NextRequest("http://localhost/api/tts/generate", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ text: "你好，Cutia。", voice: "default" }) }); const response = await POST(request); const json = await response.json(); console.log(JSON.stringify({ status: response.status, audioLength: json.audio?.length ?? 0, audioHead: json.audio?.slice(0, 8) ?? null, error: json.error ?? null }));' (workdir: apps/web)

Co-authored-by: Codex <codex@openai.com>
---
 apps/web/src/lib/tts/fetch-with-timeout.ts    |  36 ++++++
 apps/web/src/lib/tts/legacy.test.ts           | 109 ++++++++++++++++++
 apps/web/src/lib/tts/legacy.ts                |  49 +++++++-
 .../web/src/lib/tts/openai-compatible.test.ts |  53 +++++++++
 apps/web/src/lib/tts/openai-compatible.ts     |  24 +++-
 apps/web/src/lib/tts/provider.test.ts         |  24 ++++
 apps/web/src/lib/tts/provider.ts              |   9 +-
 .../plans/2026-03-17-tts-external-provider.md |   2 +-
 8 files changed, 294 insertions(+), 12 deletions(-)
 create mode 100644 apps/web/src/lib/tts/fetch-with-timeout.ts
 create mode 100644 apps/web/src/lib/tts/legacy.test.ts

diff --git a/apps/web/src/lib/tts/fetch-with-timeout.ts b/apps/web/src/lib/tts/fetch-with-timeout.ts
new file mode 100644
index 0000000..49fbca7
--- /dev/null
+++ b/apps/web/src/lib/tts/fetch-with-timeout.ts
@@ -0,0 +1,36 @@
+type FetchLike = (
+	input: RequestInfo | URL,
+	init?: RequestInit,
+) => Promise<Response>;
+
+export async function fetchWithTimeout({
+	fetchImpl,
+	input,
+	init,
+	timeoutMs,
+	timeoutMessage,
+}: {
+	fetchImpl: FetchLike;
+	input: RequestInfo | URL;
+	init?: RequestInit;
+	timeoutMs: number;
+	timeoutMessage: string;
+}): Promise<Response> {
+	const controller = new AbortController();
+	const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
+
+	try {
+		return await fetchImpl(input, {
+			...init,
+			signal: controller.signal,
+		});
+	} catch (error) {
+		if (controller.signal.aborted) {
+			throw new Error(timeoutMessage);
+		}
+
+		throw error;
+	} finally {
+		clearTimeout(timeoutId);
+	}
+}
diff --git a/apps/web/src/lib/tts/legacy.test.ts b/apps/web/src/lib/tts/legacy.test.ts
new file mode 100644
index 0000000..f676ad2
--- /dev/null
+++ b/apps/web/src/lib/tts/legacy.test.ts
@@ -0,0 +1,109 @@
+import { describe, expect, test } from "bun:test";
+import { synthesizeSpeechWithLegacyProvider } from "./legacy";
+
+describe("synthesizeSpeechWithLegacyProvider", () => {
+	test("rejects audio urls outside the expected https host allowlist", async () => {
+		const calls: string[] = [];
+
+		await expect(
+			synthesizeSpeechWithLegacyProvider({
+				text: "hello",
+				fetchImpl: async (input) => {
+					calls.push(String(input));
+					return Response.json({
+						code: 200,
+						url: "http://127.0.0.1/internal.mp3",
+					});
+				},
+			}),
+		).rejects.toThrow("Legacy TTS returned an unexpected audio URL");
+
+		expect(calls).toHaveLength(1);
+	});
+
+	test("rejects non-audio content returned by the legacy audio download", async () => {
+		await expect(
+			synthesizeSpeechWithLegacyProvider({
+				text: "hello",
+				fetchImpl: async (input) => {
+					if (String(input).includes("/apis/mbAIsc?")) {
+						return Response.json({
+							code: 200,
+							url: "https://api.milorapart.top/voice/test.mp3",
+						});
+					}
+
+					return new Response("<html></html>", {
+						status: 200,
+						headers: { "Content-Type": "text/html; charset=utf-8" },
+					});
+				},
+			}),
+		).rejects.toThrow("Legacy TTS returned non-audio content");
+	});
+
+	test("rejects synthesis text that would exceed the legacy GET limit", async () => {
+		let fetchCalled = false;
+
+		await expect(
+			synthesizeSpeechWithLegacyProvider({
+				text: "中".repeat(400),
+				fetchImpl: async () => {
+					fetchCalled = true;
+					return Response.json({
+						code: 200,
+						url: "https://api.milorapart.top/voice/test.mp3",
+					});
+				},
+			}),
+		).rejects.toThrow("Legacy TTS text is too long for GET fallback");
+
+		expect(fetchCalled).toBe(false);
+	});
+
+	test("aborts the metadata request when the upstream hangs", async () => {
+		await expect(
+			synthesizeSpeechWithLegacyProvider({
+				text: "hello",
+				timeoutMs: 10,
+				fetchImpl: async (_input, init) =>
+					new Promise((_resolve, reject) => {
+						init?.signal?.addEventListener(
+							"abort",
+							() => reject(new Error("aborted")),
+							{ once: true },
+						);
+					}),
+			}),
+		).rejects.toThrow("Legacy TTS request timed out");
+	});
+
+	test("aborts the audio download when the legacy audio fetch hangs", async () => {
+		let callCount = 0;
+
+		await expect(
+			synthesizeSpeechWithLegacyProvider({
+				text: "hello",
+				timeoutMs: 10,
+				fetchImpl: async (_input, init) => {
+					callCount++;
+
+					if (callCount === 1) {
+						return Response.json({
+							code: 200,
+							url: "https://api.milorapart.top/voice/test.mp3",
+						});
+					}
+
+					return new Promise((_resolve, reject) => {
+						init?.signal?.addEventListener(
+							"abort",
+							() => reject(new Error("aborted")),
+							{ once: true },
+						);
+					});
+				},
+			}),
+		).rejects.toThrow("Legacy TTS audio download timed out");
+	});
+});
diff --git a/apps/web/src/lib/tts/legacy.ts b/apps/web/src/lib/tts/legacy.ts
index 15243f2..30e9e41 100644
--- a/apps/web/src/lib/tts/legacy.ts
+++ b/apps/web/src/lib/tts/legacy.ts
@@ -1,6 +1,10 @@
 import { z } from "zod";
+import { fetchWithTimeout } from "./fetch-with-timeout";
 
 const LEGACY_TTS_API_BASE = "https://api.milorapart.top/apis/mbAIsc";
+const LEGACY_TTS_ALLOWED_AUDIO_HOSTS = new Set(["api.milorapart.top"]);
+const LEGACY_TTS_TIMEOUT_MS = 15_000;
+const LEGACY_TTS_MAX_URL_LENGTH = 1_800;
 
 const legacyResponseSchema = z.object({
 	code: z.number(),
@@ -15,16 +19,29 @@ type FetchLike = (
 export async function synthesizeSpeechWithLegacyProvider({
 	text,
 	fetchImpl = fetch,
+	timeoutMs = LEGACY_TTS_TIMEOUT_MS,
 }: {
 	text: string;
 	voice?: string;
 	fetchImpl?: FetchLike;
+	timeoutMs?: number;
 }): Promise<ArrayBuffer> {
-	const upstreamUrl = `${LEGACY_TTS_API_BASE}?${new URLSearchParams({
+	const query = new URLSearchParams({
 		format: "mp3",
 		text,
-	})}`;
-	const upstreamResponse = await fetchImpl(upstreamUrl);
+	}).toString();
+	const upstreamUrl = `${LEGACY_TTS_API_BASE}?${query}`;
+
+	if (upstreamUrl.length > LEGACY_TTS_MAX_URL_LENGTH) {
+		throw new Error("Legacy TTS text is too long for GET fallback");
+	}
+
+	const upstreamResponse = await fetchWithTimeout({
+		fetchImpl,
+		input: upstreamUrl,
+		timeoutMessage: "Legacy TTS request timed out",
+		timeoutMs,
+	});
 
 	if (!upstreamResponse.ok) {
 		throw new Error(`Legacy TTS request failed: ${upstreamResponse.status}`);
@@ -37,7 +54,21 @@ export async function synthesizeSpeechWithLegacyProvider({
 		throw new Error("Legacy TTS generation failed");
 	}
 
-	const audioResponse = await fetchImpl(parsed.data.url);
+	const audioUrl = new URL(parsed.data.url);
+
+	if (
+		audioUrl.protocol !== "https:" ||
+		!LEGACY_TTS_ALLOWED_AUDIO_HOSTS.has(audioUrl.hostname)
+	) {
+		throw new Error("Legacy TTS returned an unexpected audio URL");
+	}
+
+	const audioResponse = await fetchWithTimeout({
+		fetchImpl,
+		input: audioUrl,
+		timeoutMessage: "Legacy TTS audio download timed out",
+		timeoutMs,
+	});
 
 	if (!audioResponse.ok) {
 		throw new Error(
@@ -45,6 +76,16 @@ export async function synthesizeSpeechWithLegacyProvider({
 		);
 	}
 
+	const contentType = audioResponse.headers.get("content-type") ?? "";
+
+	if (
+		contentType &&
+		!contentType.includes("audio/") &&
+		contentType !== "application/octet-stream"
+	) {
+		throw new Error(`Legacy TTS returned non-audio content: ${contentType}`);
+	}
+
 	const audio = await audioResponse.arrayBuffer();
 
 	if (audio.byteLength === 0) {
diff --git a/apps/web/src/lib/tts/openai-compatible.test.ts b/apps/web/src/lib/tts/openai-compatible.test.ts
index 4769b48..86636a7 100644
--- a/apps/web/src/lib/tts/openai-compatible.test.ts
+++ b/apps/web/src/lib/tts/openai-compatible.test.ts
@@ -124,6 +124,36 @@ describe("synthesizeSpeechWithOpenAiCompatible", () => {
 		]);
 	});
 
+	test("tries the /v1 speech endpoint first when the base url is root-level", async () => {
+		const calls: string[] = [];
+
+		const audio = await synthesizeSpeechWithOpenAiCompatible({
+			config: {
+				apiBaseUrl: "https://example.com",
+				apiKey: "secret",
+				model: "tts-1",
+			},
+			text: "hello",
+			voice: "default",
+			fetchImpl: async (input) => {
+				const url = String(input);
+				calls.push(url);
+
+				if (url === "https://example.com/v1/audio/speech") {
+					return new Response(Uint8Array.from([5, 4, 3]), {
+						status: 200,
+						headers: { "Content-Type": "audio/mpeg" },
+					});
+				}
+
+				return new Response("not found", { status: 404 });
+			},
+		});
+
+		expect(Array.from(new Uint8Array(audio))).toEqual([5, 4, 3]);
+		expect(calls[0]).toBe("https://example.com/v1/audio/speech");
+	});
+
 	test("rejects non-audio success responses", async () => {
 		await expect(
 			synthesizeSpeechWithOpenAiCompatible({
@@ -143,6 +173,29 @@ describe("synthesizeSpeechWithOpenAiCompatible", () => {
 		).rejects.toThrow("Expected audio response");
 	});
 
+	test("aborts upstream requests that exceed the timeout", async () => {
+		await expect(
+			synthesizeSpeechWithOpenAiCompatible({
+				config: {
+					apiBaseUrl: "https://example.com/v1",
+					apiKey: "secret",
+					model: "tts-1",
+				},
+				text: "hello",
+				voice: "default",
+				timeoutMs: 10,
+				fetchImpl: async (_input, init) =>
+					new Promise((_resolve, reject) => {
+						init?.signal?.addEventListener(
+							"abort",
+							() => reject(new Error("aborted")),
+							{ once: true },
+						);
+					}),
+			}),
+		).rejects.toThrow("External TTS request timed out");
+	});
+
 	test("surfaces upstream text errors when JSON is unavailable", async () => {
 		await expect(
 			synthesizeSpeechWithOpenAiCompatible({
diff --git a/apps/web/src/lib/tts/openai-compatible.ts b/apps/web/src/lib/tts/openai-compatible.ts
index 8396a1c..4f25e85 100644
--- a/apps/web/src/lib/tts/openai-compatible.ts
+++ b/apps/web/src/lib/tts/openai-compatible.ts
@@ -3,12 +3,14 @@ import {
 	DEFAULT_EXTERNAL_TTS_VOICE,
 	DEFAULT_VOICE_PACK,
 } from "@/constants/tts-constants";
+import { fetchWithTimeout } from "./fetch-with-timeout";
 
 const externalTtsConfigSchema = z.object({
 	API_BASE_URL: z.string().min(1),
 	API_MODEL: z.string().min(1),
 	API_KEY: z.string().min(1),
 });
+const EXTERNAL_TTS_TIMEOUT_MS = 15_000;
 
 export { DEFAULT_EXTERNAL_TTS_VOICE };
 
@@ -92,11 +94,13 @@ function getSpeechEndpointUrls({
 	apiBaseUrl: string;
 }): string[] {
 	const normalizedBaseUrl = apiBaseUrl.replace(/\/+$/, "");
-	const urls = [`${normalizedBaseUrl}/audio/speech`];
-
-	if (normalizedBaseUrl.endsWith("/v1")) {
-		urls.push(`${normalizedBaseUrl.slice(0, -3)}/audio/speech`);
-	}
+	const baseWithoutV1 = normalizedBaseUrl.endsWith("/v1")
+		? normalizedBaseUrl.slice(0, -3)
+		: normalizedBaseUrl;
+	const baseWithV1 = normalizedBaseUrl.endsWith("/v1")
+		? normalizedBaseUrl
+		: `${normalizedBaseUrl}/v1`;
+	const urls = [`${baseWithV1}/audio/speech`, `${baseWithoutV1}/audio/speech`];
 
 	return [...new Set(urls)];
 }
@@ -106,11 +110,13 @@ export async function synthesizeSpeechWithOpenAiCompatible({
 	text,
 	voice,
 	fetchImpl = fetch,
+	timeoutMs = EXTERNAL_TTS_TIMEOUT_MS,
 }: {
 	config: ExternalTtsConfig;
 	text: string;
 	voice?: string;
 	fetchImpl?: FetchLike;
+	timeoutMs?: number;
 }): Promise<ArrayBuffer> {
 	const endpointUrls = getSpeechEndpointUrls({
 		apiBaseUrl: config.apiBaseUrl,
@@ -132,7 +138,13 @@ export async function synthesizeSpeechWithOpenAiCompatible({
 	let lastErrorResponse: Response | null = null;
 
 	for (const endpointUrl of endpointUrls) {
-		const response = await fetchImpl(endpointUrl, requestInit);
+		const response = await fetchWithTimeout({
+			fetchImpl,
+			init: requestInit,
+			input: endpointUrl,
+			timeoutMessage: "External TTS request timed out",
+			timeoutMs,
+		});
 
 		if (response.ok) {
 			const contentType = response.headers.get("content-type") ?? "";
diff --git a/apps/web/src/lib/tts/provider.test.ts b/apps/web/src/lib/tts/provider.test.ts
index 5b6f6d7..561076d 100644
--- a/apps/web/src/lib/tts/provider.test.ts
+++ b/apps/web/src/lib/tts/provider.test.ts
@@ -50,4 +50,28 @@ describe("synthesizeSpeechWithFallback", () => {
 		expect(Array.from(new Uint8Array(result))).toEqual([7, 8, 9]);
 		expect(legacyCalled).toBe(true);
 	});
+
+	test("rethrows missing external config instead of silently falling back", async () => {
+		let openAiCalled = false;
+		let legacyCalled = false;
+
+		await expect(
+			synthesizeSpeechWithFallback({
+				env: {},
+				text: "hello",
+				voice: "default",
+				openAiSynthesize: async () => {
+					openAiCalled = true;
+					return Uint8Array.from([1]).buffer;
+				},
+				legacySynthesize: async () => {
+					legacyCalled = true;
+					return Uint8Array.from([9]).buffer;
+				},
+			}),
+		).rejects.toThrow("External TTS is not configured");
+
+		expect(openAiCalled).toBe(false);
+		expect(legacyCalled).toBe(false);
+	});
 });
diff --git a/apps/web/src/lib/tts/provider.ts b/apps/web/src/lib/tts/provider.ts
index 5b2c95a..9a0a02f 100644
--- a/apps/web/src/lib/tts/provider.ts
+++ b/apps/web/src/lib/tts/provider.ts
@@ -26,7 +26,14 @@ export async function synthesizeSpeechWithFallback({
 	try {
 		const config = getExternalTtsConfig({ env });
 		return await openAiSynthesize({ config, text, voice });
-	} catch {
+	} catch (error) {
+		if (
+			error instanceof Error &&
+			error.message === "External TTS is not configured"
+		) {
+			throw error;
+		}
+
 		return legacySynthesize({ text, voice });
 	}
 }
diff --git a/docs/plans/2026-03-17-tts-external-provider.md b/docs/plans/2026-03-17-tts-external-provider.md
index 6c81b2d..a1ee5c9 100644
--- a/docs/plans/2026-03-17-tts-external-provider.md
+++ b/docs/plans/2026-03-17-tts-external-provider.md
@@ -10,7 +10,7 @@
 
 ---
 
-### Task 1: 补环境与 voice 常量基线
+## Task 1: 补环境与 voice 常量基线
 
 **Files:**
 - Modify: `packages/env/src/web.ts`

From e7c53476f2ce4d7b1f125f682ad4bbc34f6fd9d6 Mon Sep 17 00:00:00 2001
From: tianhei <tianheilene@gmail.com>
Date: Tue, 17 Mar 2026 23:22:04 +0800
Subject: [PATCH 03/22] fix(tts): harden provider response validation

Summary:
- reject legacy and OpenAI-compatible audio responses when the
  content-type header is missing instead of silently accepting them
- preserve raw upstream error bodies when JSON payloads do not match the
  expected error schema and add regression tests for both cases
- normalize the external TTS plan task headings to a consistent level

Rationale:
- CodeRabbit found two real validation gaps that allowed untyped success
  payloads through and one bug where JSON error parsing consumed the body
  before the text fallback could read it
- locking these paths with targeted tests keeps the rework focused on the
  new review findings instead of broad refactoring
- fixing the plan heading mismatch removes a repeated doc-only review item

Tests:
- bun test apps/web/src/lib/tts/provider.test.ts apps/web/src/lib/tts/openai-compatible.test.ts apps/web/src/lib/tts/legacy.test.ts
- bunx @biomejs/biome check apps/web/src/app/api/tts/generate/route.ts apps/web/src/lib/tts/fetch-with-timeout.ts apps/web/src/lib/tts/legacy.ts apps/web/src/lib/tts/legacy.test.ts apps/web/src/lib/tts/openai-compatible.ts apps/web/src/lib/tts/openai-compatible.test.ts apps/web/src/lib/tts/provider.ts apps/web/src/lib/tts/provider.test.ts apps/web/src/constants/tts-constants.ts packages/env/src/web.ts docs/plans/2026-03-17-tts-external-provider.md docs/plans/2026-03-17-tts-external-provider-design.md
- bunx tsc -p apps/web/tsconfig.json --noEmit
- NODE_ENV=development NEXT_PUBLIC_SITE_URL=http://localhost:4100 UPSTASH_REDIS_REST_URL=https://example.com UPSTASH_REDIS_REST_TOKEN=dummy bun --eval 'import { NextRequest } from "next/server"; import { POST } from "./src/app/api/tts/generate/route.ts"; const request = new NextRequest("http://localhost/api/tts/generate", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ text: "hello", voice: "default" }) }); const response = await POST(request); const json = await response.json(); console.log(JSON.stringify({ status: response.status, audioLength: json.audio?.length ?? 0, audioHead: json.audio?.slice(0, 8) ?? null, error: json.error ?? null }));' (workdir: apps/web)

Co-authored-by: Codex <codex@openai.com>
---
 apps/web/src/lib/tts/legacy.test.ts           | 20 ++++++++++
 apps/web/src/lib/tts/legacy.ts                |  5 ++-
 .../web/src/lib/tts/openai-compatible.test.ts | 37 +++++++++++++++++++
 apps/web/src/lib/tts/openai-compatible.ts     | 27 +++++++++-----
 .../plans/2026-03-17-tts-external-provider.md |  6 +--
 5 files changed, 80 insertions(+), 15 deletions(-)

diff --git a/apps/web/src/lib/tts/legacy.test.ts b/apps/web/src/lib/tts/legacy.test.ts
index f676ad2..396119b 100644
--- a/apps/web/src/lib/tts/legacy.test.ts
+++ b/apps/web/src/lib/tts/legacy.test.ts
@@ -42,6 +42,26 @@ describe("synthesizeSpeechWithLegacyProvider", () => {
 		).rejects.toThrow("Legacy TTS returned non-audio content");
 	});
 
+	test("rejects audio downloads when the content-type header is missing", async () => {
+		await expect(
+			synthesizeSpeechWithLegacyProvider({
+				text: "hello",
+				fetchImpl: async (input) => {
+					if (String(input).includes("/apis/mbAIsc?")) {
+						return Response.json({
+							code: 200,
+							url: "https://api.milorapart.top/voice/test.mp3",
+						});
+					}
+
+					return new Response(Uint8Array.from([1, 2, 3]), {
+						status: 200,
+					});
+				},
+			}),
+		).rejects.toThrow("Legacy TTS returned non-audio content");
+	});
+
 	test("rejects synthesis text that would exceed the legacy GET limit", async () => {
 		let fetchCalled = false;
 
diff --git a/apps/web/src/lib/tts/legacy.ts b/apps/web/src/lib/tts/legacy.ts
index 30e9e41..189a528 100644
--- a/apps/web/src/lib/tts/legacy.ts
+++ b/apps/web/src/lib/tts/legacy.ts
@@ -79,11 +79,12 @@ export async function synthesizeSpeechWithLegacyProvider({
 	const contentType = audioResponse.headers.get("content-type") ?? "";
 
 	if (
-		contentType &&
 		!contentType.includes("audio/") &&
 		contentType !== "application/octet-stream"
 	) {
-		throw new Error(`Legacy TTS returned non-audio content: ${contentType}`);
+		throw new Error(
+			`Legacy TTS returned non-audio content: ${contentType || "(no content-type)"}`,
+		);
 	}
 
 	const audio = await audioResponse.arrayBuffer();
diff --git a/apps/web/src/lib/tts/openai-compatible.test.ts b/apps/web/src/lib/tts/openai-compatible.test.ts
index 86636a7..68f0434 100644
--- a/apps/web/src/lib/tts/openai-compatible.test.ts
+++ b/apps/web/src/lib/tts/openai-compatible.test.ts
@@ -173,6 +173,24 @@ describe("synthesizeSpeechWithOpenAiCompatible", () => {
 		).rejects.toThrow("Expected audio response");
 	});
 
+	test("rejects success responses when the content-type header is missing", async () => {
+		await expect(
+			synthesizeSpeechWithOpenAiCompatible({
+				config: {
+					apiBaseUrl: "https://example.com/v1",
+					apiKey: "secret",
+					model: "tts-1",
+				},
+				text: "hello",
+				voice: "default",
+				fetchImpl: async () =>
+					new Response(Uint8Array.from([1, 2, 3]), {
+						status: 200,
+					}),
+			}),
+		).rejects.toThrow("Expected audio response");
+	});
+
 	test("aborts upstream requests that exceed the timeout", async () => {
 		await expect(
 			synthesizeSpeechWithOpenAiCompatible({
@@ -214,4 +232,23 @@ describe("synthesizeSpeechWithOpenAiCompatible", () => {
 			}),
 		).rejects.toThrow("gateway timeout");
 	});
+
+	test("falls back to the raw upstream body when JSON shape is unrecognized", async () => {
+		await expect(
+			synthesizeSpeechWithOpenAiCompatible({
+				config: {
+					apiBaseUrl: "https://example.com/v1",
+					apiKey: "secret",
+					model: "tts-1",
+				},
+				text: "hello",
+				voice: "nova",
+				fetchImpl: async () =>
+					new Response('{"message":"bad request"}', {
+						status: 400,
+						headers: { "Content-Type": "application/json" },
+					}),
+			}),
+		).rejects.toThrow('{"message":"bad request"}');
+	});
 });
diff --git a/apps/web/src/lib/tts/openai-compatible.ts b/apps/web/src/lib/tts/openai-compatible.ts
index 4f25e85..ab54d8e 100644
--- a/apps/web/src/lib/tts/openai-compatible.ts
+++ b/apps/web/src/lib/tts/openai-compatible.ts
@@ -57,15 +57,22 @@ async function getUpstreamErrorMessage({
 	response: Response;
 }): Promise<string> {
 	const contentType = response.headers.get("content-type") ?? "";
+	const text = await response.text().catch(() => "");
 
 	if (contentType.includes("application/json")) {
-		const json = (await response.json().catch(() => null)) as {
-			error?:
-				| string
-				| {
-						message?: string;
-				  };
-		} | null;
+		const json = (() => {
+			try {
+				return JSON.parse(text) as {
+					error?:
+						| string
+						| {
+								message?: string;
+						  };
+				} | null;
+			} catch {
+				return null;
+			}
+		})();
 
 		if (typeof json?.error === "string" && json.error.trim()) {
 			return json.error;
@@ -80,7 +87,6 @@ async function getUpstreamErrorMessage({
 		}
 	}
 
-	const text = await response.text().catch(() => "");
 	if (text.trim()) {
 		return text;
 	}
@@ -150,11 +156,12 @@ export async function synthesizeSpeechWithOpenAiCompatible({
 			const contentType = response.headers.get("content-type") ?? "";
 
 			if (
-				contentType &&
 				!contentType.includes("audio/") &&
 				contentType !== "application/octet-stream"
 			) {
-				throw new Error(`Expected audio response, received ${contentType}`);
+				throw new Error(
+					`Expected audio response, received ${contentType || "(no content-type)"}`,
+				);
 			}
 
 			const audio = await response.arrayBuffer();
diff --git a/docs/plans/2026-03-17-tts-external-provider.md b/docs/plans/2026-03-17-tts-external-provider.md
index a1ee5c9..88c520b 100644
--- a/docs/plans/2026-03-17-tts-external-provider.md
+++ b/docs/plans/2026-03-17-tts-external-provider.md
@@ -44,7 +44,7 @@ git add packages/env/src/web.ts apps/web/src/constants/tts-constants.ts
 git commit -m "feat: prepare external tts config"
 ```
 
-### Task 2: 先写失败测试覆盖外部 TTS 适配层
+## Task 2: 先写失败测试覆盖外部 TTS 适配层
 
 **Files:**
 - Create: `apps/web/src/lib/tts/openai-compatible.test.ts`
@@ -80,7 +80,7 @@ git add apps/web/src/lib/tts/openai-compatible.ts apps/web/src/lib/tts/openai-co
 git commit -m "feat: add external tts adapter"
 ```
 
-### Task 3: 接回 API 路由
+## Task 3: 接回 API 路由
 
 **Files:**
 - Modify: `apps/web/src/app/api/tts/generate/route.ts`
@@ -113,7 +113,7 @@ git add apps/web/src/app/api/tts/generate/route.ts
 git commit -m "feat: wire route to external tts provider"
 ```
 
-### Task 4: 端到端验证与整理
+## Task 4: 端到端验证与整理
 
 **Files:**
 - Modify: `docs/plans/2026-03-17-tts-external-provider-design.md`

From 7b0376f4fe7cbbc225b5a160b1e212ba1fef6cdb Mon Sep 17 00:00:00 2001
From: tianhei <tianheilene@gmail.com>
Date: Tue, 17 Mar 2026 23:36:47 +0800
Subject: [PATCH 04/22] fix(tts): preserve aborts and normalize config parsing

Summary:
- compose caller cancellation with timeout handling in fetchWithTimeout and
  cover immediate and in-flight aborts with tests
- normalize legacy and OpenAI-compatible MIME checks so valid audio types with
  mixed casing or extra parameters are accepted
- trim external TTS config values, reject whitespace-only ones, and remove the
  assistant-specific directive from the plan doc

Rationale:
- CodeRabbit identified a real correctness issue where caller aborts were
  overwritten by timeout logic inside the fetch wrapper
- MIME checks should validate the media type, not the raw header string, or
  valid responses can be rejected unnecessarily
- whitespace-only config values should fail fast instead of surfacing opaque
  upstream errors later in the request path

Tests:
- bun test apps/web/src/lib/tts/fetch-with-timeout.test.ts apps/web/src/lib/tts/provider.test.ts apps/web/src/lib/tts/openai-compatible.test.ts apps/web/src/lib/tts/legacy.test.ts
- bunx @biomejs/biome check apps/web/src/app/api/tts/generate/route.ts apps/web/src/lib/tts/fetch-with-timeout.ts apps/web/src/lib/tts/fetch-with-timeout.test.ts apps/web/src/lib/tts/legacy.ts apps/web/src/lib/tts/legacy.test.ts apps/web/src/lib/tts/openai-compatible.ts apps/web/src/lib/tts/openai-compatible.test.ts apps/web/src/lib/tts/provider.ts apps/web/src/lib/tts/provider.test.ts apps/web/src/constants/tts-constants.ts packages/env/src/web.ts docs/plans/2026-03-17-tts-external-provider.md docs/plans/2026-03-17-tts-external-provider-design.md
- bunx tsc -p apps/web/tsconfig.json --noEmit
- NODE_ENV=development NEXT_PUBLIC_SITE_URL=http://localhost:4100 UPSTASH_REDIS_REST_URL=https://example.com UPSTASH_REDIS_REST_TOKEN=dummy bun --eval 'import { NextRequest } from "next/server"; import { POST } from "./src/app/api/tts/generate/route.ts"; const request = new NextRequest("http://localhost/api/tts/generate", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ text: "hello", voice: "default" }) }); const response = await POST(request); const json = await response.json(); console.log(JSON.stringify({ status: response.status, audioLength: json.audio?.length ?? 0, audioHead: json.audio?.slice(0, 8) ?? null, error: json.error ?? null }));' (workdir: apps/web)

Co-authored-by: Codex <codex@openai.com>
---
 .../src/lib/tts/fetch-with-timeout.test.ts    | 51 +++++++++++++++++++
 apps/web/src/lib/tts/fetch-with-timeout.ts    | 30 ++++++++++-
 apps/web/src/lib/tts/legacy.test.ts           | 21 ++++++++
 apps/web/src/lib/tts/legacy.ts                |  5 +-
 .../web/src/lib/tts/openai-compatible.test.ts | 31 +++++++++++
 apps/web/src/lib/tts/openai-compatible.ts     | 19 +++++--
 .../plans/2026-03-17-tts-external-provider.md |  2 -
 7 files changed, 148 insertions(+), 11 deletions(-)
 create mode 100644 apps/web/src/lib/tts/fetch-with-timeout.test.ts

diff --git a/apps/web/src/lib/tts/fetch-with-timeout.test.ts b/apps/web/src/lib/tts/fetch-with-timeout.test.ts
new file mode 100644
index 0000000..880fc93
--- /dev/null
+++ b/apps/web/src/lib/tts/fetch-with-timeout.test.ts
@@ -0,0 +1,51 @@
+import { describe, expect, test } from "bun:test";
+import { fetchWithTimeout } from "./fetch-with-timeout";
+
+describe("fetchWithTimeout", () => {
+	test("rejects immediately when the caller signal is already aborted", async () => {
+		const controller = new AbortController();
+		const callerError = new Error("caller aborted");
+		let fetchCalled = false;
+
+		controller.abort(callerError);
+
+		await expect(
+			fetchWithTimeout({
+				fetchImpl: async () => {
+					fetchCalled = true;
+					return new Response("ok");
+				},
+				init: { signal: controller.signal },
+				input: "https://example.com",
+				timeoutMessage: "timed out",
+				timeoutMs: 50,
+			}),
+		).rejects.toThrow("caller aborted");
+
+		expect(fetchCalled).toBe(false);
+	});
+
+	test("surfaces caller cancellation for in-flight requests", async () => {
+		const controller = new AbortController();
+		const callerError = new Error("caller aborted");
+
+		await expect(
+			fetchWithTimeout({
+				fetchImpl: async (_input, init) =>
+					new Promise((_resolve, reject) => {
+						setTimeout(() => controller.abort(callerError), 0);
+
+						init?.signal?.addEventListener(
+							"abort",
+							() => reject(init.signal?.reason ?? new Error("aborted")),
+							{ once: true },
+						);
+					}),
+				init: { signal: controller.signal },
+				input: "https://example.com",
+				timeoutMessage: "timed out",
+				timeoutMs: 50,
+			}),
+		).rejects.toThrow("caller aborted");
+	});
+});
diff --git a/apps/web/src/lib/tts/fetch-with-timeout.ts b/apps/web/src/lib/tts/fetch-with-timeout.ts
index 49fbca7..cc6960e 100644
--- a/apps/web/src/lib/tts/fetch-with-timeout.ts
+++ b/apps/web/src/lib/tts/fetch-with-timeout.ts
@@ -17,7 +17,24 @@ export async function fetchWithTimeout({
 	timeoutMessage: string;
 }): Promise<Response> {
 	const controller = new AbortController();
-	const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
+	const callerSignal = init?.signal;
+	let didTimeout = false;
+	const abortFromCaller = () => controller.abort(callerSignal?.reason);
+
+	if (callerSignal?.aborted) {
+		if (callerSignal.reason instanceof Error) {
+			throw callerSignal.reason;
+		}
+
+		throw new Error(String(callerSignal.reason ?? "Request aborted"));
+	}
+
+	callerSignal?.addEventListener("abort", abortFromCaller, { once: true });
+
+	const timeoutId = setTimeout(() => {
+		didTimeout = true;
+		controller.abort(new Error(timeoutMessage));
+	}, timeoutMs);
 
 	try {
 		return await fetchImpl(input, {
@@ -25,12 +42,21 @@ export async function fetchWithTimeout({
 			signal: controller.signal,
 		});
 	} catch (error) {
-		if (controller.signal.aborted) {
+		if (didTimeout) {
 			throw new Error(timeoutMessage);
 		}
 
+		if (callerSignal?.aborted) {
+			if (callerSignal.reason instanceof Error) {
+				throw callerSignal.reason;
+			}
+
+			throw new Error(String(callerSignal.reason ?? "Request aborted"));
+		}
+
 		throw error;
 	} finally {
 		clearTimeout(timeoutId);
+		callerSignal?.removeEventListener("abort", abortFromCaller);
 	}
 }
diff --git a/apps/web/src/lib/tts/legacy.test.ts b/apps/web/src/lib/tts/legacy.test.ts
index 396119b..6ece491 100644
--- a/apps/web/src/lib/tts/legacy.test.ts
+++ b/apps/web/src/lib/tts/legacy.test.ts
@@ -62,6 +62,27 @@ describe("synthesizeSpeechWithLegacyProvider", () => {
 		).rejects.toThrow("Legacy TTS returned non-audio content");
 	});
 
+	test("accepts audio downloads when the MIME type casing and parameters vary", async () => {
+		const audio = await synthesizeSpeechWithLegacyProvider({
+			text: "hello",
+			fetchImpl: async (input) => {
+				if (String(input).includes("/apis/mbAIsc?")) {
+					return Response.json({
+						code: 200,
+						url: "https://api.milorapart.top/voice/test.mp3",
+					});
+				}
+
+				return new Response(Uint8Array.from([1, 2, 3]), {
+					status: 200,
+					headers: { "Content-Type": "Audio/MPEG; Charset=utf-8" },
+				});
+			},
+		});
+
+		expect(Array.from(new Uint8Array(audio))).toEqual([1, 2, 3]);
+	});
+
 	test("rejects synthesis text that would exceed the legacy GET limit", async () => {
 		let fetchCalled = false;
 
diff --git a/apps/web/src/lib/tts/legacy.ts b/apps/web/src/lib/tts/legacy.ts
index 189a528..c796f45 100644
--- a/apps/web/src/lib/tts/legacy.ts
+++ b/apps/web/src/lib/tts/legacy.ts
@@ -77,10 +77,11 @@ export async function synthesizeSpeechWithLegacyProvider({
 	}
 
 	const contentType = audioResponse.headers.get("content-type") ?? "";
+	const mimeType = contentType.split(";")[0]?.trim().toLowerCase() ?? "";
 
 	if (
-		!contentType.includes("audio/") &&
-		contentType !== "application/octet-stream"
+		!mimeType.startsWith("audio/") &&
+		mimeType !== "application/octet-stream"
 	) {
 		throw new Error(
 			`Legacy TTS returned non-audio content: ${contentType || "(no content-type)"}`,
diff --git a/apps/web/src/lib/tts/openai-compatible.test.ts b/apps/web/src/lib/tts/openai-compatible.test.ts
index 68f0434..58883cc 100644
--- a/apps/web/src/lib/tts/openai-compatible.test.ts
+++ b/apps/web/src/lib/tts/openai-compatible.test.ts
@@ -32,6 +32,18 @@ describe("getExternalTtsConfig", () => {
 			}),
 		).toThrow("External TTS is not configured");
 	});
+
+	test("rejects whitespace-only config values", () => {
+		expect(() =>
+			getExternalTtsConfig({
+				env: {
+					API_BASE_URL: "   ",
+					API_MODEL: "  ",
+					API_KEY: "   ",
+				},
+			}),
+		).toThrow("External TTS is not configured");
+	});
 });
 
 describe("synthesizeSpeechWithOpenAiCompatible", () => {
@@ -191,6 +203,25 @@ describe("synthesizeSpeechWithOpenAiCompatible", () => {
 		).rejects.toThrow("Expected audio response");
 	});
 
+	test("accepts audio responses when MIME type casing and parameters vary", async () => {
+		const audio = await synthesizeSpeechWithOpenAiCompatible({
+			config: {
+				apiBaseUrl: "https://example.com/v1",
+				apiKey: "secret",
+				model: "tts-1",
+			},
+			text: "hello",
+			voice: "default",
+			fetchImpl: async () =>
+				new Response(Uint8Array.from([1, 2, 3]), {
+					status: 200,
+					headers: { "Content-Type": "Audio/MPEG; Charset=utf-8" },
+				}),
+		});
+
+		expect(Array.from(new Uint8Array(audio))).toEqual([1, 2, 3]);
+	});
+
 	test("aborts upstream requests that exceed the timeout", async () => {
 		await expect(
 			synthesizeSpeechWithOpenAiCompatible({
diff --git a/apps/web/src/lib/tts/openai-compatible.ts b/apps/web/src/lib/tts/openai-compatible.ts
index ab54d8e..e4821f7 100644
--- a/apps/web/src/lib/tts/openai-compatible.ts
+++ b/apps/web/src/lib/tts/openai-compatible.ts
@@ -36,10 +36,18 @@ export function getExternalTtsConfig({
 		throw new Error("External TTS is not configured");
 	}
 
+	const apiBaseUrl = parsed.data.API_BASE_URL.trim().replace(/\/+$/, "");
+	const apiKey = parsed.data.API_KEY.trim();
+	const model = parsed.data.API_MODEL.trim();
+
+	if (!apiBaseUrl || !apiKey || !model) {
+		throw new Error("External TTS is not configured");
+	}
+
 	return {
-		apiBaseUrl: parsed.data.API_BASE_URL.replace(/\/+$/, ""),
-		apiKey: parsed.data.API_KEY,
-		model: parsed.data.API_MODEL,
+		apiBaseUrl,
+		apiKey,
+		model,
 	};
 }
 
@@ -154,10 +162,11 @@ export async function synthesizeSpeechWithOpenAiCompatible({
 
 		if (response.ok) {
 			const contentType = response.headers.get("content-type") ?? "";
+			const mimeType = contentType.split(";")[0]?.trim().toLowerCase() ?? "";
 
 			if (
-				!contentType.includes("audio/") &&
-				contentType !== "application/octet-stream"
+				!mimeType.startsWith("audio/") &&
+				mimeType !== "application/octet-stream"
 			) {
 				throw new Error(
 					`Expected audio response, received ${contentType || "(no content-type)"}`,
diff --git a/docs/plans/2026-03-17-tts-external-provider.md b/docs/plans/2026-03-17-tts-external-provider.md
index 88c520b..34a07e7 100644
--- a/docs/plans/2026-03-17-tts-external-provider.md
+++ b/docs/plans/2026-03-17-tts-external-provider.md
@@ -1,7 +1,5 @@
 # 外部 TTS 扩展 Implementation Plan
 
-> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
-
 **Goal:** 让 Cutia 的 TTS 能力从硬编码单一路由改为可配置的外部 TTS API 调用，并继续把生成语音接入媒体库和时间线。
 
 **Architecture:** 在 `apps/web/src/lib/tts/` 新增可测试的 OpenAI 兼容 TTS 适配层，`/api/tts/generate` 只负责校验和响应转换，前端调用协议保持 `{ audio }` 不变。通过 `packages/env` 暴露配置，避免把供应商细节散落到 UI 和编辑器逻辑里。

From d7c896a98a65684bd9b63781b6b136e8e5ccd342 Mon Sep 17 00:00:00 2001
From: tianhei <tianheilene@gmail.com>
Date: Tue, 17 Mar 2026 23:43:37 +0800
Subject: [PATCH 05/22] refactor(tts): clean up provider helper types

Summary:
- export FetchLike from fetch-with-timeout and reuse it across the legacy and
  OpenAI-compatible adapters
- extract the duplicated caller-abort error construction into a shared helper
  inside fetch-with-timeout
- keep the legacy voice argument for adapter parity while making its intent
  explicit in code

Rationale:
- the remaining CodeRabbit comments were all maintenance-only cleanup items
  with no behavior change required
- sharing the helper type and abort-reason logic reduces repetition without
  widening the TTS error-handling surface
- documenting the unused legacy voice parameter makes the interface parity
  intentional instead of accidental

Tests:
- bun test apps/web/src/lib/tts/fetch-with-timeout.test.ts apps/web/src/lib/tts/provider.test.ts apps/web/src/lib/tts/openai-compatible.test.ts apps/web/src/lib/tts/legacy.test.ts
- bunx @biomejs/biome check apps/web/src/lib/tts/fetch-with-timeout.ts apps/web/src/lib/tts/fetch-with-timeout.test.ts apps/web/src/lib/tts/legacy.ts apps/web/src/lib/tts/openai-compatible.ts docs/plans/2026-03-17-tts-external-provider.md
- bunx tsc -p apps/web/tsconfig.json --noEmit

Co-authored-by: Codex <codex@openai.com>
---
 apps/web/src/lib/tts/fetch-with-timeout.ts | 22 +++++++++++-----------
 apps/web/src/lib/tts/legacy.ts             | 10 ++++------
 apps/web/src/lib/tts/openai-compatible.ts  |  7 +------
 3 files changed, 16 insertions(+), 23 deletions(-)

diff --git a/apps/web/src/lib/tts/fetch-with-timeout.ts b/apps/web/src/lib/tts/fetch-with-timeout.ts
index cc6960e..47ab95b 100644
--- a/apps/web/src/lib/tts/fetch-with-timeout.ts
+++ b/apps/web/src/lib/tts/fetch-with-timeout.ts
@@ -1,8 +1,16 @@
-type FetchLike = (
+export type FetchLike = (
 	input: RequestInfo | URL,
 	init?: RequestInit,
 ) => Promise<Response>;
 
+function throwCallerAbortReason(signal: AbortSignal): never {
+	if (signal.reason instanceof Error) {
+		throw signal.reason;
+	}
+
+	throw new Error(String(signal.reason ?? "Request aborted"));
+}
+
 export async function fetchWithTimeout({
 	fetchImpl,
 	input,
@@ -22,11 +30,7 @@ export async function fetchWithTimeout({
 	const abortFromCaller = () => controller.abort(callerSignal?.reason);
 
 	if (callerSignal?.aborted) {
-		if (callerSignal.reason instanceof Error) {
-			throw callerSignal.reason;
-		}
-
-		throw new Error(String(callerSignal.reason ?? "Request aborted"));
+		throwCallerAbortReason(callerSignal);
 	}
 
 	callerSignal?.addEventListener("abort", abortFromCaller, { once: true });
@@ -47,11 +51,7 @@ export async function fetchWithTimeout({
 		}
 
 		if (callerSignal?.aborted) {
-			if (callerSignal.reason instanceof Error) {
-				throw callerSignal.reason;
-			}
-
-			throw new Error(String(callerSignal.reason ?? "Request aborted"));
+			throwCallerAbortReason(callerSignal);
 		}
 
 		throw error;
diff --git a/apps/web/src/lib/tts/legacy.ts b/apps/web/src/lib/tts/legacy.ts
index c796f45..83b6d88 100644
--- a/apps/web/src/lib/tts/legacy.ts
+++ b/apps/web/src/lib/tts/legacy.ts
@@ -1,5 +1,5 @@
 import { z } from "zod";
-import { fetchWithTimeout } from "./fetch-with-timeout";
+import { fetchWithTimeout, type FetchLike } from "./fetch-with-timeout";
 
 const LEGACY_TTS_API_BASE = "https://api.milorapart.top/apis/mbAIsc";
 const LEGACY_TTS_ALLOWED_AUDIO_HOSTS = new Set(["api.milorapart.top"]);
@@ -11,13 +11,9 @@ const legacyResponseSchema = z.object({
 	url: z.string().url(),
 });
 
-type FetchLike = (
-	input: RequestInfo | URL,
-	init?: RequestInit,
-) => Promise<Response>;
-
 export async function synthesizeSpeechWithLegacyProvider({
 	text,
+	voice: _voice,
 	fetchImpl = fetch,
 	timeoutMs = LEGACY_TTS_TIMEOUT_MS,
 }: {
@@ -26,6 +22,8 @@ export async function synthesizeSpeechWithLegacyProvider({
 	fetchImpl?: FetchLike;
 	timeoutMs?: number;
 }): Promise<ArrayBuffer> {
+	void _voice; // Legacy upstream has a fixed voice; keep the arg for parity.
+
 	const query = new URLSearchParams({
 		format: "mp3",
 		text,
diff --git a/apps/web/src/lib/tts/openai-compatible.ts b/apps/web/src/lib/tts/openai-compatible.ts
index e4821f7..ccea62c 100644
--- a/apps/web/src/lib/tts/openai-compatible.ts
+++ b/apps/web/src/lib/tts/openai-compatible.ts
@@ -3,7 +3,7 @@ import {
 	DEFAULT_EXTERNAL_TTS_VOICE,
 	DEFAULT_VOICE_PACK,
 } from "@/constants/tts-constants";
-import { fetchWithTimeout } from "./fetch-with-timeout";
+import { fetchWithTimeout, type FetchLike } from "./fetch-with-timeout";
 
 const externalTtsConfigSchema = z.object({
 	API_BASE_URL: z.string().min(1),
@@ -20,11 +20,6 @@ export interface ExternalTtsConfig {
 	model: string;
 }
 
-type FetchLike = (
-	input: RequestInfo | URL,
-	init?: RequestInit,
-) => Promise<Response>;
-
 export function getExternalTtsConfig({
 	env,
 }: {

From d71ca3440d4cf3c9511de08c2613c61309f0558d Mon Sep 17 00:00:00 2001
From: tianhei <tianheilene@gmail.com>
Date: Thu, 19 Mar 2026 02:55:55 +0800
Subject: [PATCH 06/22] fix(tts): use structured provider error codes

Summary:
- add shared TTS error codes and typed TtsError helpers
- emit structured config and upstream failures from TTS providers
- map route responses by error code and add regression tests

Rationale:
- avoid coupling API status mapping to fragile message text
- keep provider fallback behavior explicit while preserving readable errors
- close the remaining PR review item with direct route-level coverage

Tests:
- bun test apps/web/src/lib/tts/fetch-with-timeout.test.ts apps/web/src/lib/tts/provider.test.ts apps/web/src/lib/tts/openai-compatible.test.ts apps/web/src/lib/tts/legacy.test.ts apps/web/src/app/api/tts/generate/route.test.ts
- bunx @biomejs/biome check apps/web/src/app/api/tts/generate/route.ts apps/web/src/app/api/tts/generate/route.test.ts apps/web/src/lib/tts/provider.ts apps/web/src/lib/tts/provider.test.ts apps/web/src/lib/tts/openai-compatible.ts apps/web/src/lib/tts/legacy.ts apps/web/src/lib/tts/errors.ts
- bunx tsc -p apps/web/tsconfig.json --noEmit
- POST /api/tts/generate probe: status=200 audioHead=SUQzBAAA

Co-authored-by: Codex <codex@openai.com>
---
 .../src/app/api/tts/generate/route.test.ts    | 85 +++++++++++++++++++
 apps/web/src/app/api/tts/generate/route.ts    | 19 ++---
 apps/web/src/lib/tts/errors.ts                | 31 +++++++
 apps/web/src/lib/tts/legacy.ts                | 40 ++++++---
 apps/web/src/lib/tts/openai-compatible.ts     | 30 +++++--
 apps/web/src/lib/tts/provider.test.ts         | 13 +++
 apps/web/src/lib/tts/provider.ts              |  6 +-
 7 files changed, 190 insertions(+), 34 deletions(-)
 create mode 100644 apps/web/src/app/api/tts/generate/route.test.ts
 create mode 100644 apps/web/src/lib/tts/errors.ts

diff --git a/apps/web/src/app/api/tts/generate/route.test.ts b/apps/web/src/app/api/tts/generate/route.test.ts
new file mode 100644
index 0000000..a2849f8
--- /dev/null
+++ b/apps/web/src/app/api/tts/generate/route.test.ts
@@ -0,0 +1,85 @@
+import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
+
+let synthesizeImpl: typeof import("@/lib/tts/provider").synthesizeSpeechWithFallback;
+const originalConsoleError = console.error;
+
+mock.module("@cutia/env/web", () => ({
+	webEnv: {
+		API_BASE_URL: "https://example.com/v1",
+		API_MODEL: "tts-1",
+		API_KEY: "secret",
+	},
+}));
+
+mock.module("@/lib/tts/provider", () => ({
+	synthesizeSpeechWithFallback: (args: Parameters<typeof synthesizeImpl>[0]) =>
+		synthesizeImpl(args),
+}));
+
+const { POST } = await import("./route");
+
+function createRequest(body: unknown): Request {
+	return new Request("http://localhost/api/tts/generate", {
+		body: JSON.stringify(body),
+		headers: {
+			"content-type": "application/json",
+		},
+		method: "POST",
+	});
+}
+
+describe("POST /api/tts/generate", () => {
+	beforeEach(() => {
+		console.error = mock(() => {});
+		synthesizeImpl = async () => Uint8Array.from([1, 2, 3]).buffer;
+	});
+
+	afterEach(() => {
+		console.error = originalConsoleError;
+	});
+
+	test("returns 502 for structured legacy upstream errors without relying on message prefixes", async () => {
+		synthesizeImpl = async () => {
+			throw Object.assign(new Error("legacy fallback audio download failed"), {
+				code: "LEGACY_TTS_UPSTREAM",
+			});
+		};
+
+		const response = await POST(createRequest({ text: "hello" }) as never);
+
+		expect(response.status).toBe(502);
+		expect(await response.json()).toEqual({
+			error: "legacy fallback audio download failed",
+		});
+	});
+
+	test("returns 502 for structured external upstream errors without relying on message prefixes", async () => {
+		synthesizeImpl = async () => {
+			throw Object.assign(new Error("upstream gateway timeout"), {
+				code: "EXTERNAL_TTS_UPSTREAM",
+			});
+		};
+
+		const response = await POST(createRequest({ text: "hello" }) as never);
+
+		expect(response.status).toBe(502);
+		expect(await response.json()).toEqual({
+			error: "upstream gateway timeout",
+		});
+	});
+
+	test("returns the original config error message for structured config failures", async () => {
+		synthesizeImpl = async () => {
+			throw Object.assign(new Error("external config missing"), {
+				code: "EXTERNAL_TTS_CONFIG",
+			});
+		};
+
+		const response = await POST(createRequest({ text: "hello" }) as never);
+
+		expect(response.status).toBe(500);
+		expect(await response.json()).toEqual({
+			error: "external config missing",
+		});
+	});
+});
diff --git a/apps/web/src/app/api/tts/generate/route.ts b/apps/web/src/app/api/tts/generate/route.ts
index 9155317..5e70366 100644
--- a/apps/web/src/app/api/tts/generate/route.ts
+++ b/apps/web/src/app/api/tts/generate/route.ts
@@ -1,6 +1,7 @@
 import { webEnv } from "@cutia/env/web";
 import { type NextRequest, NextResponse } from "next/server";
 import { z } from "zod";
+import { isTtsError } from "@/lib/tts/errors";
 import { synthesizeSpeechWithFallback } from "@/lib/tts/provider";
 
 const requestSchema = z.object({
@@ -36,16 +37,14 @@ export async function POST(request: NextRequest) {
 		const message = error instanceof Error ? error.message : "Unknown error";
 		console.error("TTS generate error:", error);
 
-		if (message === "External TTS is not configured") {
-			return NextResponse.json({ error: message }, { status: 500 });
-		}
-
-		if (
-			message.startsWith("External TTS request failed:") ||
-			message === "External TTS returned empty audio" ||
-			message.startsWith("Legacy TTS ")
-		) {
-			return NextResponse.json({ error: message }, { status: 502 });
+		if (isTtsError(error)) {
+			switch (error.code) {
+				case "EXTERNAL_TTS_CONFIG":
+					return NextResponse.json({ error: message }, { status: 500 });
+				case "EXTERNAL_TTS_UPSTREAM":
+				case "LEGACY_TTS_UPSTREAM":
+					return NextResponse.json({ error: message }, { status: 502 });
+			}
 		}
 
 		return NextResponse.json(
diff --git a/apps/web/src/lib/tts/errors.ts b/apps/web/src/lib/tts/errors.ts
new file mode 100644
index 0000000..d3f7bc4
--- /dev/null
+++ b/apps/web/src/lib/tts/errors.ts
@@ -0,0 +1,31 @@
+export const TTS_ERROR_CODES = [
+	"EXTERNAL_TTS_CONFIG",
+	"EXTERNAL_TTS_UPSTREAM",
+	"LEGACY_TTS_UPSTREAM",
+] as const;
+
+export type TtsErrorCode = (typeof TTS_ERROR_CODES)[number];
+
+export class TtsError extends Error {
+	code: TtsErrorCode;
+
+	constructor({
+		code,
+		message,
+	}: {
+		code: TtsErrorCode;
+		message: string;
+	}) {
+		super(message);
+		this.name = "TtsError";
+		this.code = code;
+	}
+}
+
+export function isTtsError(error: unknown): error is TtsError {
+	if (!(error instanceof Error)) {
+		return false;
+	}
+
+	return TTS_ERROR_CODES.includes((error as TtsError).code);
+}
diff --git a/apps/web/src/lib/tts/legacy.ts b/apps/web/src/lib/tts/legacy.ts
index 83b6d88..72bd5c2 100644
--- a/apps/web/src/lib/tts/legacy.ts
+++ b/apps/web/src/lib/tts/legacy.ts
@@ -1,5 +1,6 @@
 import { z } from "zod";
 import { fetchWithTimeout, type FetchLike } from "./fetch-with-timeout";
+import { TtsError } from "./errors";
 
 const LEGACY_TTS_API_BASE = "https://api.milorapart.top/apis/mbAIsc";
 const LEGACY_TTS_ALLOWED_AUDIO_HOSTS = new Set(["api.milorapart.top"]);
@@ -31,7 +32,10 @@ export async function synthesizeSpeechWithLegacyProvider({
 	const upstreamUrl = `${LEGACY_TTS_API_BASE}?${query}`;
 
 	if (upstreamUrl.length > LEGACY_TTS_MAX_URL_LENGTH) {
-		throw new Error("Legacy TTS text is too long for GET fallback");
+		throw new TtsError({
+			code: "LEGACY_TTS_UPSTREAM",
+			message: "Legacy TTS text is too long for GET fallback",
+		});
 	}
 
 	const upstreamResponse = await fetchWithTimeout({
@@ -42,14 +46,20 @@ export async function synthesizeSpeechWithLegacyProvider({
 	});
 
 	if (!upstreamResponse.ok) {
-		throw new Error(`Legacy TTS request failed: ${upstreamResponse.status}`);
+		throw new TtsError({
+			code: "LEGACY_TTS_UPSTREAM",
+			message: `Legacy TTS request failed: ${upstreamResponse.status}`,
+		});
 	}
 
 	const upstreamJson = await upstreamResponse.json().catch(() => null);
 	const parsed = legacyResponseSchema.safeParse(upstreamJson);
 
 	if (!parsed.success || parsed.data.code !== 200) {
-		throw new Error("Legacy TTS generation failed");
+		throw new TtsError({
+			code: "LEGACY_TTS_UPSTREAM",
+			message: "Legacy TTS generation failed",
+		});
 	}
 
 	const audioUrl = new URL(parsed.data.url);
@@ -58,7 +68,10 @@ export async function synthesizeSpeechWithLegacyProvider({
 		audioUrl.protocol !== "https:" ||
 		!LEGACY_TTS_ALLOWED_AUDIO_HOSTS.has(audioUrl.hostname)
 	) {
-		throw new Error("Legacy TTS returned an unexpected audio URL");
+		throw new TtsError({
+			code: "LEGACY_TTS_UPSTREAM",
+			message: "Legacy TTS returned an unexpected audio URL",
+		});
 	}
 
 	const audioResponse = await fetchWithTimeout({
@@ -69,9 +82,10 @@ export async function synthesizeSpeechWithLegacyProvider({
 	});
 
 	if (!audioResponse.ok) {
-		throw new Error(
-			`Legacy TTS audio download failed: ${audioResponse.status}`,
-		);
+		throw new TtsError({
+			code: "LEGACY_TTS_UPSTREAM",
+			message: `Legacy TTS audio download failed: ${audioResponse.status}`,
+		});
 	}
 
 	const contentType = audioResponse.headers.get("content-type") ?? "";
@@ -81,15 +95,19 @@ export async function synthesizeSpeechWithLegacyProvider({
 		!mimeType.startsWith("audio/") &&
 		mimeType !== "application/octet-stream"
 	) {
-		throw new Error(
-			`Legacy TTS returned non-audio content: ${contentType || "(no content-type)"}`,
-		);
+		throw new TtsError({
+			code: "LEGACY_TTS_UPSTREAM",
+			message: `Legacy TTS returned non-audio content: ${contentType || "(no content-type)"}`,
+		});
 	}
 
 	const audio = await audioResponse.arrayBuffer();
 
 	if (audio.byteLength === 0) {
-		throw new Error("Legacy TTS returned empty audio");
+		throw new TtsError({
+			code: "LEGACY_TTS_UPSTREAM",
+			message: "Legacy TTS returned empty audio",
+		});
 	}
 
 	return audio;
diff --git a/apps/web/src/lib/tts/openai-compatible.ts b/apps/web/src/lib/tts/openai-compatible.ts
index ccea62c..2cd639b 100644
--- a/apps/web/src/lib/tts/openai-compatible.ts
+++ b/apps/web/src/lib/tts/openai-compatible.ts
@@ -4,6 +4,7 @@ import {
 	DEFAULT_VOICE_PACK,
 } from "@/constants/tts-constants";
 import { fetchWithTimeout, type FetchLike } from "./fetch-with-timeout";
+import { TtsError } from "./errors";
 
 const externalTtsConfigSchema = z.object({
 	API_BASE_URL: z.string().min(1),
@@ -28,7 +29,10 @@ export function getExternalTtsConfig({
 	const parsed = externalTtsConfigSchema.safeParse(env);
 
 	if (!parsed.success) {
-		throw new Error("External TTS is not configured");
+		throw new TtsError({
+			code: "EXTERNAL_TTS_CONFIG",
+			message: "External TTS is not configured",
+		});
 	}
 
 	const apiBaseUrl = parsed.data.API_BASE_URL.trim().replace(/\/+$/, "");
@@ -36,7 +40,10 @@ export function getExternalTtsConfig({
 	const model = parsed.data.API_MODEL.trim();
 
 	if (!apiBaseUrl || !apiKey || !model) {
-		throw new Error("External TTS is not configured");
+		throw new TtsError({
+			code: "EXTERNAL_TTS_CONFIG",
+			message: "External TTS is not configured",
+		});
 	}
 
 	return {
@@ -163,15 +170,19 @@ export async function synthesizeSpeechWithOpenAiCompatible({
 				!mimeType.startsWith("audio/") &&
 				mimeType !== "application/octet-stream"
 			) {
-				throw new Error(
-					`Expected audio response, received ${contentType || "(no content-type)"}`,
-				);
+				throw new TtsError({
+					code: "EXTERNAL_TTS_UPSTREAM",
+					message: `Expected audio response, received ${contentType || "(no content-type)"}`,
+				});
 			}
 
 			const audio = await response.arrayBuffer();
 
 			if (audio.byteLength === 0) {
-				throw new Error("External TTS returned empty audio");
+				throw new TtsError({
+					code: "EXTERNAL_TTS_UPSTREAM",
+					message: "External TTS returned empty audio",
+				});
 			}
 
 			return audio;
@@ -184,9 +195,10 @@ export async function synthesizeSpeechWithOpenAiCompatible({
 		}
 	}
 
-	throw new Error(
-		`External TTS request failed: ${await getUpstreamErrorMessage({
+	throw new TtsError({
+		code: "EXTERNAL_TTS_UPSTREAM",
+		message: `External TTS request failed: ${await getUpstreamErrorMessage({
 			response: lastErrorResponse ?? new Response(null, { status: 500 }),
 		})}`,
-	);
+	});
 }
diff --git a/apps/web/src/lib/tts/provider.test.ts b/apps/web/src/lib/tts/provider.test.ts
index 561076d..06dd5a1 100644
--- a/apps/web/src/lib/tts/provider.test.ts
+++ b/apps/web/src/lib/tts/provider.test.ts
@@ -74,4 +74,17 @@ describe("synthesizeSpeechWithFallback", () => {
 		expect(openAiCalled).toBe(false);
 		expect(legacyCalled).toBe(false);
 	});
+
+	test("rethrows missing external config with a structured error code", async () => {
+		await expect(
+			synthesizeSpeechWithFallback({
+				env: {},
+				text: "hello",
+				voice: "default",
+			}),
+		).rejects.toMatchObject({
+			code: "EXTERNAL_TTS_CONFIG",
+			message: "External TTS is not configured",
+		});
+	});
 });
diff --git a/apps/web/src/lib/tts/provider.ts b/apps/web/src/lib/tts/provider.ts
index 9a0a02f..39f0669 100644
--- a/apps/web/src/lib/tts/provider.ts
+++ b/apps/web/src/lib/tts/provider.ts
@@ -2,6 +2,7 @@ import {
 	getExternalTtsConfig,
 	synthesizeSpeechWithOpenAiCompatible,
 } from "./openai-compatible";
+import { isTtsError } from "./errors";
 import { synthesizeSpeechWithLegacyProvider } from "./legacy";
 
 type TtsEnv = {
@@ -27,10 +28,7 @@ export async function synthesizeSpeechWithFallback({
 		const config = getExternalTtsConfig({ env });
 		return await openAiSynthesize({ config, text, voice });
 	} catch (error) {
-		if (
-			error instanceof Error &&
-			error.message === "External TTS is not configured"
-		) {
+		if (isTtsError(error) && error.code === "EXTERNAL_TTS_CONFIG") {
 			throw error;
 		}
 

From 44fdfa5d1e9e013ac545473445b2caef43777399 Mon Sep 17 00:00:00 2001
From: tianhei <tianheilene@gmail.com>
Date: Thu, 19 Mar 2026 03:16:43 +0800
Subject: [PATCH 07/22] fix(tts): classify timeout failures as upstream errors

Summary:
- wrap external and legacy timeout failures in structured upstream TTS errors
- add route success coverage and an exhaustive TTS error switch guard
- tighten timeout regression tests to assert error codes instead of messages

Rationale:
- keep upstream timeouts on the 502 path instead of falling through to 500
- preserve readable timeout messages while restoring consistent route behavior
- close the latest review round with direct regression coverage for the timeout path

Tests:
- bun test apps/web/src/lib/tts/fetch-with-timeout.test.ts apps/web/src/lib/tts/provider.test.ts apps/web/src/lib/tts/openai-compatible.test.ts apps/web/src/lib/tts/legacy.test.ts apps/web/src/app/api/tts/generate/route.test.ts
- bunx @biomejs/biome check apps/web/src/app/api/tts/generate/route.ts apps/web/src/app/api/tts/generate/route.test.ts apps/web/src/lib/tts/fetch-with-timeout.ts apps/web/src/lib/tts/fetch-with-timeout.test.ts apps/web/src/lib/tts/openai-compatible.ts apps/web/src/lib/tts/openai-compatible.test.ts apps/web/src/lib/tts/legacy.ts apps/web/src/lib/tts/legacy.test.ts apps/web/src/lib/tts/provider.ts apps/web/src/lib/tts/provider.test.ts apps/web/src/lib/tts/errors.ts apps/web/src/constants/tts-constants.ts packages/env/src/web.ts docs/plans/2026-03-17-tts-external-provider.md docs/plans/2026-03-17-tts-external-provider-design.md
- bunx tsc -p apps/web/tsconfig.json --noEmit
- NODE_ENV=development NEXT_PUBLIC_SITE_URL=http://localhost:4311 UPSTASH_REDIS_REST_URL=https://example.com UPSTASH_REDIS_REST_TOKEN=dummy bun --eval 'import { NextRequest } from "next/server"; import { POST } from "./src/app/api/tts/generate/route.ts"; /* POST probe body elided */' -> status=200 audioHead=SUQzBAAA

Co-authored-by: Codex <codex@openai.com>
---
 .../src/app/api/tts/generate/route.test.ts    |  9 ++++
 apps/web/src/app/api/tts/generate/route.ts    |  4 ++
 apps/web/src/lib/tts/legacy.test.ts           | 10 +++-
 apps/web/src/lib/tts/legacy.ts                | 48 ++++++++++++++-----
 .../web/src/lib/tts/openai-compatible.test.ts |  5 +-
 apps/web/src/lib/tts/openai-compatible.ts     | 32 ++++++++++---
 6 files changed, 86 insertions(+), 22 deletions(-)

diff --git a/apps/web/src/app/api/tts/generate/route.test.ts b/apps/web/src/app/api/tts/generate/route.test.ts
index a2849f8..cdf43b8 100644
--- a/apps/web/src/app/api/tts/generate/route.test.ts
+++ b/apps/web/src/app/api/tts/generate/route.test.ts
@@ -38,6 +38,15 @@ describe("POST /api/tts/generate", () => {
 		console.error = originalConsoleError;
 	});
 
+	test("returns base64 audio for successful synthesis", async () => {
+		const response = await POST(createRequest({ text: "hello" }) as never);
+
+		expect(response.status).toBe(200);
+		expect(await response.json()).toEqual({
+			audio: "AQID",
+		});
+	});
+
 	test("returns 502 for structured legacy upstream errors without relying on message prefixes", async () => {
 		synthesizeImpl = async () => {
 			throw Object.assign(new Error("legacy fallback audio download failed"), {
diff --git a/apps/web/src/app/api/tts/generate/route.ts b/apps/web/src/app/api/tts/generate/route.ts
index 5e70366..82ffde3 100644
--- a/apps/web/src/app/api/tts/generate/route.ts
+++ b/apps/web/src/app/api/tts/generate/route.ts
@@ -44,6 +44,10 @@ export async function POST(request: NextRequest) {
 				case "EXTERNAL_TTS_UPSTREAM":
 				case "LEGACY_TTS_UPSTREAM":
 					return NextResponse.json({ error: message }, { status: 502 });
+				default: {
+					const exhaustiveCode: never = error.code;
+					throw new Error(`Unhandled TTS error code: ${exhaustiveCode}`);
+				}
 			}
 		}
 
diff --git a/apps/web/src/lib/tts/legacy.test.ts b/apps/web/src/lib/tts/legacy.test.ts
index 6ece491..37e4afd 100644
--- a/apps/web/src/lib/tts/legacy.test.ts
+++ b/apps/web/src/lib/tts/legacy.test.ts
@@ -116,7 +116,10 @@ describe("synthesizeSpeechWithLegacyProvider", () => {
 						);
 					}),
 			}),
-		).rejects.toThrow("Legacy TTS request timed out");
+		).rejects.toMatchObject({
+			code: "LEGACY_TTS_UPSTREAM",
+			message: "Legacy TTS request timed out",
+		});
 	});
 
 	test("aborts the audio download when the legacy audio fetch hangs", async () => {
@@ -145,6 +148,9 @@ describe("synthesizeSpeechWithLegacyProvider", () => {
 					});
 				},
 			}),
-		).rejects.toThrow("Legacy TTS audio download timed out");
+		).rejects.toMatchObject({
+			code: "LEGACY_TTS_UPSTREAM",
+			message: "Legacy TTS audio download timed out",
+		});
 	});
 });
diff --git a/apps/web/src/lib/tts/legacy.ts b/apps/web/src/lib/tts/legacy.ts
index 72bd5c2..3f575e9 100644
--- a/apps/web/src/lib/tts/legacy.ts
+++ b/apps/web/src/lib/tts/legacy.ts
@@ -12,6 +12,18 @@ const legacyResponseSchema = z.object({
 	url: z.string().url(),
 });
 
+function wrapLegacyUpstreamError({ error }: { error: unknown }): TtsError {
+	if (error instanceof TtsError) {
+		return error;
+	}
+
+	return new TtsError({
+		code: "LEGACY_TTS_UPSTREAM",
+		message:
+			error instanceof Error ? error.message : "Legacy TTS generation failed",
+	});
+}
+
 export async function synthesizeSpeechWithLegacyProvider({
 	text,
 	voice: _voice,
@@ -38,12 +50,18 @@ export async function synthesizeSpeechWithLegacyProvider({
 		});
 	}
 
-	const upstreamResponse = await fetchWithTimeout({
-		fetchImpl,
-		input: upstreamUrl,
-		timeoutMessage: "Legacy TTS request timed out",
-		timeoutMs,
-	});
+	let upstreamResponse: Response;
+
+	try {
+		upstreamResponse = await fetchWithTimeout({
+			fetchImpl,
+			input: upstreamUrl,
+			timeoutMessage: "Legacy TTS request timed out",
+			timeoutMs,
+		});
+	} catch (error) {
+		throw wrapLegacyUpstreamError({ error });
+	}
 
 	if (!upstreamResponse.ok) {
 		throw new TtsError({
@@ -74,12 +92,18 @@ export async function synthesizeSpeechWithLegacyProvider({
 		});
 	}
 
-	const audioResponse = await fetchWithTimeout({
-		fetchImpl,
-		input: audioUrl,
-		timeoutMessage: "Legacy TTS audio download timed out",
-		timeoutMs,
-	});
+	let audioResponse: Response;
+
+	try {
+		audioResponse = await fetchWithTimeout({
+			fetchImpl,
+			input: audioUrl,
+			timeoutMessage: "Legacy TTS audio download timed out",
+			timeoutMs,
+		});
+	} catch (error) {
+		throw wrapLegacyUpstreamError({ error });
+	}
 
 	if (!audioResponse.ok) {
 		throw new TtsError({
diff --git a/apps/web/src/lib/tts/openai-compatible.test.ts b/apps/web/src/lib/tts/openai-compatible.test.ts
index 58883cc..05872f7 100644
--- a/apps/web/src/lib/tts/openai-compatible.test.ts
+++ b/apps/web/src/lib/tts/openai-compatible.test.ts
@@ -242,7 +242,10 @@ describe("synthesizeSpeechWithOpenAiCompatible", () => {
 						);
 					}),
 			}),
-		).rejects.toThrow("External TTS request timed out");
+		).rejects.toMatchObject({
+			code: "EXTERNAL_TTS_UPSTREAM",
+			message: "External TTS request timed out",
+		});
 	});
 
 	test("surfaces upstream text errors when JSON is unavailable", async () => {
diff --git a/apps/web/src/lib/tts/openai-compatible.ts b/apps/web/src/lib/tts/openai-compatible.ts
index 2cd639b..93ec77d 100644
--- a/apps/web/src/lib/tts/openai-compatible.ts
+++ b/apps/web/src/lib/tts/openai-compatible.ts
@@ -21,6 +21,18 @@ export interface ExternalTtsConfig {
 	model: string;
 }
 
+function wrapExternalUpstreamError({ error }: { error: unknown }): TtsError {
+	if (error instanceof TtsError) {
+		return error;
+	}
+
+	return new TtsError({
+		code: "EXTERNAL_TTS_UPSTREAM",
+		message:
+			error instanceof Error ? error.message : "External TTS request failed",
+	});
+}
+
 export function getExternalTtsConfig({
 	env,
 }: {
@@ -154,13 +166,19 @@ export async function synthesizeSpeechWithOpenAiCompatible({
 	let lastErrorResponse: Response | null = null;
 
 	for (const endpointUrl of endpointUrls) {
-		const response = await fetchWithTimeout({
-			fetchImpl,
-			init: requestInit,
-			input: endpointUrl,
-			timeoutMessage: "External TTS request timed out",
-			timeoutMs,
-		});
+		let response: Response;
+
+		try {
+			response = await fetchWithTimeout({
+				fetchImpl,
+				init: requestInit,
+				input: endpointUrl,
+				timeoutMessage: "External TTS request timed out",
+				timeoutMs,
+			});
+		} catch (error) {
+			throw wrapExternalUpstreamError({ error });
+		}
 
 		if (response.ok) {
 			const contentType = response.headers.get("content-type") ?? "";

From eaa3798f55fc01a2d65242b0ce154b7d837e33da Mon Sep 17 00:00:00 2001
From: tianhei <tianheilene@gmail.com>
Date: Thu, 19 Mar 2026 03:47:37 +0800
Subject: [PATCH 08/22] fix(tts): narrow fallback and refresh design context

Summary:
- limit legacy fallback to structured EXTERNAL_TTS_UPSTREAM failures only
- switch route tests to real TtsError instances and add unexpected-error coverage
- remove orchestration-only design doc text and mark background as pre-change context

Rationale:
- surface unexpected external provider bugs instead of silently masking them
- keep test fixtures aligned with production error handling paths
- make the design doc describe architecture rather than orchestration metadata

Tests:
- bun test apps/web/src/lib/tts/provider.test.ts apps/web/src/app/api/tts/generate/route.test.ts
- bun test apps/web/src/lib/tts/fetch-with-timeout.test.ts apps/web/src/lib/tts/provider.test.ts apps/web/src/lib/tts/openai-compatible.test.ts apps/web/src/lib/tts/legacy.test.ts apps/web/src/app/api/tts/generate/route.test.ts
- bunx @biomejs/biome check apps/web/src/app/api/tts/generate/route.ts apps/web/src/app/api/tts/generate/route.test.ts apps/web/src/lib/tts/fetch-with-timeout.ts apps/web/src/lib/tts/fetch-with-timeout.test.ts apps/web/src/lib/tts/openai-compatible.ts apps/web/src/lib/tts/openai-compatible.test.ts apps/web/src/lib/tts/legacy.ts apps/web/src/lib/tts/legacy.test.ts apps/web/src/lib/tts/provider.ts apps/web/src/lib/tts/provider.test.ts apps/web/src/lib/tts/errors.ts apps/web/src/constants/tts-constants.ts packages/env/src/web.ts docs/plans/2026-03-17-tts-external-provider.md docs/plans/2026-03-17-tts-external-provider-design.md
- bunx tsc -p apps/web/tsconfig.json --noEmit
- NODE_ENV=development NEXT_PUBLIC_SITE_URL=http://localhost:4311 UPSTASH_REDIS_REST_URL=https://example.com UPSTASH_REDIS_REST_TOKEN=dummy bun --eval 'import { NextRequest } from "next/server"; import { POST } from "./src/app/api/tts/generate/route.ts"; /* POST probe body elided */' -> status=200 audioHead=SUQzBAAA

Co-authored-by: Codex <codex@openai.com>
---
 .../src/app/api/tts/generate/route.test.ts    | 10 ++++--
 apps/web/src/lib/tts/provider.test.ts         | 36 ++++++++++++++++---
 apps/web/src/lib/tts/provider.ts              |  4 +++
 ...2026-03-17-tts-external-provider-design.md |  8 ++---
 4 files changed, 46 insertions(+), 12 deletions(-)

diff --git a/apps/web/src/app/api/tts/generate/route.test.ts b/apps/web/src/app/api/tts/generate/route.test.ts
index cdf43b8..27987a8 100644
--- a/apps/web/src/app/api/tts/generate/route.test.ts
+++ b/apps/web/src/app/api/tts/generate/route.test.ts
@@ -1,4 +1,5 @@
 import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
+import { TtsError } from "@/lib/tts/errors";
 
 let synthesizeImpl: typeof import("@/lib/tts/provider").synthesizeSpeechWithFallback;
 const originalConsoleError = console.error;
@@ -49,8 +50,9 @@ describe("POST /api/tts/generate", () => {
 
 	test("returns 502 for structured legacy upstream errors without relying on message prefixes", async () => {
 		synthesizeImpl = async () => {
-			throw Object.assign(new Error("legacy fallback audio download failed"), {
+			throw new TtsError({
 				code: "LEGACY_TTS_UPSTREAM",
+				message: "legacy fallback audio download failed",
 			});
 		};
 
@@ -64,8 +66,9 @@ describe("POST /api/tts/generate", () => {
 
 	test("returns 502 for structured external upstream errors without relying on message prefixes", async () => {
 		synthesizeImpl = async () => {
-			throw Object.assign(new Error("upstream gateway timeout"), {
+			throw new TtsError({
 				code: "EXTERNAL_TTS_UPSTREAM",
+				message: "upstream gateway timeout",
 			});
 		};
 
@@ -79,8 +82,9 @@ describe("POST /api/tts/generate", () => {
 
 	test("returns the original config error message for structured config failures", async () => {
 		synthesizeImpl = async () => {
-			throw Object.assign(new Error("external config missing"), {
+			throw new TtsError({
 				code: "EXTERNAL_TTS_CONFIG",
+				message: "external config missing",
 			});
 		};
 
diff --git a/apps/web/src/lib/tts/provider.test.ts b/apps/web/src/lib/tts/provider.test.ts
index 06dd5a1..8e77f06 100644
--- a/apps/web/src/lib/tts/provider.test.ts
+++ b/apps/web/src/lib/tts/provider.test.ts
@@ -1,4 +1,5 @@
 import { describe, expect, test } from "bun:test";
+import { TtsError } from "./errors";
 import { synthesizeSpeechWithFallback } from "./provider";
 
 describe("synthesizeSpeechWithFallback", () => {
@@ -24,7 +25,7 @@ describe("synthesizeSpeechWithFallback", () => {
 		expect(legacyCalled).toBe(false);
 	});
 
-	test("falls back to the legacy provider when the configured provider is unsupported", async () => {
+	test("falls back to the legacy provider for structured external upstream errors", async () => {
 		let legacyCalled = false;
 
 		const result = await synthesizeSpeechWithFallback({
@@ -36,9 +37,11 @@ describe("synthesizeSpeechWithFallback", () => {
 			text: "hello",
 			voice: "default",
 			openAiSynthesize: async () => {
-				throw new Error(
-					"External TTS request failed: Expected audio response, received text/html; charset=utf-8",
-				);
+				throw new TtsError({
+					code: "EXTERNAL_TTS_UPSTREAM",
+					message:
+						"External TTS request failed: Expected audio response, received text/html; charset=utf-8",
+				});
 			},
 			legacySynthesize: async ({ text }) => {
 				legacyCalled = true;
@@ -51,6 +54,31 @@ describe("synthesizeSpeechWithFallback", () => {
 		expect(legacyCalled).toBe(true);
 	});
 
+	test("rethrows unexpected external provider errors instead of silently falling back", async () => {
+		let legacyCalled = false;
+
+		await expect(
+			synthesizeSpeechWithFallback({
+				env: {
+					API_BASE_URL: "https://example.com/v1",
+					API_MODEL: "tts-1",
+					API_KEY: "secret",
+				},
+				text: "hello",
+				voice: "default",
+				openAiSynthesize: async () => {
+					throw new Error("unexpected provider failure");
+				},
+				legacySynthesize: async () => {
+					legacyCalled = true;
+					return Uint8Array.from([7, 8, 9]).buffer;
+				},
+			}),
+		).rejects.toThrow("unexpected provider failure");
+
+		expect(legacyCalled).toBe(false);
+	});
+
 	test("rethrows missing external config instead of silently falling back", async () => {
 		let openAiCalled = false;
 		let legacyCalled = false;
diff --git a/apps/web/src/lib/tts/provider.ts b/apps/web/src/lib/tts/provider.ts
index 39f0669..7165ce8 100644
--- a/apps/web/src/lib/tts/provider.ts
+++ b/apps/web/src/lib/tts/provider.ts
@@ -32,6 +32,10 @@ export async function synthesizeSpeechWithFallback({
 			throw error;
 		}
 
+		if (!isTtsError(error) || error.code !== "EXTERNAL_TTS_UPSTREAM") {
+			throw error;
+		}
+
 		return legacySynthesize({ text, voice });
 	}
 }
diff --git a/docs/plans/2026-03-17-tts-external-provider-design.md b/docs/plans/2026-03-17-tts-external-provider-design.md
index 9638aec..6468645 100644
--- a/docs/plans/2026-03-17-tts-external-provider-design.md
+++ b/docs/plans/2026-03-17-tts-external-provider-design.md
@@ -4,15 +4,13 @@
 
 `TIA-51` 要求 Cutia 支持调用外部 TTS API，把文本或对话内容生成语音并接入视频编辑流程。
 
-当前仓库已经有一条从文本元素生成语音并插入时间线的链路，但服务端路由 `apps/web/src/app/api/tts/generate/route.ts` 仍然把上游 TTS 服务硬编码为单一匿名接口，`voice` 参数也没有被真正消费。这意味着：
+改造前，仓库已经有一条从文本元素生成语音并插入时间线的链路，但服务端路由 `apps/web/src/app/api/tts/generate/route.ts` 在上游扩展能力和 `voice` 语义上都存在限制。这意味着：
 
 - 外部 TTS 提供方无法通过环境配置切换
-- 语音选项只是前端占位，实际不会影响生成结果
+- `voice` 参数虽然已经沿链路透传，但缺少与外部 provider 对齐的清晰适配语义
 - 错误语义受限于硬编码上游，缺少可维护的适配层
 
-## 无人值守前提
-
-本次执行为无人值守编排，会直接根据工单描述和运行环境中的 `API_BASE_URL`、`API_MODEL`、`API_KEY` 做保守设计，不额外等待人工确认。
+后续设计会在保留既有编辑器接入方式的前提下，把 provider 配置、fallback 与错误适配层补齐为可维护结构。
 
 ## 目标
 

From 60ccc1000a9c44c6ebc235791e53f4446312898d Mon Sep 17 00:00:00 2001
From: tianhei <tianheilene@gmail.com>
Date: Thu, 19 Mar 2026 23:26:29 +0800
Subject: [PATCH 09/22] fix(tts): close rework gaps in provider fallback

Summary:
- harden legacy audio downloads against unsafe redirects and add
  regression tests for redirect validation
- preserve retryability metadata for external upstream failures so
  provider fallback skips non-retryable errors (auth and other 4xx
  failures except 408/429, plus empty or non-audio responses)
- expand timeout, provider, and route coverage and refresh the design
  note for the current fallback behavior

Rationale:
- CodeRabbit rework flagged redirect bypass and silent fallback cases
  that could hide the real external provider failure
- the live probe showed a 200 text/html response was still falling
  through to legacy, so non-audio and empty-audio responses are now
  marked non-retryable and surface as the terminal error
- the extra tests keep the rework fixes pinned to the actual failure
  modes seen in review and runtime validation

Tests:
- bun test apps/web/src/lib/tts/fetch-with-timeout.test.ts apps/web/src/lib/tts/openai-compatible.test.ts apps/web/src/lib/tts/legacy.test.ts apps/web/src/app/api/tts/generate/route.test.ts
- bun test apps/web/src/lib/tts/fetch-with-timeout.test.ts apps/web/src/lib/tts/provider.test.ts apps/web/src/lib/tts/openai-compatible.test.ts apps/web/src/lib/tts/legacy.test.ts apps/web/src/app/api/tts/generate/route.test.ts
- bunx @biomejs/biome check apps/web/src/app/api/tts/generate/route.ts apps/web/src/app/api/tts/generate/route.test.ts apps/web/src/lib/tts/fetch-with-timeout.ts apps/web/src/lib/tts/fetch-with-timeout.test.ts apps/web/src/lib/tts/openai-compatible.ts apps/web/src/lib/tts/openai-compatible.test.ts apps/web/src/lib/tts/legacy.ts apps/web/src/lib/tts/legacy.test.ts apps/web/src/lib/tts/provider.ts apps/web/src/lib/tts/provider.test.ts docs/plans/2026-03-17-tts-external-provider-design.md
- bunx tsc -p apps/web/tsconfig.json --noEmit
- NODE_ENV=development NEXT_PUBLIC_SITE_URL=http://localhost:4311 UPSTASH_REDIS_REST_URL=https://example.com UPSTASH_REDIS_REST_TOKEN=dummy bun --eval '...route probe...'

Co-authored-by: Codex <codex@openai.com>
---
 .../src/app/api/tts/generate/route.test.ts    | 13 ++--
 apps/web/src/lib/tts/errors.ts                |  8 ++
 .../src/lib/tts/fetch-with-timeout.test.ts    | 36 +++++++++
 apps/web/src/lib/tts/legacy.test.ts           | 32 ++++++++
 apps/web/src/lib/tts/legacy.ts                | 37 +++++++++
 .../web/src/lib/tts/openai-compatible.test.ts | 36 +++++++++
 apps/web/src/lib/tts/openai-compatible.ts     | 24 ++++++
 apps/web/src/lib/tts/provider.test.ts         | 76 +++++++++++++++++++
 apps/web/src/lib/tts/provider.ts              |  6 +-
 ...2026-03-17-tts-external-provider-design.md |  6 +-
 10 files changed, 265 insertions(+), 9 deletions(-)

diff --git a/apps/web/src/app/api/tts/generate/route.test.ts b/apps/web/src/app/api/tts/generate/route.test.ts
index 27987a8..77485f1 100644
--- a/apps/web/src/app/api/tts/generate/route.test.ts
+++ b/apps/web/src/app/api/tts/generate/route.test.ts
@@ -1,5 +1,6 @@
 import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
 import { TtsError } from "@/lib/tts/errors";
+import { NextRequest } from "next/server";
 
 let synthesizeImpl: typeof import("@/lib/tts/provider").synthesizeSpeechWithFallback;
 const originalConsoleError = console.error;
@@ -19,8 +20,8 @@ mock.module("@/lib/tts/provider", () => ({
 
 const { POST } = await import("./route");
 
-function createRequest(body: unknown): Request {
-	return new Request("http://localhost/api/tts/generate", {
+function createRequest(body: unknown): NextRequest {
+	return new NextRequest("http://localhost/api/tts/generate", {
 		body: JSON.stringify(body),
 		headers: {
 			"content-type": "application/json",
@@ -40,7 +41,7 @@ describe("POST /api/tts/generate", () => {
 	});
 
 	test("returns base64 audio for successful synthesis", async () => {
-		const response = await POST(createRequest({ text: "hello" }) as never);
+		const response = await POST(createRequest({ text: "hello" }));
 
 		expect(response.status).toBe(200);
 		expect(await response.json()).toEqual({
@@ -56,7 +57,7 @@ describe("POST /api/tts/generate", () => {
 			});
 		};
 
-		const response = await POST(createRequest({ text: "hello" }) as never);
+		const response = await POST(createRequest({ text: "hello" }));
 
 		expect(response.status).toBe(502);
 		expect(await response.json()).toEqual({
@@ -72,7 +73,7 @@ describe("POST /api/tts/generate", () => {
 			});
 		};
 
-		const response = await POST(createRequest({ text: "hello" }) as never);
+		const response = await POST(createRequest({ text: "hello" }));
 
 		expect(response.status).toBe(502);
 		expect(await response.json()).toEqual({
@@ -88,7 +89,7 @@ describe("POST /api/tts/generate", () => {
 			});
 		};
 
-		const response = await POST(createRequest({ text: "hello" }) as never);
+		const response = await POST(createRequest({ text: "hello" }));
 
 		expect(response.status).toBe(500);
 		expect(await response.json()).toEqual({
diff --git a/apps/web/src/lib/tts/errors.ts b/apps/web/src/lib/tts/errors.ts
index d3f7bc4..639b9ee 100644
--- a/apps/web/src/lib/tts/errors.ts
+++ b/apps/web/src/lib/tts/errors.ts
@@ -8,17 +8,25 @@ export type TtsErrorCode = (typeof TTS_ERROR_CODES)[number];
 
 export class TtsError extends Error {
 	code: TtsErrorCode;
+	retryable?: boolean;
+	status?: number;
 
 	constructor({
 		code,
 		message,
+		retryable,
+		status,
 	}: {
 		code: TtsErrorCode;
 		message: string;
+		retryable?: boolean;
+		status?: number;
 	}) {
 		super(message);
 		this.name = "TtsError";
 		this.code = code;
+		this.retryable = retryable;
+		this.status = status;
 	}
 }
 
diff --git a/apps/web/src/lib/tts/fetch-with-timeout.test.ts b/apps/web/src/lib/tts/fetch-with-timeout.test.ts
index 880fc93..8652a1f 100644
--- a/apps/web/src/lib/tts/fetch-with-timeout.test.ts
+++ b/apps/web/src/lib/tts/fetch-with-timeout.test.ts
@@ -2,6 +2,24 @@ import { describe, expect, test } from "bun:test";
 import { fetchWithTimeout } from "./fetch-with-timeout";
 
 describe("fetchWithTimeout", () => {
+	test("resolves successfully when fetch completes before the timeout", async () => {
+		let fetchCalled = false;
+
+		const response = await fetchWithTimeout({
+			fetchImpl: async () => {
+				fetchCalled = true;
+				return new Response("ok", { status: 200 });
+			},
+			input: "https://example.com",
+			timeoutMessage: "timed out",
+			timeoutMs: 50,
+		});
+
+		expect(fetchCalled).toBe(true);
+		expect(response.status).toBe(200);
+		expect(await response.text()).toBe("ok");
+	});
+
 	test("rejects immediately when the caller signal is already aborted", async () => {
 		const controller = new AbortController();
 		const callerError = new Error("caller aborted");
@@ -48,4 +66,22 @@ describe("fetchWithTimeout", () => {
 			}),
 		).rejects.toThrow("caller aborted");
 	});
+
+	test("rejects with the timeout message when fetch exceeds timeoutMs", async () => {
+		await expect(
+			fetchWithTimeout({
+				fetchImpl: async (_input, init) =>
+					new Promise((_resolve, reject) => {
+						init?.signal?.addEventListener(
+							"abort",
+							() => reject(new Error("aborted")),
+							{ once: true },
+						);
+					}),
+				input: "https://example.com",
+				timeoutMessage: "timed out",
+				timeoutMs: 10,
+			}),
+		).rejects.toThrow("timed out");
+	});
 });
diff --git a/apps/web/src/lib/tts/legacy.test.ts b/apps/web/src/lib/tts/legacy.test.ts
index 37e4afd..d78d637 100644
--- a/apps/web/src/lib/tts/legacy.test.ts
+++ b/apps/web/src/lib/tts/legacy.test.ts
@@ -83,6 +83,38 @@ describe("synthesizeSpeechWithLegacyProvider", () => {
 		expect(Array.from(new Uint8Array(audio))).toEqual([1, 2, 3]);
 	});
 
+	test("rejects redirected audio downloads that leave the allowlist", async () => {
+		let sawManualRedirect = false;
+
+		await expect(
+			synthesizeSpeechWithLegacyProvider({
+				text: "hello",
+				fetchImpl: async (input, init) => {
+					if (String(input).includes("/apis/mbAIsc?")) {
+						return Response.json({
+							code: 200,
+							url: "https://api.milorapart.top/voice/test.mp3",
+						});
+					}
+
+					sawManualRedirect = init?.redirect === "manual";
+
+					return new Response(null, {
+						status: 302,
+						headers: {
+							location: "https://evil.example.com/payload.mp3",
+						},
+					});
+				},
+			}),
+		).rejects.toMatchObject({
+			code: "LEGACY_TTS_UPSTREAM",
+			message: "Legacy TTS audio download redirected to an unexpected host",
+		});
+
+		expect(sawManualRedirect).toBe(true);
+	});
+
 	test("rejects synthesis text that would exceed the legacy GET limit", async () => {
 		let fetchCalled = false;
 
diff --git a/apps/web/src/lib/tts/legacy.ts b/apps/web/src/lib/tts/legacy.ts
index 3f575e9..01d6092 100644
--- a/apps/web/src/lib/tts/legacy.ts
+++ b/apps/web/src/lib/tts/legacy.ts
@@ -12,6 +12,10 @@ const legacyResponseSchema = z.object({
 	url: z.string().url(),
 });
 
+function isRedirectStatus(status: number): boolean {
+	return status >= 300 && status < 400;
+}
+
 function wrapLegacyUpstreamError({ error }: { error: unknown }): TtsError {
 	if (error instanceof TtsError) {
 		return error;
@@ -97,6 +101,7 @@ export async function synthesizeSpeechWithLegacyProvider({
 	try {
 		audioResponse = await fetchWithTimeout({
 			fetchImpl,
+			init: { redirect: "manual" },
 			input: audioUrl,
 			timeoutMessage: "Legacy TTS audio download timed out",
 			timeoutMs,
@@ -105,6 +110,38 @@ export async function synthesizeSpeechWithLegacyProvider({
 		throw wrapLegacyUpstreamError({ error });
 	}
 
+	if (isRedirectStatus(audioResponse.status)) {
+		const location = audioResponse.headers.get("location");
+
+		if (!location) {
+			throw new TtsError({
+				code: "LEGACY_TTS_UPSTREAM",
+				message: `Legacy TTS audio download failed: ${audioResponse.status}`,
+			});
+		}
+
+		let redirectUrl: URL;
+
+		try {
+			redirectUrl = new URL(location, audioUrl);
+		} catch {
+			throw new TtsError({
+				code: "LEGACY_TTS_UPSTREAM",
+				message: "Legacy TTS audio download redirected to an invalid URL",
+			});
+		}
+
+		if (
+			redirectUrl.protocol !== "https:" ||
+			!LEGACY_TTS_ALLOWED_AUDIO_HOSTS.has(redirectUrl.hostname)
+		) {
+			throw new TtsError({
+				code: "LEGACY_TTS_UPSTREAM",
+				message: "Legacy TTS audio download redirected to an unexpected host",
+			});
+		}
+	}
+
 	if (!audioResponse.ok) {
 		throw new TtsError({
 			code: "LEGACY_TTS_UPSTREAM",
diff --git a/apps/web/src/lib/tts/openai-compatible.test.ts b/apps/web/src/lib/tts/openai-compatible.test.ts
index 05872f7..fdc90b2 100644
--- a/apps/web/src/lib/tts/openai-compatible.test.ts
+++ b/apps/web/src/lib/tts/openai-compatible.test.ts
@@ -44,6 +44,18 @@ describe("getExternalTtsConfig", () => {
 			}),
 		).toThrow("External TTS is not configured");
 	});
+
+	test("rejects malformed API_BASE_URL values", () => {
+		expect(() =>
+			getExternalTtsConfig({
+				env: {
+					API_BASE_URL: "not-a-url",
+					API_MODEL: "tts-1",
+					API_KEY: "secret",
+				},
+			}),
+		).toThrow("External TTS is not configured");
+	});
 });
 
 describe("synthesizeSpeechWithOpenAiCompatible", () => {
@@ -267,6 +279,30 @@ describe("synthesizeSpeechWithOpenAiCompatible", () => {
 		).rejects.toThrow("gateway timeout");
 	});
 
+	test("marks auth failures as non-retryable upstream errors", async () => {
+		await expect(
+			synthesizeSpeechWithOpenAiCompatible({
+				config: {
+					apiBaseUrl: "https://example.com/v1",
+					apiKey: "secret",
+					model: "tts-1",
+				},
+				text: "hello",
+				voice: "default",
+				fetchImpl: async () =>
+					Response.json(
+						{ error: { message: "invalid api key" } },
+						{ status: 401, statusText: "Unauthorized" },
+					),
+			}),
+		).rejects.toMatchObject({
+			code: "EXTERNAL_TTS_UPSTREAM",
+			message: "External TTS request failed: invalid api key",
+			retryable: false,
+			status: 401,
+		});
+	});
+
 	test("falls back to the raw upstream body when JSON shape is unrecognized", async () => {
 		await expect(
 			synthesizeSpeechWithOpenAiCompatible({
diff --git a/apps/web/src/lib/tts/openai-compatible.ts b/apps/web/src/lib/tts/openai-compatible.ts
index 93ec77d..b6f00cf 100644
--- a/apps/web/src/lib/tts/openai-compatible.ts
+++ b/apps/web/src/lib/tts/openai-compatible.ts
@@ -21,6 +21,14 @@ export interface ExternalTtsConfig {
 	model: string;
 }
 
+function isRetryableStatus(status: number | undefined): boolean {
+	if (status == null) {
+		return true;
+	}
+
+	return status === 408 || status === 429 || status >= 500;
+}
+
 function wrapExternalUpstreamError({ error }: { error: unknown }): TtsError {
 	if (error instanceof TtsError) {
 		return error;
@@ -30,6 +38,7 @@ function wrapExternalUpstreamError({ error }: { error: unknown }): TtsError {
 		code: "EXTERNAL_TTS_UPSTREAM",
 		message:
 			error instanceof Error ? error.message : "External TTS request failed",
+		retryable: true,
 	});
 }
 
@@ -58,6 +67,15 @@ export function getExternalTtsConfig({
 		});
 	}
 
+	try {
+		new URL(apiBaseUrl);
+	} catch {
+		throw new TtsError({
+			code: "EXTERNAL_TTS_CONFIG",
+			message: "External TTS is not configured",
+		});
+	}
+
 	return {
 		apiBaseUrl,
 		apiKey,
@@ -191,6 +209,8 @@ export async function synthesizeSpeechWithOpenAiCompatible({
 				throw new TtsError({
 					code: "EXTERNAL_TTS_UPSTREAM",
 					message: `Expected audio response, received ${contentType || "(no content-type)"}`,
+					retryable: false,
+					status: response.status,
 				});
 			}
 
@@ -200,6 +220,8 @@ export async function synthesizeSpeechWithOpenAiCompatible({
 				throw new TtsError({
 					code: "EXTERNAL_TTS_UPSTREAM",
 					message: "External TTS returned empty audio",
+					retryable: false,
+					status: response.status,
 				});
 			}
 
@@ -218,5 +240,7 @@ export async function synthesizeSpeechWithOpenAiCompatible({
 		message: `External TTS request failed: ${await getUpstreamErrorMessage({
 			response: lastErrorResponse ?? new Response(null, { status: 500 }),
 		})}`,
+		retryable: isRetryableStatus(lastErrorResponse?.status),
+		status: lastErrorResponse?.status,
 	});
 }
diff --git a/apps/web/src/lib/tts/provider.test.ts b/apps/web/src/lib/tts/provider.test.ts
index 8e77f06..db2e0b7 100644
--- a/apps/web/src/lib/tts/provider.test.ts
+++ b/apps/web/src/lib/tts/provider.test.ts
@@ -1,5 +1,6 @@
 import { describe, expect, test } from "bun:test";
 import { TtsError } from "./errors";
+import { synthesizeSpeechWithOpenAiCompatible } from "./openai-compatible";
 import { synthesizeSpeechWithFallback } from "./provider";
 
 describe("synthesizeSpeechWithFallback", () => {
@@ -79,6 +80,81 @@ describe("synthesizeSpeechWithFallback", () => {
 		expect(legacyCalled).toBe(false);
 	});
 
+	test("rethrows non-retryable external upstream errors instead of falling back", async () => {
+		let legacyCalled = false;
+
+		await expect(
+			synthesizeSpeechWithFallback({
+				env: {
+					API_BASE_URL: "https://example.com/v1",
+					API_MODEL: "tts-1",
+					API_KEY: "secret",
+				},
+				text: "hello",
+				voice: "default",
+				openAiSynthesize: async () => {
+					throw Object.assign(
+						new TtsError({
+							code: "EXTERNAL_TTS_UPSTREAM",
+							message: "External TTS request failed: invalid api key",
+						}),
+						{
+							retryable: false,
+							status: 401,
+						},
+					);
+				},
+				legacySynthesize: async () => {
+					legacyCalled = true;
+					return Uint8Array.from([7, 8, 9]).buffer;
+				},
+			}),
+		).rejects.toMatchObject({
+			code: "EXTERNAL_TTS_UPSTREAM",
+			retryable: false,
+			status: 401,
+		});
+
+		expect(legacyCalled).toBe(false);
+	});
+
+	test("does not fall back when the external provider returns a non-audio success response", async () => {
+		let legacyCalled = false;
+
+		await expect(
+			synthesizeSpeechWithFallback({
+				env: {
+					API_BASE_URL: "https://example.com/v1",
+					API_MODEL: "tts-1",
+					API_KEY: "secret",
+				},
+				text: "hello",
+				voice: "default",
+				openAiSynthesize: ({ config, text, voice }) =>
+					synthesizeSpeechWithOpenAiCompatible({
+						config,
+						text,
+						voice,
+						fetchImpl: async () =>
+							new Response("<!doctype html>", {
+								status: 200,
+								headers: { "Content-Type": "text/html; charset=utf-8" },
+							}),
+					}),
+				legacySynthesize: async () => {
+					legacyCalled = true;
+					return Uint8Array.from([7, 8, 9]).buffer;
+				},
+			}),
+		).rejects.toMatchObject({
+			code: "EXTERNAL_TTS_UPSTREAM",
+			message: "Expected audio response, received text/html; charset=utf-8",
+			retryable: false,
+		});
+
+		expect(legacyCalled).toBe(false);
+	});
+
 	test("rethrows missing external config instead of silently falling back", async () => {
 		let openAiCalled = false;
 		let legacyCalled = false;
diff --git a/apps/web/src/lib/tts/provider.ts b/apps/web/src/lib/tts/provider.ts
index 7165ce8..fd45996 100644
--- a/apps/web/src/lib/tts/provider.ts
+++ b/apps/web/src/lib/tts/provider.ts
@@ -32,7 +32,11 @@ export async function synthesizeSpeechWithFallback({
 			throw error;
 		}
 
-		if (!isTtsError(error) || error.code !== "EXTERNAL_TTS_UPSTREAM") {
+		if (
+			!isTtsError(error) ||
+			error.code !== "EXTERNAL_TTS_UPSTREAM" ||
+			error.retryable === false
+		) {
 			throw error;
 		}
 
diff --git a/docs/plans/2026-03-17-tts-external-provider-design.md b/docs/plans/2026-03-17-tts-external-provider-design.md
index 6468645..3978d71 100644
--- a/docs/plans/2026-03-17-tts-external-provider-design.md
+++ b/docs/plans/2026-03-17-tts-external-provider-design.md
@@ -103,8 +103,10 @@
 
 - 请求参数非法：返回 `400`
 - TTS 环境变量缺失：返回 `500`，信息明确为未配置
-- 外部 TTS 返回非 2xx：返回 `502`，透出可读错误
-- 外部 TTS 返回空音频或异常格式：返回 `502`
+- 外部 provider 返回可重试的 `EXTERNAL_TTS_UPSTREAM`（例如超时、`429`、`5xx`）时，会先回退到 legacy provider
+  - legacy 回退成功：最终仍可能返回 `200`
+  - legacy 回退失败：最终返回 `502`
+- 外部 provider 返回不可重试的 `EXTERNAL_TTS_UPSTREAM`（例如 `401`/`403`/`404`、空音频、非音频响应）时：直接返回 `502`
 - 未知异常：返回 `500`
 
 ## 测试策略

From b5dc0302132c52dc01722b65443b35b6a3655c0d Mon Sep 17 00:00:00 2001
From: tianhei <tianheilene@gmail.com>
Date: Thu, 19 Mar 2026 23:45:57 +0800
Subject: [PATCH 10/22] fix(tts): handle legacy redirects and namespaced config

Summary:
- follow allowlisted legacy audio redirects instead of failing after
  validating the target URL
- prefer EXTERNAL_TTS_API_* config with API_* aliases kept for
  backward-compatible migration
- update provider integration tests and TTS docs to reflect the new
  redirect and config behavior

Rationale:
- the latest PR review caught a real regression where safe legacy
  redirects could never succeed under redirect: manual
- namespaced TTS env keys reduce collisions with unrelated API config,
  and the API_* aliases keep the existing unattended environment working
- keeping docs and tests aligned prevents this review feedback from
  recurring in later rework attempts

Tests:
- bun test apps/web/src/lib/tts/legacy.test.ts apps/web/src/lib/tts/openai-compatible.test.ts apps/web/src/lib/tts/provider.test.ts
- bun test apps/web/src/lib/tts/fetch-with-timeout.test.ts apps/web/src/lib/tts/provider.test.ts apps/web/src/lib/tts/openai-compatible.test.ts apps/web/src/lib/tts/legacy.test.ts apps/web/src/app/api/tts/generate/route.test.ts
- bunx @biomejs/biome check apps/web/src/app/api/tts/generate/route.ts apps/web/src/app/api/tts/generate/route.test.ts apps/web/src/lib/tts/fetch-with-timeout.ts apps/web/src/lib/tts/fetch-with-timeout.test.ts apps/web/src/lib/tts/openai-compatible.ts apps/web/src/lib/tts/openai-compatible.test.ts apps/web/src/lib/tts/legacy.ts apps/web/src/lib/tts/legacy.test.ts apps/web/src/lib/tts/provider.ts apps/web/src/lib/tts/provider.test.ts packages/env/src/web.ts docs/plans/2026-03-17-tts-external-provider.md docs/plans/2026-03-17-tts-external-provider-design.md
- bunx tsc -p apps/web/tsconfig.json --noEmit

Co-authored-by: Codex <codex@openai.com>
---
 apps/web/src/lib/tts/legacy.test.ts           | 40 +++++++++++++++++++
 apps/web/src/lib/tts/legacy.ts                | 12 ++++++
 .../web/src/lib/tts/openai-compatible.test.ts | 37 ++++++++++++++++-
 apps/web/src/lib/tts/openai-compatible.ts     | 16 +++++++-
 apps/web/src/lib/tts/provider.test.ts         |  6 +--
 apps/web/src/lib/tts/provider.ts              |  3 ++
 ...2026-03-17-tts-external-provider-design.md |  5 ++-
 .../plans/2026-03-17-tts-external-provider.md |  3 +-
 packages/env/src/web.ts                       |  3 ++
 9 files changed, 117 insertions(+), 8 deletions(-)

diff --git a/apps/web/src/lib/tts/legacy.test.ts b/apps/web/src/lib/tts/legacy.test.ts
index d78d637..bbe9896 100644
--- a/apps/web/src/lib/tts/legacy.test.ts
+++ b/apps/web/src/lib/tts/legacy.test.ts
@@ -115,6 +115,46 @@ describe("synthesizeSpeechWithLegacyProvider", () => {
 		expect(sawManualRedirect).toBe(true);
 	});
 
+	test("follows allowlisted redirects for legacy audio downloads", async () => {
+		let downloadCallCount = 0;
+
+		const audio = await synthesizeSpeechWithLegacyProvider({
+			text: "hello",
+			fetchImpl: async (input, init) => {
+				if (String(input).includes("/apis/mbAIsc?")) {
+					return Response.json({
+						code: 200,
+						url: "https://api.milorapart.top/voice/test.mp3",
+					});
+				}
+
+				downloadCallCount++;
+
+				if (downloadCallCount === 1) {
+					expect(init?.redirect).toBe("manual");
+
+					return new Response(null, {
+						status: 302,
+						headers: {
+							location: "https://api.milorapart.top/voice/test-redirected.mp3",
+						},
+					});
+				}
+
+				expect(String(input)).toBe(
+					"https://api.milorapart.top/voice/test-redirected.mp3",
+				);
+				return new Response(Uint8Array.from([4, 5, 6]), {
+					status: 200,
+					headers: { "Content-Type": "audio/mpeg" },
+				});
+			},
+		});
+
+		expect(downloadCallCount).toBe(2);
+		expect(Array.from(new Uint8Array(audio))).toEqual([4, 5, 6]);
+	});
+
 	test("rejects synthesis text that would exceed the legacy GET limit", async () => {
 		let fetchCalled = false;
 
diff --git a/apps/web/src/lib/tts/legacy.ts b/apps/web/src/lib/tts/legacy.ts
index 01d6092..f0e8212 100644
--- a/apps/web/src/lib/tts/legacy.ts
+++ b/apps/web/src/lib/tts/legacy.ts
@@ -140,6 +140,18 @@ export async function synthesizeSpeechWithLegacyProvider({
 				message: "Legacy TTS audio download redirected to an unexpected host",
 			});
 		}
+
+		try {
+			audioResponse = await fetchWithTimeout({
+				fetchImpl,
+				init: { redirect: "error" },
+				input: redirectUrl,
+				timeoutMessage: "Legacy TTS audio download timed out",
+				timeoutMs,
+			});
+		} catch (error) {
+			throw wrapLegacyUpstreamError({ error });
+		}
 	}
 
 	if (!audioResponse.ok) {
diff --git a/apps/web/src/lib/tts/openai-compatible.test.ts b/apps/web/src/lib/tts/openai-compatible.test.ts
index fdc90b2..366065f 100644
--- a/apps/web/src/lib/tts/openai-compatible.test.ts
+++ b/apps/web/src/lib/tts/openai-compatible.test.ts
@@ -6,7 +6,23 @@ import {
 } from "./openai-compatible";
 
 describe("getExternalTtsConfig", () => {
-	test("reads required config from environment", () => {
+	test("reads namespaced TTS config from environment", () => {
+		const config = getExternalTtsConfig({
+			env: {
+				EXTERNAL_TTS_API_BASE_URL: "https://example.com/v1/",
+				EXTERNAL_TTS_API_MODEL: "tts-1",
+				EXTERNAL_TTS_API_KEY: "secret",
+			},
+		});
+
+		expect(config).toEqual({
+			apiBaseUrl: "https://example.com/v1",
+			apiKey: "secret",
+			model: "tts-1",
+		});
+	});
+
+	test("falls back to legacy API_* aliases when namespaced TTS config is absent", () => {
 		const config = getExternalTtsConfig({
 			env: {
 				API_BASE_URL: "https://example.com/v1/",
@@ -22,6 +38,25 @@ describe("getExternalTtsConfig", () => {
 		});
 	});
 
+	test("prefers namespaced TTS config over legacy aliases", () => {
+		const config = getExternalTtsConfig({
+			env: {
+				API_BASE_URL: "https://legacy.example.com/v1/",
+				API_MODEL: "legacy-tts",
+				API_KEY: "legacy-secret",
+				EXTERNAL_TTS_API_BASE_URL: "https://example.com/v1/",
+				EXTERNAL_TTS_API_MODEL: "tts-1",
+				EXTERNAL_TTS_API_KEY: "secret",
+			},
+		});
+
+		expect(config).toEqual({
+			apiBaseUrl: "https://example.com/v1",
+			apiKey: "secret",
+			model: "tts-1",
+		});
+	});
+
 	test("throws a clear error when config is incomplete", () => {
 		expect(() =>
 			getExternalTtsConfig({
diff --git a/apps/web/src/lib/tts/openai-compatible.ts b/apps/web/src/lib/tts/openai-compatible.ts
index b6f00cf..8fb42d0 100644
--- a/apps/web/src/lib/tts/openai-compatible.ts
+++ b/apps/web/src/lib/tts/openai-compatible.ts
@@ -21,6 +21,18 @@ export interface ExternalTtsConfig {
 	model: string;
 }
 
+function resolveExternalTtsEnv({
+	env,
+}: {
+	env: Record<string, string | undefined>;
+}): Record<"API_BASE_URL" | "API_MODEL" | "API_KEY", string | undefined> {
+	return {
+		API_BASE_URL: env.EXTERNAL_TTS_API_BASE_URL ?? env.API_BASE_URL,
+		API_MODEL: env.EXTERNAL_TTS_API_MODEL ?? env.API_MODEL,
+		API_KEY: env.EXTERNAL_TTS_API_KEY ?? env.API_KEY,
+	};
+}
+
 function isRetryableStatus(status: number | undefined): boolean {
 	if (status == null) {
 		return true;
@@ -47,7 +59,9 @@ export function getExternalTtsConfig({
 }: {
 	env: Record<string, string | undefined>;
 }): ExternalTtsConfig {
-	const parsed = externalTtsConfigSchema.safeParse(env);
+	const parsed = externalTtsConfigSchema.safeParse(
+		resolveExternalTtsEnv({ env }),
+	);
 
 	if (!parsed.success) {
 		throw new TtsError({
diff --git a/apps/web/src/lib/tts/provider.test.ts b/apps/web/src/lib/tts/provider.test.ts
index db2e0b7..627f9d1 100644
--- a/apps/web/src/lib/tts/provider.test.ts
+++ b/apps/web/src/lib/tts/provider.test.ts
@@ -9,9 +9,9 @@ describe("synthesizeSpeechWithFallback", () => {
 
 		const result = await synthesizeSpeechWithFallback({
 			env: {
-				API_BASE_URL: "https://example.com/v1",
-				API_MODEL: "tts-1",
-				API_KEY: "secret",
+				EXTERNAL_TTS_API_BASE_URL: "https://example.com/v1",
+				EXTERNAL_TTS_API_MODEL: "tts-1",
+				EXTERNAL_TTS_API_KEY: "secret",
 			},
 			text: "hello",
 			voice: "default",
diff --git a/apps/web/src/lib/tts/provider.ts b/apps/web/src/lib/tts/provider.ts
index fd45996..39d1c37 100644
--- a/apps/web/src/lib/tts/provider.ts
+++ b/apps/web/src/lib/tts/provider.ts
@@ -9,6 +9,9 @@ type TtsEnv = {
 	API_BASE_URL?: string;
 	API_MODEL?: string;
 	API_KEY?: string;
+	EXTERNAL_TTS_API_BASE_URL?: string;
+	EXTERNAL_TTS_API_MODEL?: string;
+	EXTERNAL_TTS_API_KEY?: string;
 };
 
 export async function synthesizeSpeechWithFallback({
diff --git a/docs/plans/2026-03-17-tts-external-provider-design.md b/docs/plans/2026-03-17-tts-external-provider-design.md
index 3978d71..cadce47 100644
--- a/docs/plans/2026-03-17-tts-external-provider-design.md
+++ b/docs/plans/2026-03-17-tts-external-provider-design.md
@@ -43,7 +43,8 @@
 
 优点：
 - 只需要一层薄适配，即可支持大量 OpenAI 兼容的 TTS 服务
-- 和当前运行环境提供的 `API_BASE_URL`、`API_MODEL`、`API_KEY` 直接对齐
+- 优先使用 `EXTERNAL_TTS_API_BASE_URL`、`EXTERNAL_TTS_API_MODEL`、`EXTERNAL_TTS_API_KEY`
+- 兼容读取当前运行环境里的 `API_BASE_URL`、`API_MODEL`、`API_KEY` 作为迁移别名
 - 前端接口保持不变，编辑器链路改动最小
 
 缺点：
@@ -121,7 +122,7 @@
 
 ### 真实验证
 
-- 使用环境中的真实 `API_BASE_URL`、`API_MODEL`、`API_KEY`
+- 使用环境中的真实 TTS 配置验证，优先为 `EXTERNAL_TTS_API_*`，没有时回退到 `API_*`
 - 直接运行一次服务端适配逻辑，验证能拿到非空 MP3 数据
 
 ## 风险与缓解
diff --git a/docs/plans/2026-03-17-tts-external-provider.md b/docs/plans/2026-03-17-tts-external-provider.md
index 34a07e7..c05f40d 100644
--- a/docs/plans/2026-03-17-tts-external-provider.md
+++ b/docs/plans/2026-03-17-tts-external-provider.md
@@ -27,7 +27,8 @@ Expected: FAIL，原因是测试文件或实现不存在。
 
 **Step 3: 为后续实现准备最小配置面**
 
-- 在环境 schema 中加入 `API_BASE_URL`、`API_MODEL`、`API_KEY`
+- 在环境 schema 中加入 `EXTERNAL_TTS_API_BASE_URL`、`EXTERNAL_TTS_API_MODEL`、`EXTERNAL_TTS_API_KEY`
+- 兼容读取旧的 `API_BASE_URL`、`API_MODEL`、`API_KEY`，用于迁移与当前共享环境
 - 在 TTS 常量中定义默认 voice 与可选 voice 列表
 
 **Step 4: 运行定向测试**
diff --git a/packages/env/src/web.ts b/packages/env/src/web.ts
index d0ffdbc..c40efc5 100644
--- a/packages/env/src/web.ts
+++ b/packages/env/src/web.ts
@@ -21,6 +21,9 @@ const webEnvSchema = z.object({
 	UPSTASH_REDIS_REST_TOKEN: z.string(),
 	FREESOUND_CLIENT_ID: z.string().optional(),
 	FREESOUND_API_KEY: z.string().optional(),
+	EXTERNAL_TTS_API_BASE_URL: z.string().optional(),
+	EXTERNAL_TTS_API_MODEL: z.string().optional(),
+	EXTERNAL_TTS_API_KEY: z.string().optional(),
 	API_BASE_URL: z.string().optional(),
 	API_MODEL: z.string().optional(),
 	API_KEY: z.string().optional(),

From 5e233ff1f8bfac5472371fea26b76fccb4a42830 Mon Sep 17 00:00:00 2001
From: tianhei <tianheilene@gmail.com>
Date: Thu, 19 Mar 2026 23:50:28 +0800
Subject: [PATCH 11/22] test(tts): cover fallback 500 path and clean fixtures

Summary:
- add a route test for unexpected non-TtsError failures so the generic
  500 fallback path stays covered
- simplify provider test fixtures by constructing TtsError directly
  instead of mutating the instance after creation

Rationale:
- the latest review only left test-layer gaps, but they still matter for
  future refactors around error handling
- using the constructor directly keeps the fixture idiomatic and matches
  the runtime shape more clearly

Tests:
- bun test apps/web/src/lib/tts/provider.test.ts apps/web/src/app/api/tts/generate/route.test.ts
- bun test apps/web/src/lib/tts/fetch-with-timeout.test.ts apps/web/src/lib/tts/provider.test.ts apps/web/src/lib/tts/openai-compatible.test.ts apps/web/src/lib/tts/legacy.test.ts apps/web/src/app/api/tts/generate/route.test.ts
- bunx @biomejs/biome check apps/web/src/lib/tts/provider.test.ts apps/web/src/app/api/tts/generate/route.test.ts
- bunx tsc -p apps/web/tsconfig.json --noEmit

Co-authored-by: Codex <codex@openai.com>
---
 apps/web/src/app/api/tts/generate/route.test.ts | 14 ++++++++++++++
 apps/web/src/lib/tts/provider.test.ts           | 16 ++++++----------
 2 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/apps/web/src/app/api/tts/generate/route.test.ts b/apps/web/src/app/api/tts/generate/route.test.ts
index 77485f1..6250a9f 100644
--- a/apps/web/src/app/api/tts/generate/route.test.ts
+++ b/apps/web/src/app/api/tts/generate/route.test.ts
@@ -96,4 +96,18 @@ describe("POST /api/tts/generate", () => {
 			error: "external config missing",
 		});
 	});
+
+	test("returns 500 for unexpected non-TtsError exceptions", async () => {
+		synthesizeImpl = async () => {
+			throw new Error("unexpected failure");
+		};
+
+		const response = await POST(createRequest({ text: "hello" }));
+
+		expect(response.status).toBe(500);
+		expect(await response.json()).toEqual({
+			error: "Internal server error",
+			detail: "unexpected failure",
+		});
+	});
 });
diff --git a/apps/web/src/lib/tts/provider.test.ts b/apps/web/src/lib/tts/provider.test.ts
index 627f9d1..abef510 100644
--- a/apps/web/src/lib/tts/provider.test.ts
+++ b/apps/web/src/lib/tts/provider.test.ts
@@ -93,16 +93,12 @@ describe("synthesizeSpeechWithFallback", () => {
 				text: "hello",
 				voice: "default",
 				openAiSynthesize: async () => {
-					throw Object.assign(
-						new TtsError({
-							code: "EXTERNAL_TTS_UPSTREAM",
-							message: "External TTS request failed: invalid api key",
-						}),
-						{
-							retryable: false,
-							status: 401,
-						},
-					);
+					throw new TtsError({
+						code: "EXTERNAL_TTS_UPSTREAM",
+						message: "External TTS request failed: invalid api key",
+						retryable: false,
+						status: 401,
+					});
 				},
 				legacySynthesize: async () => {
 					legacyCalled = true;

From 97faf992ae09d3fe5a61e71d448590916836d1e5 Mon Sep 17 00:00:00 2001
From: tianhei <tianheilene@gmail.com>
Date: Thu, 19 Mar 2026 23:59:08 +0800
Subject: [PATCH 12/22] fix(tts): harden upstream audio reads and probes

Summary:
- reject non-http external base URLs and wrap audio body read failures
  as structured non-retryable TtsError values
- tighten the legacy redirect success test to assert the follow-up
  fetch uses redirect: error
- replace the implementation-plan placeholder with a concrete bun
  probe for the external adapter entrypoint

Rationale:
- the latest review found two real gaps where malformed config or body
  stream failures could escape the expected TTS error semantics
- the stricter redirect assertion pins the security-sensitive fetch mode
  after the validated redirect handoff
- the doc probe now matches the shipped adapter and is reproducible

Tests:
- bun test apps/web/src/lib/tts/openai-compatible.test.ts apps/web/src/lib/tts/legacy.test.ts
- bun test apps/web/src/lib/tts/fetch-with-timeout.test.ts apps/web/src/lib/tts/provider.test.ts apps/web/src/lib/tts/openai-compatible.test.ts apps/web/src/lib/tts/legacy.test.ts apps/web/src/app/api/tts/generate/route.test.ts
- bunx @biomejs/biome check apps/web/src/app/api/tts/generate/route.test.ts apps/web/src/lib/tts/openai-compatible.ts apps/web/src/lib/tts/openai-compatible.test.ts apps/web/src/lib/tts/legacy.test.ts docs/plans/2026-03-17-tts-external-provider.md
- bunx tsc -p apps/web/tsconfig.json --noEmit

Co-authored-by: Codex <codex@openai.com>
---
 apps/web/src/lib/tts/legacy.test.ts           |  1 +
 .../web/src/lib/tts/openai-compatible.test.ts | 42 +++++++++++++++++++
 apps/web/src/lib/tts/openai-compatible.ts     | 19 ++++++++-
 .../plans/2026-03-17-tts-external-provider.md |  2 +-
 4 files changed, 61 insertions(+), 3 deletions(-)

diff --git a/apps/web/src/lib/tts/legacy.test.ts b/apps/web/src/lib/tts/legacy.test.ts
index bbe9896..67620db 100644
--- a/apps/web/src/lib/tts/legacy.test.ts
+++ b/apps/web/src/lib/tts/legacy.test.ts
@@ -141,6 +141,7 @@ describe("synthesizeSpeechWithLegacyProvider", () => {
 					});
 				}
 
+				expect(init?.redirect).toBe("error");
 				expect(String(input)).toBe(
 					"https://api.milorapart.top/voice/test-redirected.mp3",
 				);
diff --git a/apps/web/src/lib/tts/openai-compatible.test.ts b/apps/web/src/lib/tts/openai-compatible.test.ts
index 366065f..eaad7e8 100644
--- a/apps/web/src/lib/tts/openai-compatible.test.ts
+++ b/apps/web/src/lib/tts/openai-compatible.test.ts
@@ -91,6 +91,18 @@ describe("getExternalTtsConfig", () => {
 			}),
 		).toThrow("External TTS is not configured");
 	});
+
+	test("rejects non-http API_BASE_URL schemes", () => {
+		expect(() =>
+			getExternalTtsConfig({
+				env: {
+					EXTERNAL_TTS_API_BASE_URL: "mailto:tts@example.com",
+					EXTERNAL_TTS_API_MODEL: "tts-1",
+					EXTERNAL_TTS_API_KEY: "secret",
+				},
+			}),
+		).toThrow("External TTS is not configured");
+	});
 });
 
 describe("synthesizeSpeechWithOpenAiCompatible", () => {
@@ -269,6 +281,36 @@ describe("synthesizeSpeechWithOpenAiCompatible", () => {
 		expect(Array.from(new Uint8Array(audio))).toEqual([1, 2, 3]);
 	});
 
+	test("wraps arrayBuffer read failures as non-retryable upstream errors", async () => {
+		const response = new Response(Uint8Array.from([1, 2, 3]), {
+			status: 200,
+			headers: { "Content-Type": "audio/mpeg" },
+		});
+		Object.defineProperty(response, "arrayBuffer", {
+			value: async () => {
+				throw new Error("stream failed");
+			},
+		});
+
+		await expect(
+			synthesizeSpeechWithOpenAiCompatible({
+				config: {
+					apiBaseUrl: "https://example.com/v1",
+					apiKey: "secret",
+					model: "tts-1",
+				},
+				text: "hello",
+				voice: "default",
+				fetchImpl: async () => response,
+			}),
+		).rejects.toMatchObject({
+			code: "EXTERNAL_TTS_UPSTREAM",
+			message: "External TTS audio read failed: stream failed",
+			retryable: false,
+			status: 200,
+		});
+	});
+
 	test("aborts upstream requests that exceed the timeout", async () => {
 		await expect(
 			synthesizeSpeechWithOpenAiCompatible({
diff --git a/apps/web/src/lib/tts/openai-compatible.ts b/apps/web/src/lib/tts/openai-compatible.ts
index 8fb42d0..6f55c1d 100644
--- a/apps/web/src/lib/tts/openai-compatible.ts
+++ b/apps/web/src/lib/tts/openai-compatible.ts
@@ -82,7 +82,11 @@ export function getExternalTtsConfig({
 	}
 
 	try {
-		new URL(apiBaseUrl);
+		const url = new URL(apiBaseUrl);
+
+		if (url.protocol !== "http:" && url.protocol !== "https:") {
+			throw new Error("Unsupported protocol");
+		}
 	} catch {
 		throw new TtsError({
 			code: "EXTERNAL_TTS_CONFIG",
@@ -228,7 +232,18 @@ export async function synthesizeSpeechWithOpenAiCompatible({
 				});
 			}
 
-			const audio = await response.arrayBuffer();
+			let audio: ArrayBuffer;
+
+			try {
+				audio = await response.arrayBuffer();
+			} catch (error) {
+				throw new TtsError({
+					code: "EXTERNAL_TTS_UPSTREAM",
+					message: `External TTS audio read failed: ${error instanceof Error ? error.message : "Unknown error"}`,
+					retryable: false,
+					status: response.status,
+				});
+			}
 
 			if (audio.byteLength === 0) {
 				throw new TtsError({
diff --git a/docs/plans/2026-03-17-tts-external-provider.md b/docs/plans/2026-03-17-tts-external-provider.md
index c05f40d..8b32e0f 100644
--- a/docs/plans/2026-03-17-tts-external-provider.md
+++ b/docs/plans/2026-03-17-tts-external-provider.md
@@ -125,7 +125,7 @@ Expected: PASS
 
 **Step 2: 运行真实外部 TTS 验证**
 
-Run: `bun --eval '<补一段调用适配层的脚本>'`
+Run: `bun --eval 'import { getExternalTtsConfig, synthesizeSpeechWithOpenAiCompatible } from "./apps/web/src/lib/tts/openai-compatible.ts"; const config = getExternalTtsConfig({ env: process.env }); const audio = await synthesizeSpeechWithOpenAiCompatible({ config, text: "Cutia TTS probe", voice: "default" }); console.log(audio.byteLength);'`
 Expected: 输出非空音频字节长度，不打印密钥。
 
 **Step 3: 检查格式与类型**

From 7279ef157b7c20118ee8ac54f31ef810c189727d Mon Sep 17 00:00:00 2001
From: tianhei <tianheilene@gmail.com>
Date: Fri, 20 Mar 2026 00:07:30 +0800
Subject: [PATCH 13/22] test(tts): cover media and timeline insertion flow

Summary:
- add service-level tests for base64 decoding from `/api/tts/generate`
- verify generated speech is uploaded into the media library and
  inserted onto an available audio track
- verify overlapping inserts allocate a new audio track when needed

Rationale:
- the remaining acceptance surface was the editor-side integration from
  generated audio into Cutia's media and timeline workflow
- these tests give deterministic evidence for the app integration even
  while the shared external provider still lacks real audio output

Tests:
- bun test apps/web/src/lib/tts/service.test.ts
- bunx @biomejs/biome check apps/web/src/lib/tts/service.test.ts apps/web/src/lib/tts/service.ts
- bunx tsc -p apps/web/tsconfig.json --noEmit

Co-authored-by: Codex <codex@openai.com>
---
 apps/web/src/lib/tts/service.test.ts | 220 +++++++++++++++++++++++++++
 1 file changed, 220 insertions(+)
 create mode 100644 apps/web/src/lib/tts/service.test.ts

diff --git a/apps/web/src/lib/tts/service.test.ts b/apps/web/src/lib/tts/service.test.ts
new file mode 100644
index 0000000..86624c2
--- /dev/null
+++ b/apps/web/src/lib/tts/service.test.ts
@@ -0,0 +1,220 @@
+import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
+import type { EditorCore } from "@/core";
+import type { AudioTrack } from "@/types/timeline";
+import { generateAndInsertSpeech, generateSpeechFromText } from "./service";
+
+const originalFetch = globalThis.fetch;
+const originalAudioContext = globalThis.AudioContext;
+const originalCreateObjectURL = URL.createObjectURL;
+
+describe("tts service", () => {
+	let decodedBytes: number[] | null;
+	let fakeBuffer: AudioBuffer;
+
+	beforeEach(() => {
+		decodedBytes = null;
+		fakeBuffer = { duration: 2.5 } as AudioBuffer;
+
+		Object.defineProperty(globalThis, "AudioContext", {
+			configurable: true,
+			value: class FakeAudioContext {
+				async decodeAudioData(arrayBuffer: ArrayBuffer) {
+					decodedBytes = Array.from(new Uint8Array(arrayBuffer));
+					return fakeBuffer;
+				}
+			},
+		});
+		URL.createObjectURL = mock(() => "blob:tts-preview");
+	});
+
+	afterEach(() => {
+		globalThis.fetch = originalFetch;
+		Object.defineProperty(globalThis, "AudioContext", {
+			configurable: true,
+			value: originalAudioContext,
+		});
+		URL.createObjectURL = originalCreateObjectURL;
+	});
+
+	test("generateSpeechFromText decodes base64 audio returned by the route", async () => {
+		const fetchCalls: Array<[RequestInfo | URL, RequestInit | undefined]> = [];
+		globalThis.fetch = (async (input, init) => {
+			fetchCalls.push([input, init]);
+			return Response.json({ audio: "AQID" });
+		}) as typeof fetch;
+
+		const result = await generateSpeechFromText({
+			text: "hello",
+			voice: "nova",
+		});
+
+		expect(fetchCalls).toHaveLength(1);
+		expect(fetchCalls[0]?.[0]).toBe("/api/tts/generate");
+		expect(fetchCalls[0]?.[1]).toMatchObject({
+			method: "POST",
+			headers: { "Content-Type": "application/json" },
+		});
+		expect(JSON.parse(String(fetchCalls[0]?.[1]?.body))).toEqual({
+			text: "hello",
+			voice: "nova",
+		});
+		expect(decodedBytes).toEqual([1, 2, 3]);
+		expect(result.duration).toBe(2.5);
+		expect(result.buffer).toBe(fakeBuffer);
+		expect(result.blob.type).toBe("audio/mpeg");
+		expect(Array.from(new Uint8Array(await result.blob.arrayBuffer()))).toEqual(
+			[1, 2, 3],
+		);
+	});
+
+	test("generateAndInsertSpeech uploads generated audio and inserts it into an existing audio track", async () => {
+		globalThis.fetch = (async () =>
+			Response.json({ audio: "AQID" })) as unknown as typeof fetch;
+
+		const tracks: AudioTrack[] = [
+			{
+				id: "audio-track-1",
+				name: "Audio 1",
+				type: "audio",
+				muted: false,
+				elements: [],
+			},
+		];
+		const addMediaAssetCalls: unknown[] = [];
+		const addMediaAssetMock = async (args: unknown) => {
+			addMediaAssetCalls.push(args);
+			return "media-1";
+		};
+		let addTrackCallCount = 0;
+		const addTrackMock = () => {
+			addTrackCallCount++;
+			throw new Error("addTrack should not be called");
+		};
+		const insertElementCalls: unknown[] = [];
+		const insertElementMock = (args: unknown) => {
+			insertElementCalls.push(args);
+		};
+
+		const editor = {
+			media: {
+				addMediaAsset: addMediaAssetMock,
+			},
+			project: {
+				getActive: () => ({
+					metadata: { id: "project-1" },
+				}),
+			},
+			timeline: {
+				getTracks: () => tracks,
+				addTrack: addTrackMock,
+				insertElement: insertElementMock,
+			},
+		} as unknown as EditorCore;
+
+		const result = await generateAndInsertSpeech({
+			editor,
+			text: "hello world",
+			startTime: 3,
+			voice: "default",
+		});
+
+		expect(result).toEqual({ duration: 2.5 });
+		expect(addMediaAssetCalls).toHaveLength(1);
+		expect(addMediaAssetCalls[0]).toMatchObject({
+			projectId: "project-1",
+			asset: {
+				name: "TTS: hello world",
+				type: "audio",
+				url: "blob:tts-preview",
+				duration: 2.5,
+				ephemeral: true,
+			},
+		});
+		expect(insertElementCalls).toHaveLength(1);
+		expect(insertElementCalls[0]).toMatchObject({
+			placement: {
+				mode: "explicit",
+				trackId: "audio-track-1",
+			},
+			element: {
+				type: "audio",
+				sourceType: "upload",
+				mediaId: "media-1",
+				name: "TTS: hello world",
+				duration: 2.5,
+				startTime: 3,
+				buffer: fakeBuffer,
+			},
+		});
+		expect(addTrackCallCount).toBe(0);
+	});
+
+	test("generateAndInsertSpeech creates a new audio track when existing ones overlap", async () => {
+		globalThis.fetch = (async () =>
+			Response.json({ audio: "AQID" })) as unknown as typeof fetch;
+
+		const tracks: AudioTrack[] = [
+			{
+				id: "audio-track-1",
+				name: "Audio 1",
+				type: "audio",
+				muted: false,
+				elements: [
+					{
+						id: "audio-el-1",
+						type: "audio",
+						sourceType: "upload",
+						mediaId: "existing-media",
+						name: "Existing audio",
+						duration: 10,
+						startTime: 0,
+						trimStart: 0,
+						trimEnd: 0,
+						volume: 1,
+						muted: false,
+					},
+				],
+			},
+		];
+		const addMediaAssetMock = async () => "media-2";
+		const addTrackCalls: unknown[] = [];
+		const addTrackMock = (args: unknown) => {
+			addTrackCalls.push(args);
+			return "audio-track-2";
+		};
+		const insertElementCalls: unknown[] = [];
+		const insertElementMock = (args: unknown) => {
+			insertElementCalls.push(args);
+		};
+
+		const editor = {
+			media: {
+				addMediaAsset: addMediaAssetMock,
+			},
+			project: {
+				getActive: () => ({
+					metadata: { id: "project-1" },
+				}),
+			},
+			timeline: {
+				getTracks: () => tracks,
+				addTrack: addTrackMock,
+				insertElement: insertElementMock,
+			},
+		} as unknown as EditorCore;
+
+		await generateAndInsertSpeech({
+			editor,
+			text: "overlap check",
+			startTime: 2,
+		});
+
+		expect(addTrackCalls).toEqual([{ type: "audio" }]);
+		expect(insertElementCalls[0]).toMatchObject({
+			placement: {
+				mode: "explicit",
+				trackId: "audio-track-2",
+			},
+		});
+	});
+});

From 13588747e4100cc625966c0e15fa2d803696721e Mon Sep 17 00:00:00 2001
From: tianhei <tianheilene@gmail.com>
Date: Fri, 20 Mar 2026 00:21:05 +0800
Subject: [PATCH 14/22] test(tts): reduce flake and tighten final invariants

Summary:
- extract shared legacy metadata fixtures and raise timeout test values
  to reduce scheduler-sensitive flakes
- replace the impossible synthetic external fallback response with an
  explicit invariant error before final upstream error mapping

Rationale:
- the latest review only pointed out maintainability and test stability
  issues, but these are cheap to fix and remove avoidable noise
- failing loudly on an impossible null upstream response makes future
  adapter regressions easier to diagnose than masking them as 500s

Tests:
- bun test apps/web/src/lib/tts/legacy.test.ts apps/web/src/lib/tts/openai-compatible.test.ts
- bunx @biomejs/biome check apps/web/src/lib/tts/legacy.test.ts apps/web/src/lib/tts/openai-compatible.ts
- bunx tsc -p apps/web/tsconfig.json --noEmit

Co-authored-by: Codex <codex@openai.com>
---
 apps/web/src/lib/tts/legacy.test.ts       | 54 +++++++++--------------
 apps/web/src/lib/tts/openai-compatible.ts | 12 +++--
 2 files changed, 29 insertions(+), 37 deletions(-)

diff --git a/apps/web/src/lib/tts/legacy.test.ts b/apps/web/src/lib/tts/legacy.test.ts
index 67620db..950a746 100644
--- a/apps/web/src/lib/tts/legacy.test.ts
+++ b/apps/web/src/lib/tts/legacy.test.ts
@@ -2,6 +2,16 @@ import { describe, expect, test } from "bun:test";
 import { synthesizeSpeechWithLegacyProvider } from "./legacy";
 
 describe("synthesizeSpeechWithLegacyProvider", () => {
+	const TEST_TIMEOUT_MS = 50;
+	const LEGACY_AUDIO_URL = "https://api.milorapart.top/voice/test.mp3";
+
+	function legacyMetadataOk(url = LEGACY_AUDIO_URL): Response {
+		return Response.json({
+			code: 200,
+			url,
+		});
+	}
+
 	test("rejects audio urls outside the expected https host allowlist", async () => {
 		const calls: string[] = [];
 
@@ -10,10 +20,7 @@ describe("synthesizeSpeechWithLegacyProvider", () => {
 				text: "hello",
 				fetchImpl: async (input) => {
 					calls.push(String(input));
-					return Response.json({
-						code: 200,
-						url: "http://127.0.0.1/internal.mp3",
-					});
+					return legacyMetadataOk("http://127.0.0.1/internal.mp3");
 				},
 			}),
 		).rejects.toThrow("Legacy TTS returned an unexpected audio URL");
@@ -27,10 +34,7 @@ describe("synthesizeSpeechWithLegacyProvider", () => {
 				text: "hello",
 				fetchImpl: async (input) => {
 					if (String(input).includes("/apis/mbAIsc?")) {
-						return Response.json({
-							code: 200,
-							url: "https://api.milorapart.top/voice/test.mp3",
-						});
+						return legacyMetadataOk();
 					}
 
 					return new Response("<html></html>", {
@@ -48,10 +52,7 @@ describe("synthesizeSpeechWithLegacyProvider", () => {
 				text: "hello",
 				fetchImpl: async (input) => {
 					if (String(input).includes("/apis/mbAIsc?")) {
-						return Response.json({
-							code: 200,
-							url: "https://api.milorapart.top/voice/test.mp3",
-						});
+						return legacyMetadataOk();
 					}
 
 					return new Response(Uint8Array.from([1, 2, 3]), {
@@ -67,10 +68,7 @@ describe("synthesizeSpeechWithLegacyProvider", () => {
 			text: "hello",
 			fetchImpl: async (input) => {
 				if (String(input).includes("/apis/mbAIsc?")) {
-					return Response.json({
-						code: 200,
-						url: "https://api.milorapart.top/voice/test.mp3",
-					});
+					return legacyMetadataOk();
 				}
 
 				return new Response(Uint8Array.from([1, 2, 3]), {
@@ -91,10 +89,7 @@ describe("synthesizeSpeechWithLegacyProvider", () => {
 				text: "hello",
 				fetchImpl: async (input, init) => {
 					if (String(input).includes("/apis/mbAIsc?")) {
-						return Response.json({
-							code: 200,
-							url: "https://api.milorapart.top/voice/test.mp3",
-						});
+						return legacyMetadataOk();
 					}
 
 					sawManualRedirect = init?.redirect === "manual";
@@ -122,10 +117,7 @@ describe("synthesizeSpeechWithLegacyProvider", () => {
 			text: "hello",
 			fetchImpl: async (input, init) => {
 				if (String(input).includes("/apis/mbAIsc?")) {
-					return Response.json({
-						code: 200,
-						url: "https://api.milorapart.top/voice/test.mp3",
-					});
+					return legacyMetadataOk();
 				}
 
 				downloadCallCount++;
@@ -164,10 +156,7 @@ describe("synthesizeSpeechWithLegacyProvider", () => {
 				text: "中".repeat(400),
 				fetchImpl: async () => {
 					fetchCalled = true;
-					return Response.json({
-						code: 200,
-						url: "https://api.milorapart.top/voice/test.mp3",
-					});
+					return legacyMetadataOk();
 				},
 			}),
 		).rejects.toThrow("Legacy TTS text is too long for GET fallback");
@@ -179,7 +168,7 @@ describe("synthesizeSpeechWithLegacyProvider", () => {
 		await expect(
 			synthesizeSpeechWithLegacyProvider({
 				text: "hello",
-				timeoutMs: 10,
+				timeoutMs: TEST_TIMEOUT_MS,
 				fetchImpl: async (_input, init) =>
 					new Promise((_resolve, reject) => {
 						init?.signal?.addEventListener(
@@ -201,15 +190,12 @@ describe("synthesizeSpeechWithLegacyProvider", () => {
 		await expect(
 			synthesizeSpeechWithLegacyProvider({
 				text: "hello",
-				timeoutMs: 10,
+				timeoutMs: TEST_TIMEOUT_MS,
 				fetchImpl: async (_input, init) => {
 					callCount++;
 
 					if (callCount === 1) {
-						return Response.json({
-							code: 200,
-							url: "https://api.milorapart.top/voice/test.mp3",
-						});
+						return legacyMetadataOk();
 					}
 
 					return new Promise((_resolve, reject) => {
diff --git a/apps/web/src/lib/tts/openai-compatible.ts b/apps/web/src/lib/tts/openai-compatible.ts
index 6f55c1d..0f272bc 100644
--- a/apps/web/src/lib/tts/openai-compatible.ts
+++ b/apps/web/src/lib/tts/openai-compatible.ts
@@ -264,12 +264,18 @@ export async function synthesizeSpeechWithOpenAiCompatible({
 		}
 	}
 
+	if (!lastErrorResponse) {
+		throw new Error(
+			"Expected external TTS to capture an upstream response before failing",
+		);
+	}
+
 	throw new TtsError({
 		code: "EXTERNAL_TTS_UPSTREAM",
 		message: `External TTS request failed: ${await getUpstreamErrorMessage({
-			response: lastErrorResponse ?? new Response(null, { status: 500 }),
+			response: lastErrorResponse,
 		})}`,
-		retryable: isRetryableStatus(lastErrorResponse?.status),
-		status: lastErrorResponse?.status,
+		retryable: isRetryableStatus(lastErrorResponse.status),
+		status: lastErrorResponse.status,
 	});
 }

From dcc5a176c9e39d30664e80f6c7e221a85ca19e2a Mon Sep 17 00:00:00 2001
From: tianhei <tianheilene@gmail.com>
Date: Fri, 20 Mar 2026 00:43:58 +0800
Subject: [PATCH 15/22] docs(tts): document external provider env vars

Summary:
- add external TTS env examples to apps/web/.env.example
- document preferred EXTERNAL_TTS_API_* settings in the README

Rationale:
- keep repo-local setup docs aligned with the shipped TTS adapter
- reduce configuration drift while origin push and live provider remain blocked

Tests:
- rg -n "EXTERNAL_TTS_API_BASE_URL|EXTERNAL_TTS_API_MODEL|EXTERNAL_TTS_API_KEY|compatibility aliases|Optional TTS env values" apps/web/.env.example README.md
- git diff --check

Co-authored-by: Codex <codex@openai.com>
---
 README.md             | 12 ++++++++++++
 apps/web/.env.example | 12 +++++++++++-
 2 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index e26ac14..eac68fa 100644
--- a/README.md
+++ b/README.md
@@ -72,6 +72,18 @@ UPSTASH_REDIS_REST_TOKEN="cutia_redis_token"
 NODE_ENV="development"
 ```
 
+Optional TTS env values:
+
+```bash
+EXTERNAL_TTS_API_BASE_URL="https://your-tts-provider.example.com/v1"
+EXTERNAL_TTS_API_MODEL="your_tts_model"
+EXTERNAL_TTS_API_KEY="your_tts_api_key"
+```
+
+Cutia prefers `EXTERNAL_TTS_API_*` for external speech synthesis. The legacy
+`API_BASE_URL` / `API_MODEL` / `API_KEY` names are still accepted as
+compatibility aliases when the namespaced variables are absent.
+
 To enable authentication, also start PostgreSQL and add these env values:
 
 ```bash
diff --git a/apps/web/.env.example b/apps/web/.env.example
index 85e483b..0b78b6c 100644
--- a/apps/web/.env.example
+++ b/apps/web/.env.example
@@ -16,9 +16,19 @@ UPSTASH_REDIS_REST_TOKEN=example_token_here
 FREESOUND_CLIENT_ID=your_client_id_here
 FREESOUND_API_KEY=your_api_key_here
 
+# Optional: external OpenAI-compatible TTS provider
+# Preferred namespaced variables:
+EXTERNAL_TTS_API_BASE_URL=https://your-tts-provider.example.com/v1
+EXTERNAL_TTS_API_MODEL=your_tts_model
+EXTERNAL_TTS_API_KEY=your_tts_api_key
+# Compatibility aliases used when EXTERNAL_TTS_* is absent:
+# API_BASE_URL=https://your-shared-api.example.com/v1
+# API_MODEL=your_tts_model
+# API_KEY=your_tts_api_key
+
 # Cloudflare R2 (for reference image uploads)
 R2_ACCOUNT_ID=your_r2_account_id
 R2_ACCESS_KEY_ID=your_r2_access_key_id
 R2_SECRET_ACCESS_KEY=your_r2_secret_access_key
 R2_BUCKET_NAME=your_r2_bucket_name
-R2_PUBLIC_URL=https://your-r2-public-url.example.com
\ No newline at end of file
+R2_PUBLIC_URL=https://your-r2-public-url.example.com

From 1b99bcc9a7a1a3fb7db8aef13badf927b800c0e7 Mon Sep 17 00:00:00 2001
From: tianhei <tianheilene@gmail.com>
Date: Fri, 20 Mar 2026 00:49:04 +0800
Subject: [PATCH 16/22] docs(tts): add live probe commands

Summary:
- document an adapter-level external TTS probe in the README
- document an end-to-end route probe in the README

Rationale:
- make future provider validation runnable without digging through Linear
- reduce manual handoff friction while external blockers remain

Tests:
- rg -n "Cutia TTS probe|Cutia route probe|verify that the configured provider can actually return audio|verify the route end-to-end" README.md
- git diff --check

Co-authored-by: Codex <codex@openai.com>
---
 README.md | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/README.md b/README.md
index eac68fa..7d43b9b 100644
--- a/README.md
+++ b/README.md
@@ -84,6 +84,18 @@ Cutia prefers `EXTERNAL_TTS_API_*` for external speech synthesis. The legacy
 `API_BASE_URL` / `API_MODEL` / `API_KEY` names are still accepted as
 compatibility aliases when the namespaced variables are absent.
 
+To verify that the configured provider can actually return audio, run this from the `apps/web` directory:
+
+```bash
+bun --eval 'import { getExternalTtsConfig, synthesizeSpeechWithOpenAiCompatible } from "./src/lib/tts/openai-compatible.ts"; const config = getExternalTtsConfig({ env: process.env }); const audio = await synthesizeSpeechWithOpenAiCompatible({ config, text: "Cutia TTS probe", voice: "default" }); console.log(audio.byteLength);'
+```
+
+If you want to verify the route end-to-end, run this from the `apps/web` directory:
+
+```bash
+NODE_ENV=development NEXT_PUBLIC_SITE_URL=http://localhost:3000 UPSTASH_REDIS_REST_URL=http://localhost:8079 UPSTASH_REDIS_REST_TOKEN=cutia_redis_token bun --eval 'import { NextRequest } from "next/server"; import { POST } from "./src/app/api/tts/generate/route.ts"; const request = new NextRequest("http://localhost/api/tts/generate", { method: "POST", headers: { "content-type": "application/json" }, body: JSON.stringify({ text: "Cutia route probe", voice: "default" }) }); const response = await POST(request); console.log(response.status); console.log(await response.text());'
+```
+
 To enable authentication, also start PostgreSQL and add these env values:
 
 ```bash

From db757802866f3e29e4bc2e9882675e96d22b554f Mon Sep 17 00:00:00 2001
From: tianhei <tianheilene@gmail.com>
Date: Fri, 20 Mar 2026 01:17:39 +0800
Subject: [PATCH 17/22] feat(tts): add responses websocket fallback

Summary:
- add a /responses WebSocket fallback to the external TTS adapter
  when /audio/speech is unavailable
- assemble audio chunks from response audio delta events and map
  websocket close reasons into structured upstream errors
- extend TTS adapter and provider tests to cover the new fallback
  path and updated error behavior

Rationale:
- the active shared gateway exposes TTS generation through
  /responses over WebSocket rather than only /audio/speech
- supporting both compatibility shapes narrows the remaining live
  blocker to provider account availability instead of protocol gaps

Tests:
- bun test ./src/lib/tts/service.test.ts ./src/lib/tts/fetch-with-timeout.test.ts ./src/lib/tts/provider.test.ts ./src/lib/tts/openai-compatible.test.ts ./src/lib/tts/legacy.test.ts ./src/app/api/tts/generate/route.test.ts
- bun --eval 'import { getExternalTtsConfig, synthesizeSpeechWithOpenAiCompatible } from "./src/lib/tts/openai-compatible.ts"; const config = getExternalTtsConfig({ env: process.env }); const audio = await synthesizeSpeechWithOpenAiCompatible({ config, text: "Cutia TTS probe", voice: "default" }); console.log(audio.byteLength);' (fails: External TTS websocket request failed: no available account)
- NODE_ENV=development NEXT_PUBLIC_SITE_URL=http://localhost:3000 UPSTASH_REDIS_REST_URL=http://localhost:8079 UPSTASH_REDIS_REST_TOKEN=cutia_redis_token bun --eval 'import { NextRequest } from "next/server"; import { POST } from "./src/app/api/tts/generate/route.ts"; const request = new NextRequest("http://localhost/api/tts/generate", { method: "POST", headers: { "content-type": "application/json" }, body: JSON.stringify({ text: "Cutia route probe", voice: "default" }) }); const response = await POST(request); console.log(response.status); console.log(await response.text());' (returns 502 with no available account)

Co-authored-by: Codex <codex@openai.com>
---
 .../web/src/lib/tts/openai-compatible.test.ts | 172 +++++++-
 apps/web/src/lib/tts/openai-compatible.ts     | 404 +++++++++++++++++-
 apps/web/src/lib/tts/provider.test.ts         |  21 +-
 3 files changed, 577 insertions(+), 20 deletions(-)

diff --git a/apps/web/src/lib/tts/openai-compatible.test.ts b/apps/web/src/lib/tts/openai-compatible.test.ts
index eaad7e8..383e6fd 100644
--- a/apps/web/src/lib/tts/openai-compatible.test.ts
+++ b/apps/web/src/lib/tts/openai-compatible.test.ts
@@ -5,6 +5,52 @@ import {
 	synthesizeSpeechWithOpenAiCompatible,
 } from "./openai-compatible";
 
+type WebSocketListenerMap = {
+	close: Array<(event: { code: number; reason: string }) => void>;
+	error: Array<(event: { message?: string; type?: string }) => void>;
+	message: Array<(event: { data: unknown }) => void>;
+	open: Array<() => void>;
+};
+
+class FakeWebSocket {
+	public readonly sentMessages: string[] = [];
+	private readonly listeners: WebSocketListenerMap = {
+		close: [],
+		error: [],
+		message: [],
+		open: [],
+	};
+
+	constructor(
+		public readonly url: string,
+		public readonly init?: { headers?: Record<string, string> },
+	) {}
+
+	addEventListener<K extends keyof WebSocketListenerMap>(
+		type: K,
+		listener: WebSocketListenerMap[K][number],
+	) {
+		this.listeners[type].push(listener);
+	}
+
+	close(code = 1000, reason = "") {
+		this.emit("close", { code, reason });
+	}
+
+	emit<K extends keyof WebSocketListenerMap>(
+		type: K,
+		event: Parameters<WebSocketListenerMap[K][number]>[0],
+	) {
+		for (const listener of this.listeners[type]) {
+			listener(event as never);
+		}
+	}
+
+	send(message: string) {
+		this.sentMessages.push(message);
+	}
+}
+
 describe("getExternalTtsConfig", () => {
 	test("reads namespaced TTS config from environment", () => {
 		const config = getExternalTtsConfig({
@@ -236,9 +282,9 @@ describe("synthesizeSpeechWithOpenAiCompatible", () => {
 				text: "hello",
 				voice: "default",
 				fetchImpl: async () =>
-					new Response("<!doctype html>", {
+					new Response("not audio", {
 						status: 200,
-						headers: { "Content-Type": "text/html; charset=utf-8" },
+						headers: { "Content-Type": "text/plain; charset=utf-8" },
 					}),
 			}),
 		).rejects.toThrow("Expected audio response");
@@ -398,4 +444,126 @@ describe("synthesizeSpeechWithOpenAiCompatible", () => {
 			}),
 		).rejects.toThrow('{"message":"bad request"}');
 	});
+
+	test("falls back to /responses websocket audio when /audio/speech returns 404", async () => {
+		const sockets: FakeWebSocket[] = [];
+		const synthesis = synthesizeSpeechWithOpenAiCompatible({
+			config: {
+				apiBaseUrl: "https://example.com/v1",
+				apiKey: "secret",
+				model: "tts-1",
+			},
+			text: "hello",
+			voice: "default",
+			createWebSocket: (url, init) => {
+				const socket = new FakeWebSocket(url, init);
+				sockets.push(socket);
+				return socket;
+			},
+			fetchImpl: async () => new Response("page not found", { status: 404 }),
+		});
+		await new Promise((resolve) => setTimeout(resolve, 0));
+
+		expect(sockets).toHaveLength(1);
+		expect(sockets[0]?.url).toBe("wss://example.com/v1/responses");
+		expect(sockets[0]?.init?.headers?.Authorization).toBe("Bearer secret");
+
+		sockets[0]?.emit("open", undefined);
+		expect(JSON.parse(sockets[0]?.sentMessages[0] ?? "")).toEqual({
+			audio: { format: "mp3" },
+			input: "hello",
+			model: "tts-1",
+			output_modalities: ["audio"],
+			response: {
+				instructions: "hello",
+				modalities: ["audio"],
+				output_audio_format: "mp3",
+				voice: DEFAULT_EXTERNAL_TTS_VOICE,
+			},
+			type: "response.create",
+		});
+		sockets[0]?.emit("message", {
+			data: JSON.stringify({
+				type: "response.audio.delta",
+				delta: Buffer.from(Uint8Array.from([7, 8, 9])).toString("base64"),
+			}),
+		});
+		sockets[0]?.emit("message", {
+			data: JSON.stringify({ type: "response.completed" }),
+		});
+
+		expect(Array.from(new Uint8Array(await synthesis))).toEqual([7, 8, 9]);
+	});
+
+	test("falls back to /responses websocket audio when /audio/speech returns html", async () => {
+		const sockets: FakeWebSocket[] = [];
+		const synthesis = synthesizeSpeechWithOpenAiCompatible({
+			config: {
+				apiBaseUrl: "https://example.com/v1",
+				apiKey: "secret",
+				model: "tts-1",
+			},
+			text: "hello",
+			voice: "echo",
+			createWebSocket: (url, init) => {
+				const socket = new FakeWebSocket(url, init);
+				sockets.push(socket);
+				return socket;
+			},
+			fetchImpl: async () =>
+				new Response("<!doctype html>", {
+					status: 200,
+					headers: { "Content-Type": "text/html; charset=utf-8" },
+				}),
+		});
+		await new Promise((resolve) => setTimeout(resolve, 0));
+
+		sockets[0]?.emit("open", undefined);
+		sockets[0]?.emit("message", {
+			data: JSON.stringify({
+				type: "response.output_audio.delta",
+				delta: Buffer.from(Uint8Array.from([1, 2, 3, 4])).toString("base64"),
+			}),
+		});
+		sockets[0]?.emit("message", {
+			data: JSON.stringify({ type: "response.done" }),
+		});
+
+		expect(Array.from(new Uint8Array(await synthesis))).toEqual([1, 2, 3, 4]);
+		expect(JSON.parse(sockets[0]?.sentMessages[0] ?? "").response.voice).toBe(
+			"echo",
+		);
+	});
+
+	test("surfaces websocket close reasons as structured upstream errors", async () => {
+		const sockets: FakeWebSocket[] = [];
+		const synthesis = synthesizeSpeechWithOpenAiCompatible({
+			config: {
+				apiBaseUrl: "https://example.com/v1",
+				apiKey: "secret",
+				model: "tts-1",
+			},
+			text: "hello",
+			voice: "default",
+			createWebSocket: (url, init) => {
+				const socket = new FakeWebSocket(url, init);
+				sockets.push(socket);
+				return socket;
+			},
+			fetchImpl: async () => new Response("page not found", { status: 404 }),
+		});
+		await new Promise((resolve) => setTimeout(resolve, 0));
+
+		sockets[0]?.emit("open", undefined);
+		sockets[0]?.emit("close", {
+			code: 1013,
+			reason: "no available account",
+		});
+
+		await expect(synthesis).rejects.toMatchObject({
+			code: "EXTERNAL_TTS_UPSTREAM",
+			message: "External TTS websocket request failed: no available account",
+			retryable: false,
+		});
+	});
 });
diff --git a/apps/web/src/lib/tts/openai-compatible.ts b/apps/web/src/lib/tts/openai-compatible.ts
index 0f272bc..0cbf392 100644
--- a/apps/web/src/lib/tts/openai-compatible.ts
+++ b/apps/web/src/lib/tts/openai-compatible.ts
@@ -12,6 +12,7 @@ const externalTtsConfigSchema = z.object({
 	API_KEY: z.string().min(1),
 });
 const EXTERNAL_TTS_TIMEOUT_MS = 15_000;
+const EXTERNAL_TTS_RESPONSES_AUDIO_FORMAT = "mp3";
 
 export { DEFAULT_EXTERNAL_TTS_VOICE };
 
@@ -21,6 +22,58 @@ export interface ExternalTtsConfig {
 	model: string;
 }
 
+interface ExternalTtsWebSocketMessageEvent {
+	data: unknown;
+}
+
+interface ExternalTtsWebSocketErrorEvent {
+	message?: string;
+	type?: string;
+}
+
+interface ExternalTtsWebSocketCloseEvent {
+	code: number;
+	reason: string;
+}
+
+export interface ExternalTtsWebSocketLike {
+	addEventListener(
+		type: "close",
+		listener: (event: ExternalTtsWebSocketCloseEvent) => void,
+	): void;
+	addEventListener(
+		type: "error",
+		listener: (event: ExternalTtsWebSocketErrorEvent) => void,
+	): void;
+	addEventListener(
+		type: "message",
+		listener: (event: ExternalTtsWebSocketMessageEvent) => void,
+	): void;
+	addEventListener(type: "open", listener: () => void): void;
+	close(code?: number, reason?: string): void;
+	removeEventListener?(
+		type: "close",
+		listener: (event: ExternalTtsWebSocketCloseEvent) => void,
+	): void;
+	removeEventListener?(
+		type: "error",
+		listener: (event: ExternalTtsWebSocketErrorEvent) => void,
+	): void;
+	removeEventListener?(
+		type: "message",
+		listener: (event: ExternalTtsWebSocketMessageEvent) => void,
+	): void;
+	removeEventListener?(type: "open", listener: () => void): void;
+	send(data: string): void;
+}
+
+export type ExternalTtsWebSocketFactory = (
+	url: string,
+	init?: {
+		headers?: Record<string, string>;
+	},
+) => ExternalTtsWebSocketLike;
+
 function resolveExternalTtsEnv({
 	env,
 }: {
@@ -169,14 +222,343 @@ function getSpeechEndpointUrls({
 	return [...new Set(urls)];
 }
 
+function getResponsesEndpointUrls({
+	apiBaseUrl,
+}: {
+	apiBaseUrl: string;
+}): string[] {
+	const normalizedBaseUrl = apiBaseUrl.replace(/\/+$/, "");
+	const baseWithoutV1 = normalizedBaseUrl.endsWith("/v1")
+		? normalizedBaseUrl.slice(0, -3)
+		: normalizedBaseUrl;
+	const baseWithV1 = normalizedBaseUrl.endsWith("/v1")
+		? normalizedBaseUrl
+		: `${normalizedBaseUrl}/v1`;
+	const urls = [`${baseWithV1}/responses`, `${baseWithoutV1}/responses`];
+
+	return [...new Set(urls)];
+}
+
+function toWebSocketUrl({ url }: { url: string }): string {
+	const parsed = new URL(url);
+	parsed.protocol = parsed.protocol === "https:" ? "wss:" : "ws:";
+	return parsed.toString();
+}
+
+function isAudioContentType({ contentType }: { contentType: string }): boolean {
+	const mimeType = contentType.split(";")[0]?.trim().toLowerCase() ?? "";
+
+	return mimeType.startsWith("audio/") || mimeType === "application/octet-stream";
+}
+
+function shouldTryResponsesWebSocket({
+	response,
+}: {
+	response: Response;
+}): boolean {
+	if (response.status === 404 || response.status === 405 || response.status === 426) {
+		return true;
+	}
+
+	if (!response.ok) {
+		return false;
+	}
+
+	const contentType = response.headers.get("content-type") ?? "";
+	const mimeType = contentType.split(";")[0]?.trim().toLowerCase() ?? "";
+
+	return mimeType === "text/html";
+}
+
+function getResponsesWebSocketCloseRetryable({
+	code,
+	reason,
+}: {
+	code: number;
+	reason: string;
+}): boolean {
+	const normalizedReason = reason.trim().toLowerCase();
+
+	if (
+		normalizedReason.includes("no available account") ||
+		normalizedReason.includes("required") ||
+		normalizedReason.includes("unsupported")
+	) {
+		return false;
+	}
+
+	return code === 1006 || code === 1011 || code === 1012 || code === 1013;
+}
+
+function getResponsesWebSocketError({
+	code,
+	reason,
+}: {
+	code: number;
+	reason: string;
+}): TtsError {
+	return new TtsError({
+		code: "EXTERNAL_TTS_UPSTREAM",
+		message: `External TTS websocket request failed: ${
+			reason || `WebSocket closed (${code})`
+		}`,
+		retryable: getResponsesWebSocketCloseRetryable({ code, reason }),
+	});
+}
+
+function getResponseEventErrorMessage({
+	event,
+}: {
+	event: Record<string, unknown>;
+}): string | null {
+	if (typeof event.message === "string" && event.message.trim()) {
+		return event.message;
+	}
+
+	if (
+		typeof event.error === "object" &&
+		event.error !== null &&
+		"message" in event.error &&
+		typeof event.error.message === "string" &&
+		event.error.message.trim()
+	) {
+		return event.error.message;
+	}
+
+	return null;
+}
+
+function createExternalTtsWebSocket(
+	url: string,
+	init?: { headers?: Record<string, string> },
+): ExternalTtsWebSocketLike {
+	type NodeCompatibleWebSocket = new (
+		url: string,
+		init?: { headers?: Record<string, string> },
+	) => ExternalTtsWebSocketLike;
+
+	const WebSocketCtor =
+		globalThis.WebSocket as unknown as NodeCompatibleWebSocket;
+
+	return new WebSocketCtor(url, init);
+}
+
+async function synthesizeSpeechWithResponsesWebSocket({
+	config,
+	createWebSocket = createExternalTtsWebSocket,
+	text,
+	voice,
+}: {
+	config: ExternalTtsConfig;
+	createWebSocket?: ExternalTtsWebSocketFactory;
+	text: string;
+	voice?: string;
+}): Promise<ArrayBuffer> {
+	const endpointUrl = toWebSocketUrl({
+		url:
+			getResponsesEndpointUrls({ apiBaseUrl: config.apiBaseUrl })[0] ??
+			`${config.apiBaseUrl.replace(/\/+$/, "")}/responses`,
+	});
+	const audioChunks: Uint8Array[] = [];
+
+	return await new Promise<ArrayBuffer>((resolve, reject) => {
+		const socket = createWebSocket(endpointUrl, {
+			headers: {
+				Authorization: `Bearer ${config.apiKey}`,
+			},
+		});
+		let settled = false;
+
+		const cleanup = () => {
+			socket.removeEventListener?.("close", handleClose);
+			socket.removeEventListener?.("error", handleError);
+			socket.removeEventListener?.("message", handleMessage);
+			socket.removeEventListener?.("open", handleOpen);
+		};
+
+		const finish = ({
+			error,
+			value,
+		}: {
+			error?: TtsError;
+			value?: ArrayBuffer;
+		}) => {
+			if (settled) {
+				return;
+			}
+
+			settled = true;
+			cleanup();
+
+			try {
+				socket.close();
+			} catch {
+				// Best effort cleanup only.
+			}
+
+			if (error) {
+				reject(error);
+				return;
+			}
+
+			resolve(value ?? new ArrayBuffer(0));
+		};
+
+		const handleOpen = () => {
+			try {
+				socket.send(
+					JSON.stringify({
+						audio: {
+							format: EXTERNAL_TTS_RESPONSES_AUDIO_FORMAT,
+						},
+						input: text,
+						model: config.model,
+						output_modalities: ["audio"],
+						response: {
+							instructions: text,
+							modalities: ["audio"],
+							output_audio_format: EXTERNAL_TTS_RESPONSES_AUDIO_FORMAT,
+							voice: resolveVoice({ voice }),
+						},
+						type: "response.create",
+					}),
+				);
+			} catch (error) {
+				finish({
+					error: wrapExternalUpstreamError({ error }),
+				});
+			}
+		};
+
+		const handleMessage = async ({
+			data,
+		}: ExternalTtsWebSocketMessageEvent) => {
+			try {
+				if (data instanceof Blob) {
+					audioChunks.push(new Uint8Array(await data.arrayBuffer()));
+					return;
+				}
+
+				if (data instanceof ArrayBuffer) {
+					audioChunks.push(new Uint8Array(data));
+					return;
+				}
+
+				if (ArrayBuffer.isView(data)) {
+					audioChunks.push(
+						new Uint8Array(
+							data.buffer.slice(
+								data.byteOffset,
+								data.byteOffset + data.byteLength,
+							),
+						),
+					);
+					return;
+				}
+
+				if (typeof data !== "string") {
+					return;
+				}
+
+				const event = JSON.parse(data) as Record<string, unknown>;
+				const type = typeof event.type === "string" ? event.type : "";
+
+				if (
+					type === "response.audio.delta" ||
+					type === "response.output_audio.delta"
+				) {
+					if (typeof event.delta === "string" && event.delta.length > 0) {
+						audioChunks.push(Uint8Array.from(Buffer.from(event.delta, "base64")));
+					}
+					return;
+				}
+
+				if (type === "response.completed" || type === "response.done") {
+					const audio = Buffer.concat(
+						audioChunks.map((chunk) => Buffer.from(chunk)),
+					);
+
+					if (audio.byteLength === 0) {
+						finish({
+							error: new TtsError({
+								code: "EXTERNAL_TTS_UPSTREAM",
+								message: "External TTS returned empty audio",
+								retryable: false,
+							}),
+						});
+						return;
+					}
+
+					finish({
+						value: audio.buffer.slice(
+							audio.byteOffset,
+							audio.byteOffset + audio.byteLength,
+						),
+					});
+					return;
+				}
+
+				if (
+					type === "error" ||
+					type === "response.error" ||
+					type === "response.failed" ||
+					type === "response.incomplete"
+				) {
+					finish({
+						error: new TtsError({
+							code: "EXTERNAL_TTS_UPSTREAM",
+							message:
+								getResponseEventErrorMessage({ event }) ??
+								"External TTS websocket request failed",
+							retryable: false,
+						}),
+					});
+				}
+			} catch (error) {
+				finish({
+					error: wrapExternalUpstreamError({ error }),
+				});
+			}
+		};
+
+		const handleError = (event: ExternalTtsWebSocketErrorEvent) => {
+			finish({
+				error: new TtsError({
+					code: "EXTERNAL_TTS_UPSTREAM",
+					message:
+						event.message?.trim() || "External TTS websocket request failed",
+					retryable: true,
+				}),
+			});
+		};
+
+		const handleClose = ({ code, reason }: ExternalTtsWebSocketCloseEvent) => {
+			if (settled) {
+				return;
+			}
+
+			finish({
+				error: getResponsesWebSocketError({ code, reason }),
+			});
+		};
+
+		socket.addEventListener("open", handleOpen);
+		socket.addEventListener("message", handleMessage);
+		socket.addEventListener("error", handleError);
+		socket.addEventListener("close", handleClose);
+	});
+}
+
 export async function synthesizeSpeechWithOpenAiCompatible({
 	config,
+	createWebSocket = createExternalTtsWebSocket,
 	text,
 	voice,
 	fetchImpl = fetch,
 	timeoutMs = EXTERNAL_TTS_TIMEOUT_MS,
 }: {
 	config: ExternalTtsConfig;
+	createWebSocket?: ExternalTtsWebSocketFactory;
 	text: string;
 	voice?: string;
 	fetchImpl?: FetchLike;
@@ -218,12 +600,12 @@ export async function synthesizeSpeechWithOpenAiCompatible({
 
 		if (response.ok) {
 			const contentType = response.headers.get("content-type") ?? "";
-			const mimeType = contentType.split(";")[0]?.trim().toLowerCase() ?? "";
+			if (!isAudioContentType({ contentType })) {
+				if (shouldTryResponsesWebSocket({ response })) {
+					lastErrorResponse = response;
+					break;
+				}
 
-			if (
-				!mimeType.startsWith("audio/") &&
-				mimeType !== "application/octet-stream"
-			) {
 				throw new TtsError({
 					code: "EXTERNAL_TTS_UPSTREAM",
 					message: `Expected audio response, received ${contentType || "(no content-type)"}`,
@@ -264,6 +646,18 @@ export async function synthesizeSpeechWithOpenAiCompatible({
 		}
 	}
 
+	if (
+		lastErrorResponse &&
+		shouldTryResponsesWebSocket({ response: lastErrorResponse })
+	) {
+		return synthesizeSpeechWithResponsesWebSocket({
+			config,
+			createWebSocket,
+			text,
+			voice,
+		});
+	}
+
 	if (!lastErrorResponse) {
 		throw new Error(
 			"Expected external TTS to capture an upstream response before failing",
diff --git a/apps/web/src/lib/tts/provider.test.ts b/apps/web/src/lib/tts/provider.test.ts
index abef510..2487e35 100644
--- a/apps/web/src/lib/tts/provider.test.ts
+++ b/apps/web/src/lib/tts/provider.test.ts
@@ -1,6 +1,5 @@
 import { describe, expect, test } from "bun:test";
 import { TtsError } from "./errors";
-import { synthesizeSpeechWithOpenAiCompatible } from "./openai-compatible";
 import { synthesizeSpeechWithFallback } from "./provider";
 
 describe("synthesizeSpeechWithFallback", () => {
@@ -126,17 +125,13 @@ describe("synthesizeSpeechWithFallback", () => {
 				},
 				text: "hello",
 				voice: "default",
-				openAiSynthesize: ({ config, text, voice }) =>
-					synthesizeSpeechWithOpenAiCompatible({
-						config,
-						text,
-						voice,
-						fetchImpl: async () =>
-							new Response("<!doctype html>", {
-								status: 200,
-								headers: { "Content-Type": "text/html; charset=utf-8" },
-							}),
-					}),
+				openAiSynthesize: async () => {
+					throw new TtsError({
+						code: "EXTERNAL_TTS_UPSTREAM",
+						message: "External TTS websocket request failed: no available account",
+						retryable: false,
+					});
+				},
 				legacySynthesize: async () => {
 					legacyCalled = true;
 					return Uint8Array.from([7, 8, 9]).buffer;
@@ -144,7 +139,7 @@ describe("synthesizeSpeechWithFallback", () => {
 			}),
 		).rejects.toMatchObject({
 			code: "EXTERNAL_TTS_UPSTREAM",
-			message: "Expected audio response, received text/html; charset=utf-8",
+			message: "External TTS websocket request failed: no available account",
 			retryable: false,
 		});
 

From 9be976b084b9a11e9327005f7287b6ad99416d03 Mon Sep 17 00:00:00 2001
From: tianhei <tianheilene@gmail.com>
Date: Fri, 20 Mar 2026 02:38:36 +0800
Subject: [PATCH 18/22] test(tts): fix websocket test typing

Summary:
- fix the FakeWebSocket helper typings in openai-compatible tests so tsc
  accepts listeners that receive an event payload without collapsing them
  to the open handler
- update the websocket test call sites to use a no-arg open event helper
- apply biome formatting to the touched TTS files

Rationale:
- the repo-level web typecheck was still failing even though the test suite
  passed, which left the ticket with an open validation gap
- keeping the fix scoped to the test helper preserves runtime behavior while
  restoring the full validation surface for the TTS work

Tests:
- bun test apps/web/src/lib/tts/openai-compatible.test.ts
- bun test apps/web/src/lib/tts/fetch-with-timeout.test.ts apps/web/src/lib/tts/legacy.test.ts apps/web/src/lib/tts/openai-compatible.test.ts apps/web/src/lib/tts/provider.test.ts apps/web/src/lib/tts/service.test.ts apps/web/src/app/api/tts/generate/route.test.ts
- bunx tsc -p apps/web/tsconfig.json --noEmit
- bunx @biomejs/biome check apps/web/src/app/api/tts/generate/route.ts apps/web/src/app/api/tts/generate/route.test.ts apps/web/src/lib/tts/errors.ts apps/web/src/lib/tts/fetch-with-timeout.ts apps/web/src/lib/tts/fetch-with-timeout.test.ts apps/web/src/lib/tts/legacy.ts apps/web/src/lib/tts/legacy.test.ts apps/web/src/lib/tts/openai-compatible.ts apps/web/src/lib/tts/openai-compatible.test.ts apps/web/src/lib/tts/provider.ts apps/web/src/lib/tts/provider.test.ts apps/web/src/lib/tts/service.ts apps/web/src/lib/tts/service.test.ts apps/web/src/constants/tts-constants.ts packages/env/src/web.ts README.md apps/web/.env.example docs/plans/2026-03-17-tts-external-provider-design.md docs/plans/2026-03-17-tts-external-provider.md

Co-authored-by: Codex <codex@openai.com>
---
 .../web/src/lib/tts/openai-compatible.test.ts | 54 ++++++++++++++-----
 apps/web/src/lib/tts/openai-compatible.ts     | 14 +++--
 apps/web/src/lib/tts/provider.test.ts         |  3 +-
 3 files changed, 54 insertions(+), 17 deletions(-)

diff --git a/apps/web/src/lib/tts/openai-compatible.test.ts b/apps/web/src/lib/tts/openai-compatible.test.ts
index 383e6fd..2372246 100644
--- a/apps/web/src/lib/tts/openai-compatible.test.ts
+++ b/apps/web/src/lib/tts/openai-compatible.test.ts
@@ -26,23 +26,51 @@ class FakeWebSocket {
 		public readonly init?: { headers?: Record<string, string> },
 	) {}
 
-	addEventListener<K extends keyof WebSocketListenerMap>(
-		type: K,
-		listener: WebSocketListenerMap[K][number],
+	addEventListener(
+		type: "close",
+		listener: WebSocketListenerMap["close"][number],
+	): void;
+	addEventListener(
+		type: "error",
+		listener: WebSocketListenerMap["error"][number],
+	): void;
+	addEventListener(
+		type: "message",
+		listener: WebSocketListenerMap["message"][number],
+	): void;
+	addEventListener(
+		type: "open",
+		listener: WebSocketListenerMap["open"][number],
+	): void;
+	addEventListener(
+		type: keyof WebSocketListenerMap,
+		listener: WebSocketListenerMap[keyof WebSocketListenerMap][number],
 	) {
-		this.listeners[type].push(listener);
+		(
+			this.listeners[type] as Array<
+				(
+					event?:
+						| { code: number; reason: string }
+						| { message?: string; type?: string }
+						| { data: unknown },
+				) => void
+			>
+		).push(listener as (event?: unknown) => void);
 	}
 
 	close(code = 1000, reason = "") {
 		this.emit("close", { code, reason });
 	}
 
-	emit<K extends keyof WebSocketListenerMap>(
-		type: K,
-		event: Parameters<WebSocketListenerMap[K][number]>[0],
-	) {
-		for (const listener of this.listeners[type]) {
-			listener(event as never);
+	emit(type: "close", event: { code: number; reason: string }): void;
+	emit(type: "error", event: { message?: string; type?: string }): void;
+	emit(type: "message", event: { data: unknown }): void;
+	emit(type: "open"): void;
+	emit(type: keyof WebSocketListenerMap, event?: unknown) {
+		for (const listener of this.listeners[type] as Array<
+			(event?: unknown) => void
+		>) {
+			listener(event);
 		}
 	}
 
@@ -468,7 +496,7 @@ describe("synthesizeSpeechWithOpenAiCompatible", () => {
 		expect(sockets[0]?.url).toBe("wss://example.com/v1/responses");
 		expect(sockets[0]?.init?.headers?.Authorization).toBe("Bearer secret");
 
-		sockets[0]?.emit("open", undefined);
+		sockets[0]?.emit("open");
 		expect(JSON.parse(sockets[0]?.sentMessages[0] ?? "")).toEqual({
 			audio: { format: "mp3" },
 			input: "hello",
@@ -518,7 +546,7 @@ describe("synthesizeSpeechWithOpenAiCompatible", () => {
 		});
 		await new Promise((resolve) => setTimeout(resolve, 0));
 
-		sockets[0]?.emit("open", undefined);
+		sockets[0]?.emit("open");
 		sockets[0]?.emit("message", {
 			data: JSON.stringify({
 				type: "response.output_audio.delta",
@@ -554,7 +582,7 @@ describe("synthesizeSpeechWithOpenAiCompatible", () => {
 		});
 		await new Promise((resolve) => setTimeout(resolve, 0));
 
-		sockets[0]?.emit("open", undefined);
+		sockets[0]?.emit("open");
 		sockets[0]?.emit("close", {
 			code: 1013,
 			reason: "no available account",
diff --git a/apps/web/src/lib/tts/openai-compatible.ts b/apps/web/src/lib/tts/openai-compatible.ts
index 0cbf392..9eb4a3f 100644
--- a/apps/web/src/lib/tts/openai-compatible.ts
+++ b/apps/web/src/lib/tts/openai-compatible.ts
@@ -248,7 +248,9 @@ function toWebSocketUrl({ url }: { url: string }): string {
 function isAudioContentType({ contentType }: { contentType: string }): boolean {
 	const mimeType = contentType.split(";")[0]?.trim().toLowerCase() ?? "";
 
-	return mimeType.startsWith("audio/") || mimeType === "application/octet-stream";
+	return (
+		mimeType.startsWith("audio/") || mimeType === "application/octet-stream"
+	);
 }
 
 function shouldTryResponsesWebSocket({
@@ -256,7 +258,11 @@ function shouldTryResponsesWebSocket({
 }: {
 	response: Response;
 }): boolean {
-	if (response.status === 404 || response.status === 405 || response.status === 426) {
+	if (
+		response.status === 404 ||
+		response.status === 405 ||
+		response.status === 426
+	) {
 		return true;
 	}
 
@@ -468,7 +474,9 @@ async function synthesizeSpeechWithResponsesWebSocket({
 					type === "response.output_audio.delta"
 				) {
 					if (typeof event.delta === "string" && event.delta.length > 0) {
-						audioChunks.push(Uint8Array.from(Buffer.from(event.delta, "base64")));
+						audioChunks.push(
+							Uint8Array.from(Buffer.from(event.delta, "base64")),
+						);
 					}
 					return;
 				}
diff --git a/apps/web/src/lib/tts/provider.test.ts b/apps/web/src/lib/tts/provider.test.ts
index 2487e35..3269ce9 100644
--- a/apps/web/src/lib/tts/provider.test.ts
+++ b/apps/web/src/lib/tts/provider.test.ts
@@ -128,7 +128,8 @@ describe("synthesizeSpeechWithFallback", () => {
 				openAiSynthesize: async () => {
 					throw new TtsError({
 						code: "EXTERNAL_TTS_UPSTREAM",
-						message: "External TTS websocket request failed: no available account",
+						message:
+							"External TTS websocket request failed: no available account",
 						retryable: false,
 					});
 				},

From 2068474992b261e9ba31be7a90bd523b23514cff Mon Sep 17 00:00:00 2001
From: tianhei <tianheilene@gmail.com>
Date: Fri, 20 Mar 2026 02:54:04 +0800
Subject: [PATCH 19/22] fix(tts): fall back on account exhaustion

Summary:
- treat websocket `no available account` closes as retryable external
  upstream errors
- add regression coverage proving provider fallback recovers from account
  exhaustion while keeping non-retryable websocket errors terminal
- update the design note so retryable external failures include websocket
  capacity exhaustion

Rationale:
- the shared external gateway currently reports account exhaustion even
  though the legacy provider is healthy, so treating it as terminal left
  real route requests failing with 502 unnecessarily
- classifying account exhaustion as retryable preserves the explicit error
  semantics inside the adapter while restoring user-facing audio output via
  the existing legacy fallback path

Tests:
- bun test apps/web/src/lib/tts/openai-compatible.test.ts apps/web/src/lib/tts/provider.test.ts
- bun test apps/web/src/lib/tts/fetch-with-timeout.test.ts apps/web/src/lib/tts/legacy.test.ts apps/web/src/lib/tts/openai-compatible.test.ts apps/web/src/lib/tts/provider.test.ts apps/web/src/lib/tts/service.test.ts apps/web/src/app/api/tts/generate/route.test.ts
- bunx tsc -p apps/web/tsconfig.json --noEmit
- bunx @biomejs/biome check apps/web/src/lib/tts/openai-compatible.ts apps/web/src/lib/tts/openai-compatible.test.ts apps/web/src/lib/tts/provider.test.ts docs/plans/2026-03-17-tts-external-provider-design.md
- cd apps/web && NEXT_PUBLIC_SITE_URL=http://localhost:4100 UPSTASH_REDIS_REST_URL=https://example.com UPSTASH_REDIS_REST_TOKEN=dummy NODE_ENV=production bun run build
- NODE_ENV=development NEXT_PUBLIC_SITE_URL=http://localhost:4311 UPSTASH_REDIS_REST_URL=https://example.com UPSTASH_REDIS_REST_TOKEN=dummy bun --eval 'import { NextRequest } from "next/server"; import { POST } from "./apps/web/src/app/api/tts/generate/route.ts"; /* status=200 audioLength=34364 */'

Co-authored-by: Codex <codex@openai.com>
---
 .../web/src/lib/tts/openai-compatible.test.ts |  4 +--
 apps/web/src/lib/tts/openai-compatible.ts     |  1 -
 apps/web/src/lib/tts/provider.test.ts         | 31 ++++++++++++++++++-
 ...2026-03-17-tts-external-provider-design.md |  2 +-
 4 files changed, 33 insertions(+), 5 deletions(-)

diff --git a/apps/web/src/lib/tts/openai-compatible.test.ts b/apps/web/src/lib/tts/openai-compatible.test.ts
index 2372246..3fc238a 100644
--- a/apps/web/src/lib/tts/openai-compatible.test.ts
+++ b/apps/web/src/lib/tts/openai-compatible.test.ts
@@ -563,7 +563,7 @@ describe("synthesizeSpeechWithOpenAiCompatible", () => {
 		);
 	});
 
-	test("surfaces websocket close reasons as structured upstream errors", async () => {
+	test("marks websocket account exhaustion as retryable so legacy fallback can recover", async () => {
 		const sockets: FakeWebSocket[] = [];
 		const synthesis = synthesizeSpeechWithOpenAiCompatible({
 			config: {
@@ -591,7 +591,7 @@ describe("synthesizeSpeechWithOpenAiCompatible", () => {
 		await expect(synthesis).rejects.toMatchObject({
 			code: "EXTERNAL_TTS_UPSTREAM",
 			message: "External TTS websocket request failed: no available account",
-			retryable: false,
+			retryable: true,
 		});
 	});
 });
diff --git a/apps/web/src/lib/tts/openai-compatible.ts b/apps/web/src/lib/tts/openai-compatible.ts
index 9eb4a3f..980e722 100644
--- a/apps/web/src/lib/tts/openai-compatible.ts
+++ b/apps/web/src/lib/tts/openai-compatible.ts
@@ -286,7 +286,6 @@ function getResponsesWebSocketCloseRetryable({
 	const normalizedReason = reason.trim().toLowerCase();
 
 	if (
-		normalizedReason.includes("no available account") ||
 		normalizedReason.includes("required") ||
 		normalizedReason.includes("unsupported")
 	) {
diff --git a/apps/web/src/lib/tts/provider.test.ts b/apps/web/src/lib/tts/provider.test.ts
index 3269ce9..e214fc5 100644
--- a/apps/web/src/lib/tts/provider.test.ts
+++ b/apps/web/src/lib/tts/provider.test.ts
@@ -113,7 +113,36 @@ describe("synthesizeSpeechWithFallback", () => {
 		expect(legacyCalled).toBe(false);
 	});
 
-	test("does not fall back when the external provider returns a non-audio success response", async () => {
+	test("falls back when the external provider reports no available account", async () => {
+		let legacyCalled = false;
+
+		const result = await synthesizeSpeechWithFallback({
+			env: {
+				API_BASE_URL: "https://example.com/v1",
+				API_MODEL: "tts-1",
+				API_KEY: "secret",
+			},
+			text: "hello",
+			voice: "default",
+			openAiSynthesize: async () => {
+				throw new TtsError({
+					code: "EXTERNAL_TTS_UPSTREAM",
+					message:
+						"External TTS websocket request failed: no available account",
+					retryable: true,
+				});
+			},
+			legacySynthesize: async () => {
+				legacyCalled = true;
+				return Uint8Array.from([7, 8, 9]).buffer;
+			},
+		});
+
+		expect(Array.from(new Uint8Array(result))).toEqual([7, 8, 9]);
+		expect(legacyCalled).toBe(true);
+	});
+
+	test("does not fall back when the external provider returns a non-retryable websocket account error", async () => {
 		let legacyCalled = false;
 
 		await expect(
diff --git a/docs/plans/2026-03-17-tts-external-provider-design.md b/docs/plans/2026-03-17-tts-external-provider-design.md
index cadce47..c2bf239 100644
--- a/docs/plans/2026-03-17-tts-external-provider-design.md
+++ b/docs/plans/2026-03-17-tts-external-provider-design.md
@@ -104,7 +104,7 @@
 
 - 请求参数非法：返回 `400`
 - TTS 环境变量缺失：返回 `500`，信息明确为未配置
-- 外部 provider 返回可重试的 `EXTERNAL_TTS_UPSTREAM`（例如超时、`429`、`5xx`）时，会先回退到 legacy provider
+- 外部 provider 返回可重试的 `EXTERNAL_TTS_UPSTREAM`（例如超时、`429`、`5xx`，或 websocket `no available account` 这类账号容量耗尽）时，会先回退到 legacy provider
   - legacy 回退成功：最终仍可能返回 `200`
   - legacy 回退失败：最终返回 `502`
 - 外部 provider 返回不可重试的 `EXTERNAL_TTS_UPSTREAM`（例如 `401`/`403`/`404`、空音频、非音频响应）时：直接返回 `502`

From 5750a17e6fd46b7b400760c2881f91a53230e809 Mon Sep 17 00:00:00 2001
From: tianhei <tianheilene@gmail.com>
Date: Fri, 20 Mar 2026 03:31:33 +0800
Subject: [PATCH 20/22] fix(tts): address remaining rework review comments

Summary:
- cancel unconsumed external TTS responses before retry and websocket
  fallback branches
- add regression tests that assert response cleanup on 404, html
  fallback, and non-audio success responses
- keep example external TTS env vars disabled by default and extract a
  shared metadata-route helper in legacy tests

Rationale:
- releasing abandoned response bodies avoids tying up fetch connections
  while probing multiple upstream endpoints
- disabling placeholder external env vars prevents fresh setups from
  accidentally enabling the external path against a fake host
- absorbing the remaining test helper nit removes repeated brittle
  string matching from the legacy suite

Tests:
- bun test apps/web/src/lib/tts/fetch-with-timeout.test.ts apps/web/src/lib/tts/legacy.test.ts apps/web/src/lib/tts/openai-compatible.test.ts apps/web/src/lib/tts/provider.test.ts apps/web/src/lib/tts/service.test.ts apps/web/src/app/api/tts/generate/route.test.ts
- bunx tsc -p apps/web/tsconfig.json --noEmit
- bunx @biomejs/biome check apps/web/src/lib/tts/openai-compatible.ts apps/web/src/lib/tts/openai-compatible.test.ts apps/web/src/lib/tts/legacy.test.ts
- cd apps/web && NEXT_PUBLIC_SITE_URL=http://localhost:4100 UPSTASH_REDIS_REST_URL=https://example.com UPSTASH_REDIS_REST_TOKEN=dummy NODE_ENV=production bun run build
- git diff --check

Co-authored-by: Codex <codex@openai.com>
---
 apps/web/.env.example                         |  6 +--
 apps/web/src/lib/tts/legacy.test.ts           | 15 ++++--
 .../web/src/lib/tts/openai-compatible.test.ts | 47 ++++++++++++++++---
 apps/web/src/lib/tts/openai-compatible.ts     | 16 +++++++
 4 files changed, 69 insertions(+), 15 deletions(-)

diff --git a/apps/web/.env.example b/apps/web/.env.example
index 0b78b6c..cd7f63d 100644
--- a/apps/web/.env.example
+++ b/apps/web/.env.example
@@ -18,9 +18,9 @@ FREESOUND_API_KEY=your_api_key_here
 
 # Optional: external OpenAI-compatible TTS provider
 # Preferred namespaced variables:
-EXTERNAL_TTS_API_BASE_URL=https://your-tts-provider.example.com/v1
-EXTERNAL_TTS_API_MODEL=your_tts_model
-EXTERNAL_TTS_API_KEY=your_tts_api_key
+# EXTERNAL_TTS_API_BASE_URL=https://your-tts-provider.example.com/v1
+# EXTERNAL_TTS_API_MODEL=your_tts_model
+# EXTERNAL_TTS_API_KEY=your_tts_api_key
 # Compatibility aliases used when EXTERNAL_TTS_* is absent:
 # API_BASE_URL=https://your-shared-api.example.com/v1
 # API_MODEL=your_tts_model
diff --git a/apps/web/src/lib/tts/legacy.test.ts b/apps/web/src/lib/tts/legacy.test.ts
index 950a746..c3e14ac 100644
--- a/apps/web/src/lib/tts/legacy.test.ts
+++ b/apps/web/src/lib/tts/legacy.test.ts
@@ -4,6 +4,7 @@ import { synthesizeSpeechWithLegacyProvider } from "./legacy";
 describe("synthesizeSpeechWithLegacyProvider", () => {
 	const TEST_TIMEOUT_MS = 50;
 	const LEGACY_AUDIO_URL = "https://api.milorapart.top/voice/test.mp3";
+	const LEGACY_METADATA_ROUTE = "/apis/mbAIsc?";
 
 	function legacyMetadataOk(url = LEGACY_AUDIO_URL): Response {
 		return Response.json({
@@ -12,6 +13,10 @@ describe("synthesizeSpeechWithLegacyProvider", () => {
 		});
 	}
 
+	function isLegacyMetadataRequest(input: RequestInfo | URL): boolean {
+		return String(input).includes(LEGACY_METADATA_ROUTE);
+	}
+
 	test("rejects audio urls outside the expected https host allowlist", async () => {
 		const calls: string[] = [];
 
@@ -33,7 +38,7 @@ describe("synthesizeSpeechWithLegacyProvider", () => {
 			synthesizeSpeechWithLegacyProvider({
 				text: "hello",
 				fetchImpl: async (input) => {
-					if (String(input).includes("/apis/mbAIsc?")) {
+					if (isLegacyMetadataRequest(input)) {
 						return legacyMetadataOk();
 					}
 
@@ -51,7 +56,7 @@ describe("synthesizeSpeechWithLegacyProvider", () => {
 			synthesizeSpeechWithLegacyProvider({
 				text: "hello",
 				fetchImpl: async (input) => {
-					if (String(input).includes("/apis/mbAIsc?")) {
+					if (isLegacyMetadataRequest(input)) {
 						return legacyMetadataOk();
 					}
 
@@ -67,7 +72,7 @@ describe("synthesizeSpeechWithLegacyProvider", () => {
 		const audio = await synthesizeSpeechWithLegacyProvider({
 			text: "hello",
 			fetchImpl: async (input) => {
-				if (String(input).includes("/apis/mbAIsc?")) {
+				if (isLegacyMetadataRequest(input)) {
 					return legacyMetadataOk();
 				}
 
@@ -88,7 +93,7 @@ describe("synthesizeSpeechWithLegacyProvider", () => {
 			synthesizeSpeechWithLegacyProvider({
 				text: "hello",
 				fetchImpl: async (input, init) => {
-					if (String(input).includes("/apis/mbAIsc?")) {
+					if (isLegacyMetadataRequest(input)) {
 						return legacyMetadataOk();
 					}
 
@@ -116,7 +121,7 @@ describe("synthesizeSpeechWithLegacyProvider", () => {
 		const audio = await synthesizeSpeechWithLegacyProvider({
 			text: "hello",
 			fetchImpl: async (input, init) => {
-				if (String(input).includes("/apis/mbAIsc?")) {
+				if (isLegacyMetadataRequest(input)) {
 					return legacyMetadataOk();
 				}
 
diff --git a/apps/web/src/lib/tts/openai-compatible.test.ts b/apps/web/src/lib/tts/openai-compatible.test.ts
index 3fc238a..c7608fd 100644
--- a/apps/web/src/lib/tts/openai-compatible.test.ts
+++ b/apps/web/src/lib/tts/openai-compatible.test.ts
@@ -238,6 +238,7 @@ describe("synthesizeSpeechWithOpenAiCompatible", () => {
 
 	test("falls back to the root audio speech path when the v1 path returns 404", async () => {
 		const calls: string[] = [];
+		const cancelledResponses: string[] = [];
 
 		const audio = await synthesizeSpeechWithOpenAiCompatible({
 			config: {
@@ -252,7 +253,16 @@ describe("synthesizeSpeechWithOpenAiCompatible", () => {
 				calls.push(url);
 
 				if (url === "https://example.com/v1/audio/speech") {
-					return new Response("page not found", { status: 404 });
+					return {
+						body: {
+							cancel: async () => {
+								cancelledResponses.push(url);
+							},
+						},
+						headers: new Headers(),
+						ok: false,
+						status: 404,
+					} as Response;
 				}
 
 				return new Response(Uint8Array.from([9, 8, 7]), {
@@ -267,6 +277,7 @@ describe("synthesizeSpeechWithOpenAiCompatible", () => {
 			"https://example.com/v1/audio/speech",
 			"https://example.com/audio/speech",
 		]);
+		expect(cancelledResponses).toEqual(["https://example.com/v1/audio/speech"]);
 	});
 
 	test("tries the /v1 speech endpoint first when the base url is root-level", async () => {
@@ -300,6 +311,8 @@ describe("synthesizeSpeechWithOpenAiCompatible", () => {
 	});
 
 	test("rejects non-audio success responses", async () => {
+		let cancelCalled = false;
+
 		await expect(
 			synthesizeSpeechWithOpenAiCompatible({
 				config: {
@@ -310,12 +323,22 @@ describe("synthesizeSpeechWithOpenAiCompatible", () => {
 				text: "hello",
 				voice: "default",
 				fetchImpl: async () =>
-					new Response("not audio", {
+					({
+						body: {
+							cancel: async () => {
+								cancelCalled = true;
+							},
+						},
+						headers: new Headers({
+							"Content-Type": "text/plain; charset=utf-8",
+						}),
+						ok: true,
 						status: 200,
-						headers: { "Content-Type": "text/plain; charset=utf-8" },
-					}),
+					}) as Response,
 			}),
 		).rejects.toThrow("Expected audio response");
+
+		expect(cancelCalled).toBe(true);
 	});
 
 	test("rejects success responses when the content-type header is missing", async () => {
@@ -525,6 +548,7 @@ describe("synthesizeSpeechWithOpenAiCompatible", () => {
 
 	test("falls back to /responses websocket audio when /audio/speech returns html", async () => {
 		const sockets: FakeWebSocket[] = [];
+		let cancelCalled = false;
 		const synthesis = synthesizeSpeechWithOpenAiCompatible({
 			config: {
 				apiBaseUrl: "https://example.com/v1",
@@ -539,13 +563,22 @@ describe("synthesizeSpeechWithOpenAiCompatible", () => {
 				return socket;
 			},
 			fetchImpl: async () =>
-				new Response("<!doctype html>", {
+				({
+					body: {
+						cancel: async () => {
+							cancelCalled = true;
+						},
+					},
+					headers: new Headers({
+						"Content-Type": "text/html; charset=utf-8",
+					}),
+					ok: true,
 					status: 200,
-					headers: { "Content-Type": "text/html; charset=utf-8" },
-				}),
+				}) as Response,
 		});
 		await new Promise((resolve) => setTimeout(resolve, 0));
 
+		expect(cancelCalled).toBe(true);
 		sockets[0]?.emit("open");
 		sockets[0]?.emit("message", {
 			data: JSON.stringify({
diff --git a/apps/web/src/lib/tts/openai-compatible.ts b/apps/web/src/lib/tts/openai-compatible.ts
index 980e722..270c03f 100644
--- a/apps/web/src/lib/tts/openai-compatible.ts
+++ b/apps/web/src/lib/tts/openai-compatible.ts
@@ -276,6 +276,18 @@ function shouldTryResponsesWebSocket({
 	return mimeType === "text/html";
 }
 
+async function cancelResponseBody({
+	response,
+}: {
+	response: Response;
+}): Promise<void> {
+	try {
+		await response.body?.cancel();
+	} catch {
+		// Best-effort cleanup only.
+	}
+}
+
 function getResponsesWebSocketCloseRetryable({
 	code,
 	reason,
@@ -610,9 +622,11 @@ export async function synthesizeSpeechWithOpenAiCompatible({
 			if (!isAudioContentType({ contentType })) {
 				if (shouldTryResponsesWebSocket({ response })) {
 					lastErrorResponse = response;
+					await cancelResponseBody({ response });
 					break;
 				}
 
+				await cancelResponseBody({ response });
 				throw new TtsError({
 					code: "EXTERNAL_TTS_UPSTREAM",
 					message: `Expected audio response, received ${contentType || "(no content-type)"}`,
@@ -651,6 +665,8 @@ export async function synthesizeSpeechWithOpenAiCompatible({
 		if (response.status !== 404) {
 			break;
 		}
+
+		await cancelResponseBody({ response });
 	}
 
 	if (

From 5fb0c77530421b4db3edf6235898723df24a7624 Mon Sep 17 00:00:00 2001
From: tianhei <tianheilene@gmail.com>
Date: Fri, 20 Mar 2026 22:30:57 +0800
Subject: [PATCH 21/22] docs(tts): clarify external model requirements

Summary:
- recommend a concrete TTS-capable model in README and env example
- document that shared API_MODEL aliases may point at non-TTS models

Rationale:
- fresh live probes showed the current alias model is not clearly a
  TTS-capable model
- clearer config guidance reduces false negatives during runtime validation

Tests:
- git diff --check

Co-authored-by: Codex <codex@openai.com>
---
 README.md             | 7 ++++++-
 apps/web/.env.example | 4 +++-
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 7d43b9b..46fdcd9 100644
--- a/README.md
+++ b/README.md
@@ -76,7 +76,7 @@ Optional TTS env values:
 
 ```bash
 EXTERNAL_TTS_API_BASE_URL="https://your-tts-provider.example.com/v1"
-EXTERNAL_TTS_API_MODEL="your_tts_model"
+EXTERNAL_TTS_API_MODEL="gpt-4o-mini-tts"
 EXTERNAL_TTS_API_KEY="your_tts_api_key"
 ```
 
@@ -84,6 +84,11 @@ Cutia prefers `EXTERNAL_TTS_API_*` for external speech synthesis. The legacy
 `API_BASE_URL` / `API_MODEL` / `API_KEY` names are still accepted as
 compatibility aliases when the namespaced variables are absent.
 
+Use a provider-supported TTS model for `EXTERNAL_TTS_API_MODEL` (for example
+`gpt-4o-mini-tts` or another audio-output model that your provider actually
+supports). The shared `API_MODEL` alias is only a migration fallback and may
+already point at a non-TTS chat model in your environment.
+
 To verify that the configured provider can actually return audio, run:
 
 ```bash
diff --git a/apps/web/.env.example b/apps/web/.env.example
index cd7f63d..00af487 100644
--- a/apps/web/.env.example
+++ b/apps/web/.env.example
@@ -19,8 +19,10 @@ FREESOUND_API_KEY=your_api_key_here
 # Optional: external OpenAI-compatible TTS provider
 # Preferred namespaced variables:
 # EXTERNAL_TTS_API_BASE_URL=https://your-tts-provider.example.com/v1
-# EXTERNAL_TTS_API_MODEL=your_tts_model
+# EXTERNAL_TTS_API_MODEL=gpt-4o-mini-tts
 # EXTERNAL_TTS_API_KEY=your_tts_api_key
+# Use a provider-supported audio/TTS model here. Shared API_MODEL values are
+# often general chat models and may not work for speech generation.
 # Compatibility aliases used when EXTERNAL_TTS_* is absent:
 # API_BASE_URL=https://your-shared-api.example.com/v1
 # API_MODEL=your_tts_model

From 2e71fe384548767c6231036412c149a04894af10 Mon Sep 17 00:00:00 2001
From: tianhei <tianheilene@gmail.com>
Date: Fri, 20 Mar 2026 22:42:43 +0800
Subject: [PATCH 22/22] docs(tts): add provider capability troubleshooting

Summary:
- document the provider-side prerequisites for successful TTS probes
- explain how /models and audio endpoints affect runtime validation

Rationale:
- a recent investigation showed live probe failures were caused by missing
  provider capabilities and upstream unavailability, not local code
  regressions
- keeping that guidance in repo docs reduces repeated misdiagnosis

Tests:
- git diff --check

Co-authored-by: Codex <codex@openai.com>
---
 README.md             | 11 +++++++++++
 apps/web/.env.example |  2 ++
 2 files changed, 13 insertions(+)

diff --git a/README.md b/README.md
index 46fdcd9..89fb315 100644
--- a/README.md
+++ b/README.md
@@ -89,6 +89,17 @@ Use a provider-supported TTS model for `EXTERNAL_TTS_API_MODEL` (for example
 supports). The shared `API_MODEL` alias is only a migration fallback and may
 already point at a non-TTS chat model in your environment.
 
+Before treating a failed probe as a code regression, confirm the provider
+itself is TTS-capable for the current credentials:
+
+- `/models` should list the configured TTS model or another audio-capable model
+- either `/audio/speech` must return audio directly, or `/responses` must accept
+  audio output requests for the configured model
+- if `/audio/speech` returns `404` and `/models` contains only chat/text models,
+  the provider is not exposing a usable TTS surface for this environment
+- legacy fallback is best-effort only; if the legacy upstream is unavailable,
+  route probes will still return `502`
+
 To verify that the configured provider can actually return audio, run:
 
 ```bash
diff --git a/apps/web/.env.example b/apps/web/.env.example
index 00af487..0de5987 100644
--- a/apps/web/.env.example
+++ b/apps/web/.env.example
@@ -23,6 +23,8 @@ FREESOUND_API_KEY=your_api_key_here
 # EXTERNAL_TTS_API_KEY=your_tts_api_key
 # Use a provider-supported audio/TTS model here. Shared API_MODEL values are
 # often general chat models and may not work for speech generation.
+# The provider should also expose that model from /models and support either
+# /audio/speech or /responses audio output for the same credentials.
 # Compatibility aliases used when EXTERNAL_TTS_* is absent:
 # API_BASE_URL=https://your-shared-api.example.com/v1
 # API_MODEL=your_tts_model