Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion astrbot/core/provider/sources/whisper_api_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from astrbot.core import logger
from astrbot.core.utils.astrbot_path import get_astrbot_temp_path
from astrbot.core.utils.io import download_file
from astrbot.core.utils.media_utils import convert_audio_to_wav
from astrbot.core.utils.tencent_record_helper import (
convert_to_pcm_wav,
tencent_silk_to_wav,
Expand Down Expand Up @@ -76,7 +77,18 @@ async def get_text(self, audio_url: str) -> str:
if not os.path.exists(audio_url):
raise FileNotFoundError(f"文件不存在: {audio_url}")

if audio_url.endswith(".amr") or audio_url.endswith(".silk") or is_tencent:
lower_audio_url = audio_url.lower()

if lower_audio_url.endswith(".opus"):
temp_dir = get_astrbot_temp_path()
output_path = os.path.join(
temp_dir,
f"whisper_api_{uuid.uuid4().hex[:8]}.wav",
)
Comment on lines +83 to +87
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

This block for generating a temporary output path is identical to the one on lines 96-100. To improve maintainability, consider refactoring this duplicated logic. You could, for example, move the path generation logic to before the if/elif block, to be executed once if any conversion is needed.

logger.info("Converting opus file to wav using convert_audio_to_wav...")
await convert_audio_to_wav(audio_url, output_path)
audio_url = output_path
elif lower_audio_url.endswith(".amr") or lower_audio_url.endswith(".silk") or is_tencent:
Comment on lines +82 to +91
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

question: 请澄清在 is_tencent 为 true 时对 .opus 文件的处理行为,以避免潜在的边缘情况。

目前 .opus 文件总是会先被转换为 WAV,因此 elif 分支(包括对 is_tencent 的检查)对它们来说永远不会执行。如果以后腾讯发送的 .opus 输入应当像 .amr/.silk 那样处理,这种行为可能会让人意外。请确认 is_tencent 是否应该影响 .opus 的处理(例如使用 if lower_audio_url.endswith(".opus") and not is_tencent: 之类的条件),以便让面向特定服务商的意图更明确,并避免未来的边缘问题。

Original comment in English

question: Clarify behavior for .opus files when is_tencent is true to avoid potential edge cases.

Currently, .opus files always go through conversion to WAV, so the elif (including the is_tencent check) never runs for them. If Tencent ever sends .opus inputs that should be treated like .amr/.silk, this behavior could be surprising. Please confirm whether is_tencent should affect .opus handling (e.g., if lower_audio_url.endswith(".opus") and not is_tencent: or similar) so the provider-specific intent is explicit and future edge cases are avoided.

file_format = await self._get_audio_format(audio_url)

# 判断是否需要转换
Expand Down
72 changes: 72 additions & 0 deletions tests/test_whisper_api_source.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
from pathlib import Path
from types import SimpleNamespace
from unittest.mock import AsyncMock

import pytest

from astrbot.core.provider.sources.whisper_api_source import ProviderOpenAIWhisperAPI


def _make_provider() -> ProviderOpenAIWhisperAPI:
provider = ProviderOpenAIWhisperAPI(
provider_config={
"id": "test-whisper-api",
"type": "openai_whisper_api",
"model": "whisper-1",
"api_key": "test-key",
},
provider_settings={},
)
provider.client = SimpleNamespace(
audio=SimpleNamespace(
transcriptions=SimpleNamespace(
create=AsyncMock(return_value=SimpleNamespace(text="transcribed text"))
)
),
close=AsyncMock(),
)
return provider


@pytest.mark.asyncio
async def test_get_text_converts_opus_files_to_wav_before_transcription(
tmp_path: Path, monkeypatch: pytest.MonkeyPatch
):
provider = _make_provider()
opus_path = tmp_path / "voice.opus"
opus_path.write_bytes(b"fake opus data")

conversions: list[tuple[str, str]] = []

async def fake_convert_audio_to_wav(audio_path: str, output_path: str | None = None):
assert output_path is not None
conversions.append((audio_path, output_path))
Path(output_path).write_bytes(b"fake wav data")
return output_path

monkeypatch.setattr(
"astrbot.core.provider.sources.whisper_api_source.get_astrbot_temp_path",
lambda: str(tmp_path),
)
monkeypatch.setattr(
"astrbot.core.provider.sources.whisper_api_source.convert_audio_to_wav",
fake_convert_audio_to_wav,
)

try:
result = await provider.get_text(str(opus_path))

assert result == "transcribed text"
assert conversions and conversions[0][0] == str(opus_path)
converted_path = Path(conversions[0][1])
assert converted_path.suffix == ".wav"
assert not converted_path.exists()

create_mock = provider.client.audio.transcriptions.create
create_mock.assert_awaited_once()
file_arg = create_mock.await_args.kwargs["file"]
assert file_arg[0] == "audio.wav"
assert file_arg[1].name.endswith(".wav")
file_arg[1].close()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

This explicit close() call reveals a potential resource leak in the production code (whisper_api_source.py, line 117). The file is opened with open() but the handle is not closed, which can lead to file descriptor exhaustion in a long-running application. This should be fixed by using a with statement to ensure the file is automatically closed.

Example fix:

with open(audio_url, "rb") as audio_file:
    result = await self.client.audio.transcriptions.create(
        model=self.model_name,
        file=("audio.wav", audio_file),
    )

finally:
await provider.terminate()