From a3b589bbb1ba93a3483e76e37cd13be6fa41a3dd Mon Sep 17 00:00:00 2001 From: voorhs Date: Mon, 22 Jun 2026 01:18:57 +0300 Subject: [PATCH] test: cover basic/evolution augmentation CLIs; fix broken console-script paths MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add tests driving the real `main()` of the `basic-aug` and `evolution-aug` CLIs (previously at 0% test coverage). Every collaborator (load_dataset, Generator, synthesizer templates, UtteranceGenerator/UtteranceEvolver/IncrementalUtteranceEvolver) is mocked at the cli module namespace, so argument parsing and control flow run for real without any dataset, network, or LLM access. The tests cover the en/ru template branches, the regular-vs-incremental evolver branch, async-mode forwarding, and the optional push-to-hub path. Also fix the `[project.scripts]` entry points, which pointed at `...utterances.basic.cli` / `...utterances.evolution.cli` — modules that do not exist (the packages are `_basic` / `_evolution`). As shipped, both console scripts fail with ModuleNotFoundError; the entry points now use the real `_`-prefixed paths. 
Co-Authored-By: Claude Opus 4.8 --- pyproject.toml | 4 +- tests/generation/utterances/test_basic_cli.py | 91 ++++++++++++++++++ .../utterances/test_evolution_cli.py | 93 +++++++++++++++++++ 3 files changed, 186 insertions(+), 2 deletions(-) create mode 100644 tests/generation/utterances/test_basic_cli.py create mode 100644 tests/generation/utterances/test_evolution_cli.py diff --git a/pyproject.toml b/pyproject.toml index ddad7bffe..bc0745c0f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -142,8 +142,8 @@ Repository = "https://github.com/deeppavlov/AutoIntent" Documentation = "https://deeppavlov.github.io/AutoIntent/" [project.scripts] -"basic-aug" = "autointent.generation.utterances.basic.cli:main" -"evolution-aug" = "autointent.generation.utterances.evolution.cli:main" +"basic-aug" = "autointent.generation.utterances._basic.cli:main" +"evolution-aug" = "autointent.generation.utterances._evolution.cli:main" [build-system] requires = ["uv_build>=0.8.7,<0.9.0"] diff --git a/tests/generation/utterances/test_basic_cli.py b/tests/generation/utterances/test_basic_cli.py new file mode 100644 index 000000000..0cd3250d6 --- /dev/null +++ b/tests/generation/utterances/test_basic_cli.py @@ -0,0 +1,91 @@ +"""Tests for the ``basic-aug`` console-script entry point (``_basic.cli.main``). + +The CLI wires together dataset loading, a synthesizer template, and the LLM +``UtteranceGenerator``. Every collaborator is replaced with a mock at the cli +module namespace, so this drives the real argument parsing and control flow +without any dataset, network, or LLM access. 
+""" + +from __future__ import annotations + +import sys +from types import SimpleNamespace +from typing import TYPE_CHECKING +from unittest.mock import MagicMock + +import autointent.generation.utterances._basic.cli as basic_cli + +if TYPE_CHECKING: + from pathlib import Path + + import pytest + + +def _patch(monkeypatch: pytest.MonkeyPatch, dataset: MagicMock, augmented: list[str]) -> SimpleNamespace: + mocks = SimpleNamespace( + load_dataset=MagicMock(return_value=dataset), + generator_cls=MagicMock(), + english_tpl=MagicMock(), + russian_tpl=MagicMock(), + utt_gen_cls=MagicMock(), + ) + mocks.utt_gen_cls.return_value.augment.return_value = augmented + monkeypatch.setattr(basic_cli, "load_dataset", mocks.load_dataset) + monkeypatch.setattr(basic_cli, "Generator", mocks.generator_cls) + monkeypatch.setattr(basic_cli, "EnglishSynthesizerTemplate", mocks.english_tpl) + monkeypatch.setattr(basic_cli, "RussianSynthesizerTemplate", mocks.russian_tpl) + monkeypatch.setattr(basic_cli, "UtteranceGenerator", mocks.utt_gen_cls) + return mocks + + +def test_basic_cli_english_no_repo(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None: + dataset = MagicMock() + dataset.__getitem__.return_value = ["u1", "u2", "u3"] + mocks = _patch(monkeypatch, dataset, ["new1", "new2"]) + out = tmp_path / "out.json" + + monkeypatch.setattr( + sys, + "argv", + ["basic-aug", "--input-path", "in.json", "--output-path", str(out), "--language", "en"], + ) + basic_cli.main() + + mocks.load_dataset.assert_called_once_with("in.json") + mocks.english_tpl.assert_called_once() + mocks.russian_tpl.assert_not_called() + mocks.utt_gen_cls.return_value.augment.assert_called_once() + dataset.to_json.assert_called_once_with(str(out)) + dataset.push_to_hub.assert_not_called() + + +def test_basic_cli_russian_async_with_repo(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None: + dataset = MagicMock() + dataset.__getitem__.return_value = ["u1"] + mocks = _patch(monkeypatch, dataset, []) + out = 
tmp_path / "out.json" + + monkeypatch.setattr( + sys, + "argv", + [ + "basic-aug", + "--input-path", + "in.json", + "--output-path", + str(out), + "--language", + "ru", + "--output-repo", + "me/repo", + "--private", + "--async-mode", + ], + ) + basic_cli.main() + + mocks.russian_tpl.assert_called_once() + mocks.english_tpl.assert_not_called() + # the --async-mode flag is forwarded to the generator + assert mocks.utt_gen_cls.call_args.kwargs["async_mode"] is True + dataset.push_to_hub.assert_called_once_with("me/repo", private=True) diff --git a/tests/generation/utterances/test_evolution_cli.py b/tests/generation/utterances/test_evolution_cli.py new file mode 100644 index 000000000..a0d9b704a --- /dev/null +++ b/tests/generation/utterances/test_evolution_cli.py @@ -0,0 +1,93 @@ +"""Tests for the ``evolution-aug`` console-script entry point (``_evolution.cli.main``). + +Collaborators (dataset loading, the LLM ``Generator``, and both evolver classes) +are mocked at the cli module namespace; the real ``EVOLUTION_MAPPING`` lookup and +argument parsing run for real. No dataset, network, or LLM access. 
+""" + +from __future__ import annotations + +import sys +from types import SimpleNamespace +from typing import TYPE_CHECKING +from unittest.mock import MagicMock + +import autointent.generation.utterances._evolution.cli as evolution_cli + +if TYPE_CHECKING: + from pathlib import Path + + import pytest + + +def _patch(monkeypatch: pytest.MonkeyPatch, dataset: MagicMock, augmented: list[str]) -> SimpleNamespace: + mocks = SimpleNamespace( + load_dataset=MagicMock(return_value=dataset), + generator_cls=MagicMock(), + evolver_cls=MagicMock(), + incremental_cls=MagicMock(), + ) + mocks.evolver_cls.return_value.augment.return_value = augmented + mocks.incremental_cls.return_value.augment.return_value = augmented + monkeypatch.setattr(evolution_cli, "load_dataset", mocks.load_dataset) + monkeypatch.setattr(evolution_cli, "Generator", mocks.generator_cls) + monkeypatch.setattr(evolution_cli, "UtteranceEvolver", mocks.evolver_cls) + monkeypatch.setattr(evolution_cli, "IncrementalUtteranceEvolver", mocks.incremental_cls) + return mocks + + +def test_evolution_cli_regular_evolver(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None: + dataset = MagicMock() + dataset.__getitem__.return_value = ["u1", "u2"] + mocks = _patch(monkeypatch, dataset, ["new1"]) + out = tmp_path / "out.json" + + monkeypatch.setattr( + sys, + "argv", + ["evolution-aug", "--input-path", "in.json", "--output-path", str(out), "--template", "abstract"], + ) + evolution_cli.main() + + mocks.evolver_cls.assert_called_once() + mocks.incremental_cls.assert_not_called() + mocks.evolver_cls.return_value.augment.assert_called_once() + dataset.to_json.assert_called_once_with(str(out)) + dataset.push_to_hub.assert_not_called() + + +def test_evolution_cli_incremental_with_repo(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None: + dataset = MagicMock() + dataset.__getitem__.return_value = ["u1"] + mocks = _patch(monkeypatch, dataset, []) + out = tmp_path / "out.json" + + monkeypatch.setattr( + sys, + 
"argv", + [ + "evolution-aug", + "--input-path", + "in.json", + "--output-path", + str(out), + "--template", + "abstract", + "--decide-for-me", + "--n-evolutions", + "2", + "--sequential", + "--async-mode", + "--output-repo", + "me/repo", + "--private", + ], + ) + evolution_cli.main() + + mocks.incremental_cls.assert_called_once() + mocks.evolver_cls.assert_not_called() + augment_kwargs = mocks.incremental_cls.return_value.augment.call_args.kwargs + assert augment_kwargs["sequential"] is True + assert augment_kwargs["n_evolutions"] == 2 + dataset.push_to_hub.assert_called_once_with("me/repo", True)