diff --git a/README.md b/README.md index a379f7e..85b8711 100644 --- a/README.md +++ b/README.md @@ -294,9 +294,9 @@ sandbox: backend: firejail roles: - planner: ["python3", "roles/planner.py"] - executor: ["bash", "roles/executor.sh"] - evaluator: ["python3", "roles/evaluator.py"] + planner: ["python3", "bundled:planner.py"] + executor: ["bash", "bundled:executor.sh"] + evaluator: ["python3", "bundled:evaluator.py"] EOF # 3. Run overnight @@ -367,9 +367,12 @@ sandbox: extra_args: [] # appended verbatim before `--` roles: - planner: ["python3", "roles/planner.py"] - executor: ["bash", "roles/executor.sh"] - evaluator: ["python3", "roles/evaluator.py"] + # `bundled:` resolves to evolution_kernel/roles/ inside the installed + # wheel — works for both `pip install evolution-kernel` and git-clone setups. + # Or replace any entry with your own argv (`["python3", "myplanner.py"]`). + planner: ["python3", "bundled:planner.py"] + executor: ["bash", "bundled:executor.sh"] + evaluator: ["python3", "bundled:evaluator.py"] ``` **Switch to OpenAI:** @@ -443,7 +446,7 @@ Each role is an executable that receives: --worktree path to the isolated git sandbox checkout ``` -`roles/planner.py`, `roles/executor.sh`, and `roles/evaluator.py` are the reference implementation. Copy, modify, or replace them entirely — with a shell script, a Docker call, or anything that reads `--input` and writes `--output`. +The bundled `evolution_kernel/roles/planner.py`, `executor.sh`, and `evaluator.py` are the reference implementation, shipped inside the wheel — reference them in your `evolution.yml` as `bundled:`. Copy, modify, or replace them entirely — with a shell script, a Docker call, or anything that reads `--input` and writes `--output`. --- @@ -462,12 +465,12 @@ Being honest about where v1.0 is *not* yet. 
## Project layout ``` -evolution_kernel/ ~1,900-line runtime (Governor · Observer · HardStops · Sandbox · Config · CLI · Scope) -roles/ reference planner, executor, evaluator, goal_evaluator, strategist -examples/ demo target + sandbox demo + working evolution.yml -docs/ protocol spec + first-task spec -tests/ 99 unit + acceptance tests · 14 fixture role scripts -evidence/ checked-in artifacts of runs anyone can reproduce +evolution_kernel/ ~1,900-line runtime (Governor · Observer · HardStops · Sandbox · Config · CLI · Scope) +evolution_kernel/roles/ reference planner, executor, evaluator, goal_evaluator, strategist — bundled in wheel, addressable as `bundled:` +examples/ demo target + sandbox demo + working evolution.yml +docs/ protocol spec + first-task spec +tests/ 99 unit + acceptance tests · 14 fixture role scripts +evidence/ checked-in artifacts of runs anyone can reproduce ``` --- diff --git a/README.zh.md b/README.zh.md index 16f2c7c..42e2887 100644 --- a/README.zh.md +++ b/README.zh.md @@ -290,9 +290,9 @@ sandbox: backend: firejail roles: - planner: ["python3", "roles/planner.py"] - executor: ["bash", "roles/executor.sh"] - evaluator: ["python3", "roles/evaluator.py"] + planner: ["python3", "bundled:planner.py"] + executor: ["bash", "bundled:executor.sh"] + evaluator: ["python3", "bundled:evaluator.py"] EOF # 3. 
跑一晚上,放着不管 @@ -360,9 +360,12 @@ sandbox: extra_args: [] # 追加到 firejail 命令的额外参数(在 `--` 之前) roles: - planner: ["python3", "roles/planner.py"] - executor: ["bash", "roles/executor.sh"] - evaluator: ["python3", "roles/evaluator.py"] + # `bundled:` 自动解析到已安装的 wheel 里 evolution_kernel/roles/, + # `pip install evolution-kernel` 和 git-clone 两种安装方式都能直接跑。 + # 也可以把任意一项换成你自己的 argv(如 `["python3", "myplanner.py"]`)。 + planner: ["python3", "bundled:planner.py"] + executor: ["bash", "bundled:executor.sh"] + evaluator: ["python3", "bundled:evaluator.py"] ``` **切换到 OpenAI:** @@ -436,7 +439,7 @@ python3 -m pytest tests/ -v --worktree 隔离 git 沙箱 checkout 的路径 ``` -`roles/planner.py`、`roles/executor.sh`、`roles/evaluator.py` 是参考实现。复制、改写、或者完全替换——shell 脚本、Docker 调用、任何能读 `--input` 写 `--output` 的东西都行。 +bundled 的 `evolution_kernel/roles/planner.py`、`executor.sh`、`evaluator.py` 是参考实现,随 wheel 一起发出去——在 `evolution.yml` 里用 `bundled:` 引用即可。复制、改写、或者完全替换——shell 脚本、Docker 调用、任何能读 `--input` 写 `--output` 的东西都行。 --- @@ -455,12 +458,12 @@ python3 -m pytest tests/ -v ## 项目结构 ``` -evolution_kernel/ ~1,900 行 runtime(Governor · Observer · HardStops · Sandbox · Config · CLI · Scope) -roles/ 参考规划器 / 执行器 / 评估器 / 目标评估器 / 策略师 -examples/ demo 目标 + sandbox demo + 可直接运行的 evolution.yml -docs/ 协议规范 + 第一个进化任务规范 -tests/ 99 个单元 + 验收测试 · 14 个 fixture 角色脚本 -evidence/ checked-in 的可复现运行 artifact +evolution_kernel/ ~1,900 行 runtime(Governor · Observer · HardStops · Sandbox · Config · CLI · Scope) +evolution_kernel/roles/ 参考规划器 / 执行器 / 评估器 / 目标评估器 / 策略师 —— 随 wheel 发,用 `bundled:` 引用 +examples/ demo 目标 + sandbox demo + 可直接运行的 evolution.yml +docs/ 协议规范 + 第一个进化任务规范 +tests/ 99 个单元 + 验收测试 · 14 个 fixture 角色脚本 +evidence/ checked-in 的可复现运行 artifact ``` --- diff --git a/evolution_kernel/_bundled.py b/evolution_kernel/_bundled.py new file mode 100644 index 0000000..21cba7c --- /dev/null +++ b/evolution_kernel/_bundled.py @@ -0,0 +1,40 @@ +"""Resolve `bundled:` argv entries to absolute paths inside the wheel. 
+
+The reference roles ship inside the installed package at
+`evolution_kernel/roles/`. Templates and example configs reference them
+through the `bundled:` prefix so the same config works for both
+`pip install evolution-kernel` users and `git clone` developers.
+"""
+from __future__ import annotations
+
+from importlib.resources import as_file, files
+from pathlib import Path
+
+BUNDLED_PREFIX = "bundled:"
+
+
+def resolve_bundled(arg: str) -> str:
+    """Map a `bundled:<name>` argv entry to the absolute path of the role
+    file `evolution_kernel/roles/<name>`; return any other `arg` unchanged.
+
+    Raises ValueError when `<name>` is empty, is `.` or `..`, or contains a
+    path separator, and FileNotFoundError when no regular file of that name
+    is in the bundle (clearer signal than a downstream `subprocess` error).
+    """
+    if not arg.startswith(BUNDLED_PREFIX):
+        return arg
+    name = arg[len(BUNDLED_PREFIX):]
+    # `.` and `..` contain no separator but still name directories.
+    if name in ("", ".", "..") or "/" in name or "\\" in name:
+        raise ValueError(
+            f"bundled: prefix takes a bare filename, got {arg!r}"
+        )
+    resource = files("evolution_kernel").joinpath("roles", name)
+    with as_file(resource) as path:
+        resolved = Path(path)
+    if not resolved.is_file():
+        raise FileNotFoundError(
+            f"bundled role {name!r} not found in evolution_kernel.roles "
+            f"(looked at {resolved})"
+        )
+    return str(resolved.resolve())
diff --git a/evolution_kernel/config.py b/evolution_kernel/config.py
index 2c795cf..4e62450 100644
--- a/evolution_kernel/config.py
+++ b/evolution_kernel/config.py
@@ -295,17 +295,24 @@ def _parse_roles(value: Any) -> Roles:
     if not value:
         return Roles()
 
+    from ._bundled import resolve_bundled
+
     def _argv(label: str) -> tuple[str, ...]:
         v = value.get(label)
         if v is None:
             return ()
         if isinstance(v, str):
-            return (v,)
-        if isinstance(v, list) and all(isinstance(x, str) and x.strip() for x in v):
-            return tuple(x.strip() for x in v)
-        raise ConfigError(
-            f"`roles.{label}` must be a string or a list of non-empty strings"
-        )
+            items = (v,)
+        elif isinstance(v, list) and all(isinstance(x, str) and
x.strip() for x in v): + items = tuple(x.strip() for x in v) + else: + raise ConfigError( + f"`roles.{label}` must be a string or a list of non-empty strings" + ) + try: + return tuple(resolve_bundled(x) for x in items) + except (FileNotFoundError, ValueError) as e: + raise ConfigError(f"`roles.{label}`: {e}") from e return Roles( planner=_argv("planner"), diff --git a/roles/evaluator.py b/evolution_kernel/roles/evaluator.py similarity index 100% rename from roles/evaluator.py rename to evolution_kernel/roles/evaluator.py diff --git a/roles/executor.sh b/evolution_kernel/roles/executor.sh similarity index 100% rename from roles/executor.sh rename to evolution_kernel/roles/executor.sh diff --git a/roles/goal_evaluator.py b/evolution_kernel/roles/goal_evaluator.py similarity index 98% rename from roles/goal_evaluator.py rename to evolution_kernel/roles/goal_evaluator.py index e816a85..c838040 100644 --- a/roles/goal_evaluator.py +++ b/evolution_kernel/roles/goal_evaluator.py @@ -5,7 +5,7 @@ is complete, and writes goal_evaluation.json. LLM provider/model are read from config.json in the same run directory (same -pattern as roles/planner.py). +pattern as evolution_kernel/roles/planner.py). """ from __future__ import annotations diff --git a/roles/planner.py b/evolution_kernel/roles/planner.py similarity index 100% rename from roles/planner.py rename to evolution_kernel/roles/planner.py diff --git a/roles/strategist.py b/evolution_kernel/roles/strategist.py similarity index 98% rename from roles/strategist.py rename to evolution_kernel/roles/strategist.py index 02db832..3d18c42 100644 --- a/roles/strategist.py +++ b/evolution_kernel/roles/strategist.py @@ -5,7 +5,7 @@ (current stage, next milestone, taboo directions), and writes strategy.json. LLM provider/model are read from config.json in the same run directory (same -pattern as roles/planner.py). +pattern as evolution_kernel/roles/planner.py). 
""" from __future__ import annotations diff --git a/evolution_kernel/templates/benchmark.yml b/evolution_kernel/templates/benchmark.yml index 3ded4c4..446527a 100644 --- a/evolution_kernel/templates/benchmark.yml +++ b/evolution_kernel/templates/benchmark.yml @@ -34,6 +34,6 @@ parallel: k_branches: 3 # explore 3 candidates per round; best fitness wins roles: - planner: ["python3", "roles/planner.py"] - executor: ["bash", "roles/executor.sh"] - evaluator: ["python3", "roles/evaluator.py"] + planner: ["python3", "bundled:planner.py"] + executor: ["bash", "bundled:executor.sh"] + evaluator: ["python3", "bundled:evaluator.py"] diff --git a/evolution_kernel/templates/coverage.yml b/evolution_kernel/templates/coverage.yml index ff247a6..0b7115b 100644 --- a/evolution_kernel/templates/coverage.yml +++ b/evolution_kernel/templates/coverage.yml @@ -30,6 +30,6 @@ hard_stops: max_total_tokens: 1000000 roles: - planner: ["python3", "roles/planner.py"] - executor: ["bash", "roles/executor.sh"] - evaluator: ["python3", "roles/evaluator.py"] + planner: ["python3", "bundled:planner.py"] + executor: ["bash", "bundled:executor.sh"] + evaluator: ["python3", "bundled:evaluator.py"] diff --git a/evolution_kernel/templates/custom.yml b/evolution_kernel/templates/custom.yml index dcbc52e..9f75a03 100644 --- a/evolution_kernel/templates/custom.yml +++ b/evolution_kernel/templates/custom.yml @@ -31,6 +31,6 @@ hard_stops: max_total_tokens: 500000 roles: - planner: ["python3", "roles/planner.py"] - executor: ["bash", "roles/executor.sh"] - evaluator: ["python3", "roles/evaluator.py"] + planner: ["python3", "bundled:planner.py"] + executor: ["bash", "bundled:executor.sh"] + evaluator: ["python3", "bundled:evaluator.py"] diff --git a/evolution_kernel/templates/lint.yml b/evolution_kernel/templates/lint.yml index c2dc415..f518312 100644 --- a/evolution_kernel/templates/lint.yml +++ b/evolution_kernel/templates/lint.yml @@ -30,6 +30,6 @@ hard_stops: max_total_tokens: 500000 roles: - planner: 
["python3", "roles/planner.py"] - executor: ["bash", "roles/executor.sh"] - evaluator: ["python3", "roles/evaluator.py"] + planner: ["python3", "bundled:planner.py"] + executor: ["bash", "bundled:executor.sh"] + evaluator: ["python3", "bundled:evaluator.py"] diff --git a/evolution_kernel/templates/perf.yml b/evolution_kernel/templates/perf.yml index f4addaf..111e688 100644 --- a/evolution_kernel/templates/perf.yml +++ b/evolution_kernel/templates/perf.yml @@ -30,6 +30,6 @@ hard_stops: max_total_tokens: 1500000 roles: - planner: ["python3", "roles/planner.py"] - executor: ["bash", "roles/executor.sh"] - evaluator: ["python3", "roles/evaluator.py"] + planner: ["python3", "bundled:planner.py"] + executor: ["bash", "bundled:executor.sh"] + evaluator: ["python3", "bundled:evaluator.py"] diff --git a/examples/evolution.yml b/examples/evolution.yml index 8abf941..a949510 100644 --- a/examples/evolution.yml +++ b/examples/evolution.yml @@ -6,7 +6,7 @@ llm: model: claude-sonnet-4-6 api_key_env: ANTHROPIC_API_KEY # name of the env var holding the key -# Coding agent used by roles/executor.sh +# Coding agent used by bundled:executor.sh coding_agent: tool: aider # aider | claude-code @@ -31,6 +31,6 @@ hard_stops: max_total_tokens: 500000 # stop if total tokens reaches 500k roles: - planner: ["python3", "roles/planner.py"] - executor: ["bash", "roles/executor.sh"] - evaluator: ["python3", "roles/evaluator.py"] + planner: ["python3", "bundled:planner.py"] + executor: ["bash", "bundled:executor.sh"] + evaluator: ["python3", "bundled:evaluator.py"] diff --git a/examples/oss_fix_demo/bots/executor.py b/examples/oss_fix_demo/bots/executor.py index 347046e..cf63f31 100644 --- a/examples/oss_fix_demo/bots/executor.py +++ b/examples/oss_fix_demo/bots/executor.py @@ -1,6 +1,6 @@ """OSS-fix-demo executor: invokes `claude -p` inside the worktree. 
-The kernel-bundled `roles/executor.sh` claude-code path drops permission +The kernel-bundled `evolution_kernel/roles/executor.sh` claude-code path drops permission flags, so claude refuses to make edits in non-interactive mode. This wrapper sets `--permission-mode acceptEdits` so the agent actually edits files. The cost is whatever your Claude Pro / Max subscription already diff --git a/pyproject.toml b/pyproject.toml index 08ef086..b8f7c53 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "evolution-kernel" -version = "1.1.1" +version = "1.1.2" description = "A minimal autonomous evolution kernel with isolated planner, executor, evaluator roles." readme = "README.md" requires-python = ">=3.10" @@ -52,5 +52,5 @@ evolution-kernel = "evolution_kernel.cli:main" packages = ["evolution_kernel"] [tool.setuptools.package-data] -evolution_kernel = ["templates/*.yml"] +evolution_kernel = ["templates/*.yml", "roles/*"] diff --git a/tests/test_bundled_roles.py b/tests/test_bundled_roles.py new file mode 100644 index 0000000..7baf8ba --- /dev/null +++ b/tests/test_bundled_roles.py @@ -0,0 +1,94 @@ +"""Tests for the `bundled:` prefix in `roles.*` argv entries.""" +from __future__ import annotations + +import tempfile +import unittest +from pathlib import Path + +from evolution_kernel._bundled import BUNDLED_PREFIX, resolve_bundled +from evolution_kernel.config import ConfigError, load_config + + +SAMPLE_CONFIG = """\ +mission: "test" +llm: + provider: anthropic + model: claude-sonnet-4-6 + api_key_env: ANTHROPIC_API_KEY +coding_agent: + tool: aider +evidence_sources: + - type: file + path: "metrics.json" +mutation_scope: + allowed_paths: + - "src/" +roles: + planner: ["python3", "bundled:planner.py"] + executor: ["bash", "bundled:executor.sh"] + evaluator: ["python3", "bundled:evaluator.py"] +""" + + +class BundledPrefixTest(unittest.TestCase): + def test_resolve_bundled_returns_absolute_path_to_existing_file(self): + path = 
resolve_bundled("bundled:executor.sh") + resolved = Path(path) + self.assertTrue(resolved.is_absolute(), f"expected absolute path, got {path!r}") + self.assertTrue(resolved.exists(), f"resolved path does not exist: {path!r}") + self.assertEqual(resolved.name, "executor.sh") + self.assertEqual(resolved.parent.name, "roles") + + def test_resolve_bundled_is_noop_for_non_prefixed_strings(self): + self.assertEqual(resolve_bundled("python3"), "python3") + self.assertEqual(resolve_bundled("/abs/path/to/exec"), "/abs/path/to/exec") + self.assertEqual(resolve_bundled("./relative/path.py"), "./relative/path.py") + + def test_resolve_bundled_rejects_path_separators(self): + with self.assertRaises(ValueError): + resolve_bundled("bundled:../escape.py") + with self.assertRaises(ValueError): + resolve_bundled("bundled:subdir/file.py") + with self.assertRaises(ValueError): + resolve_bundled(BUNDLED_PREFIX) # empty name + + def test_resolve_bundled_raises_for_missing_files(self): + with self.assertRaises(FileNotFoundError): + resolve_bundled("bundled:does-not-exist.xyz") + + def test_load_config_resolves_bundled_in_roles(self): + with tempfile.TemporaryDirectory() as tmpdir: + cfg_path = Path(tmpdir) / "evolution.yml" + cfg_path.write_text(SAMPLE_CONFIG, encoding="utf-8") + cfg = load_config(str(cfg_path)) + + # `bundled:planner.py` should have become an absolute path to an + # existing file inside the installed package. + planner_script = Path(cfg.roles.planner[1]) + executor_script = Path(cfg.roles.executor[1]) + evaluator_script = Path(cfg.roles.evaluator[1]) + + for script in (planner_script, executor_script, evaluator_script): + self.assertTrue(script.is_absolute(), f"not absolute: {script}") + self.assertTrue(script.exists(), f"missing: {script}") + self.assertEqual(script.parent.name, "roles") + + # Surrounding argv entries (python3, bash) must be untouched. 
+ self.assertEqual(cfg.roles.planner[0], "python3") + self.assertEqual(cfg.roles.executor[0], "bash") + self.assertEqual(cfg.roles.evaluator[0], "python3") + + def test_load_config_surfaces_missing_bundled_role_as_config_error(self): + bad_cfg = SAMPLE_CONFIG.replace( + 'bundled:planner.py', 'bundled:nope.py' + ) + with tempfile.TemporaryDirectory() as tmpdir: + cfg_path = Path(tmpdir) / "evolution.yml" + cfg_path.write_text(bad_cfg, encoding="utf-8") + with self.assertRaises(ConfigError) as ctx: + load_config(str(cfg_path)) + self.assertIn("nope.py", str(ctx.exception)) + + +if __name__ == "__main__": # pragma: no cover + unittest.main()