From 06ed99bf79eb47d85fd679f200296e0a6284ceb9 Mon Sep 17 00:00:00 2001 From: mountain Date: Mon, 18 May 2026 12:55:02 +0800 Subject: [PATCH 01/25] feat(skill): scaffold openkb/prompts/ for static system prompts --- openkb/prompts/__init__.py | 21 +++++++++++++++++++++ openkb/prompts/skill_compile.md | 1 + 2 files changed, 22 insertions(+) create mode 100644 openkb/prompts/__init__.py create mode 100644 openkb/prompts/skill_compile.md diff --git a/openkb/prompts/__init__.py b/openkb/prompts/__init__.py new file mode 100644 index 00000000..c28b2b1d --- /dev/null +++ b/openkb/prompts/__init__.py @@ -0,0 +1,21 @@ +"""Static prompt templates loaded from disk. + +Keeping multi-paragraph LLM system prompts in `.md` files (rather than triple- +quoted Python strings) makes them readable in editors with markdown previews +and easier to diff/review. +""" +from __future__ import annotations + +from pathlib import Path + +_PROMPTS_DIR = Path(__file__).parent + + +def load_prompt(name: str) -> str: + """Return the contents of ``openkb/prompts/.md`` as a string. + + Args: + name: Filename without the ``.md`` suffix (e.g. ``"skill_compile"``). + """ + path = _PROMPTS_DIR / f"{name}.md" + return path.read_text(encoding="utf-8") diff --git a/openkb/prompts/skill_compile.md b/openkb/prompts/skill_compile.md new file mode 100644 index 00000000..cd88e865 --- /dev/null +++ b/openkb/prompts/skill_compile.md @@ -0,0 +1 @@ +PLACEHOLDER — populated in Task 4. From 4f1db4709bbe30910252ffdb686fbb25ee8251f6 Mon Sep 17 00:00:00 2001 From: mountain Date: Mon, 18 May 2026 12:59:26 +0800 Subject: [PATCH 02/25] feat(skill): path-scoped IO tools for the skill compile agent --- openkb/agent/skill_tools.py | 68 ++++++++++++++++++++++++++++++++++++ tests/test_skill_tools.py | 69 +++++++++++++++++++++++++++++++++++++ 2 files changed, 137 insertions(+) create mode 100644 openkb/agent/skill_tools.py create mode 100644 tests/test_skill_tools.py diff --git a/openkb/agent/skill_tools.py b/openkb/agent/skill_tools.py new file mode 100644 index 00000000..ef24d94e --- /dev/null +++ b/openkb/agent/skill_tools.py @@ -0,0 +1,68 @@ +"""Path-scoped IO tools for the skill-compile agent. + +The skill-compile agent runs with three capabilities: + * READ from /wiki/** (the substrate) + * QUERY the wiki via openkb query (semantic retrieval, see skill_compiler.py) + * WRITE under /output/skills//** (the only output destination) + +These helpers enforce those boundaries at the Python level — every tool +resolves its target path, then verifies it stays inside the allowed root. +Path traversal (``..``) and absolute paths are rejected outright. +""" +from __future__ import annotations + +from pathlib import Path + + +def list_wiki_dir(directory: str, wiki_root: str) -> str: + """List ``.md`` files in a wiki subdirectory. + + Args: + directory: Path relative to *wiki_root* (e.g. ``"concepts"``). + wiki_root: Absolute path to ``/wiki``. + """ + root = Path(wiki_root).resolve() + target = (root / directory).resolve() + if not target.is_relative_to(root): + return "Access denied: path escapes wiki root." + if not target.exists() or not target.is_dir(): + return "No files found." + names = sorted(p.name for p in target.iterdir() if p.suffix == ".md") + return "\n".join(names) if names else "No files found." + + +def read_wiki_file_for_skill(path: str, wiki_root: str) -> str: + """Read a Markdown file from the wiki. + + Args: + path: File path relative to *wiki_root* (e.g. ``"concepts/attention.md"``). + wiki_root: Absolute path to ``/wiki``. + """ + root = Path(wiki_root).resolve() + full = (root / path).resolve() + if not full.is_relative_to(root): + return "Access denied: path escapes wiki root." + if not full.exists(): + return f"File not found: {path}" + return full.read_text(encoding="utf-8") + + +def write_skill_file(path: str, content: str, skill_root: str) -> str: + """Write a file under the skill directory. + + Args: + path: Path relative to *skill_root* (e.g. ``"SKILL.md"`` or + ``"references/methodology.md"``). Absolute paths and ``..`` + traversal are rejected. + content: File contents. + skill_root: Absolute path to ``/output/skills/``. + """ + if path.startswith("/") or ".." in Path(path).parts: + return "Access denied: only relative paths within the skill directory are allowed." + root = Path(skill_root).resolve() + full = (root / path).resolve() + if not full.is_relative_to(root): + return "Access denied: path escapes skill root." + full.parent.mkdir(parents=True, exist_ok=True) + full.write_text(content, encoding="utf-8") + return f"Written: {path}" diff --git a/tests/test_skill_tools.py b/tests/test_skill_tools.py new file mode 100644 index 00000000..3c620c97 --- /dev/null +++ b/tests/test_skill_tools.py @@ -0,0 +1,69 @@ +"""Tests for openkb.agent.skill_tools — path-scoped IO for the skill compile agent.""" +from __future__ import annotations + +import pytest + +from openkb.agent.skill_tools import ( + list_wiki_dir, + read_wiki_file_for_skill, + write_skill_file, +) + + +def _make_wiki(tmp_path): + wiki = tmp_path / "wiki" + (wiki / "concepts").mkdir(parents=True) + (wiki / "summaries").mkdir(parents=True) + (wiki / "index.md").write_text("# index\n") + (wiki / "concepts" / "attention.md").write_text("# attention\n") + return wiki + + +def test_read_wiki_file_returns_content(tmp_path): + wiki = _make_wiki(tmp_path) + out = read_wiki_file_for_skill("concepts/attention.md", str(wiki)) + assert "attention" in out + + +def test_read_wiki_file_rejects_escape(tmp_path): + wiki = _make_wiki(tmp_path) + out = read_wiki_file_for_skill("../secret.txt", str(wiki)) + assert "Access denied" in out + + +def test_list_wiki_dir_returns_filenames(tmp_path): + wiki = _make_wiki(tmp_path) + out = list_wiki_dir("concepts", str(wiki)) + assert "attention.md" in out + + +def test_list_wiki_dir_handles_missing(tmp_path): + wiki = _make_wiki(tmp_path) + out = list_wiki_dir("nonexistent", str(wiki)) + assert out == "No files found." + + +def test_write_skill_file_creates_file_and_parents(tmp_path): + skill_root = tmp_path / "output" / "skills" / "demo" + out = write_skill_file( + "references/methodology.md", + "# methodology\n", + str(skill_root), + ) + assert "Written" in out + assert (skill_root / "references" / "methodology.md").read_text() == "# methodology\n" + + +def test_write_skill_file_rejects_path_traversal(tmp_path): + skill_root = tmp_path / "output" / "skills" / "demo" + skill_root.mkdir(parents=True) + out = write_skill_file("../escaped.md", "x", str(skill_root)) + assert "Access denied" in out + assert not (skill_root.parent / "escaped.md").exists() + + +def test_write_skill_file_rejects_absolute_path(tmp_path): + skill_root = tmp_path / "output" / "skills" / "demo" + skill_root.mkdir(parents=True) + out = write_skill_file("/etc/passwd", "x", str(skill_root)) + assert "Access denied" in out From fd26e264752152259fade4b88f094ca7cdafd4b3 Mon Sep 17 00:00:00 2001 From: mountain Date: Mon, 18 May 2026 13:02:57 +0800 Subject: [PATCH 03/25] feat(skill): kebab-case skill name validation --- openkb/cli.py | 22 ++++++++++++++++++ tests/test_skill_name_validation.py | 35 +++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) create mode 100644 tests/test_skill_name_validation.py diff --git a/openkb/cli.py b/openkb/cli.py index 60d8551a..f5efc6e4 100644 --- a/openkb/cli.py +++ b/openkb/cli.py @@ -143,6 +143,28 @@ def _find_kb_dir(override: Path | None = None) -> Path | None: return None +def _validate_skill_name(name: str) -> str | None: + """Validate a skill slug. Returns None if OK, an error message if not. + + Rules: lowercase ``[a-z0-9-]``, no leading/trailing dash, no consecutive + dashes, 1-64 characters. This matches the directory name we'll create + under ``/output/skills/`` and the ``name:`` frontmatter field. + """ + if not name: + return "Skill name must not be empty." + if len(name) > 64: + return "Skill name must be at most 64 characters." + if not all(c.islower() or c.isdigit() or c == "-" for c in name): + return "Skill name must contain only lowercase letters, digits, and dashes." + if name.startswith("-"): + return "Skill name must not have a leading dash." + if name.endswith("-"): + return "Skill name must not have a trailing dash." + if "--" in name: + return "Skill name must not contain consecutive dashes." + return None + + def add_single_file(file_path: Path, kb_dir: Path) -> Literal["added", "skipped", "failed"]: """Convert, index, and compile a single document into the knowledge base. diff --git a/tests/test_skill_name_validation.py b/tests/test_skill_name_validation.py new file mode 100644 index 00000000..47e548dd --- /dev/null +++ b/tests/test_skill_name_validation.py @@ -0,0 +1,35 @@ +"""Tests for openkb.cli._validate_skill_name — kebab-case slug enforcement.""" +from __future__ import annotations + +import pytest + +from openkb.cli import _validate_skill_name + + +@pytest.mark.parametrize("name", [ + "karpathy-thinking", + "us-tax-2026", + "linalg-tutor", + "a", + "a-b-c-d-e-f-g", +]) +def test_accepts_valid_kebab_case(name): + assert _validate_skill_name(name) is None # None means OK + + +@pytest.mark.parametrize("name,reason_fragment", [ + ("", "empty"), + ("UPPER", "lowercase"), + ("has space", "lowercase"), + ("under_score", "lowercase"), + ("dots.bad", "lowercase"), + ("-leading", "leading"), + ("trailing-", "trailing"), + ("double--dash", "consecutive"), + ("../escape", "lowercase"), + ("a" * 65, "64 characters"), +]) +def test_rejects_invalid_names(name, reason_fragment): + msg = _validate_skill_name(name) + assert msg is not None + assert reason_fragment in msg.lower() From 11b52ca586b9b336c03031bb986549e80e875308 Mon Sep 17 00:00:00 2001 From: mountain Date: Mon, 18 May 2026 13:05:37 +0800 Subject: [PATCH 04/25] fix(skill): _validate_skill_name now enforces ASCII slug strictly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous c.islower() check accepted Unicode lowercase letters like 'é' and 'ü', contradicting the [a-z0-9-] docstring promise. Names are used as filesystem directories and YAML frontmatter, so the slug must stay ASCII. Add explicit a-z / 0-9 range check + 2 regression tests. --- openkb/cli.py | 2 +- tests/test_skill_name_validation.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/openkb/cli.py b/openkb/cli.py index f5efc6e4..2f525818 100644 --- a/openkb/cli.py +++ b/openkb/cli.py @@ -154,7 +154,7 @@ def _validate_skill_name(name: str) -> str | None: return "Skill name must not be empty." if len(name) > 64: return "Skill name must be at most 64 characters." - if not all(c.islower() or c.isdigit() or c == "-" for c in name): + if not all(("a" <= c <= "z") or ("0" <= c <= "9") or c == "-" for c in name): return "Skill name must contain only lowercase letters, digits, and dashes." if name.startswith("-"): return "Skill name must not have a leading dash." diff --git a/tests/test_skill_name_validation.py b/tests/test_skill_name_validation.py index 47e548dd..ab8d776a 100644 --- a/tests/test_skill_name_validation.py +++ b/tests/test_skill_name_validation.py @@ -28,6 +28,8 @@ def test_accepts_valid_kebab_case(name): ("double--dash", "consecutive"), ("../escape", "lowercase"), ("a" * 65, "64 characters"), + ("café", "lowercase"), + ("ünicöde", "lowercase"), ]) def test_rejects_invalid_names(name, reason_fragment): msg = _validate_skill_name(name) From f007a3d4873a7d53f13f4ccde386ed45e5ec4216 Mon Sep 17 00:00:00 2001 From: mountain Date: Mon, 18 May 2026 13:07:06 +0800 Subject: [PATCH 05/25] feat(skill): skill-compile system prompt template --- openkb/prompts/skill_compile.md | 98 ++++++++++++++++++++++++++++++++- 1 file changed, 97 insertions(+), 1 deletion(-) diff --git a/openkb/prompts/skill_compile.md b/openkb/prompts/skill_compile.md index cd88e865..7023bf80 100644 --- a/openkb/prompts/skill_compile.md +++ b/openkb/prompts/skill_compile.md @@ -1 +1,97 @@ -PLACEHOLDER — populated in Task 4. +You are the OpenKB skill-compile agent. Your job: read the knowledge base +wiki at `/wiki/` and produce a redistributable Anthropic Skill at +`/output/skills/{skill_name}/`. Other agents — Claude Code, Codex CLI, +Gemini CLI, Cursor — will install this skill and load it on demand, so the +output must follow the Anthropic Skills directory spec exactly. + +## User intent + +The user requested this skill with the following description. Treat it as +authoritative; the whole skill exists to serve this intent. + +> {intent} + +## Wiki schema reference + +The wiki you can read from is structured as follows. + +{wiki_schema} + +## Your tools + +* `list_wiki_dir(directory)` — list files in a wiki subdirectory. +* `read_wiki_file(path)` — read a markdown file under `/wiki/`. +* `query_wiki(question)` — semantic search; returns a short answer with + citations. Use sparingly (it's an LLM call). Prefer direct file reads + when you already know which page you need. +* `write_skill_file(path, content)` — write under + `/output/skills/{skill_name}/`. Path must be relative; cannot escape + the skill root. +* `done(summary)` — signal completion. Call this exactly once when you are + finished writing files. After calling `done`, do not write any more. + +## Required output + +You MUST write `SKILL.md` at the skill root, with YAML frontmatter: + +```yaml +--- +name: {skill_name} +description: +--- + +# + + + +## When to use this skill + +- + +## Approach + + + +## References + +- [[references/]] (only if you wrote references) +``` + +The `description:` field is what other agents see. It must be specific +(not "a skill about X") and accurate. Spend extra care here. + +## Optional output + +* `references/.md` — depth material the loading agent reads on + demand. Use when content is too long for `SKILL.md` or only useful for + specific sub-questions. +* `scripts/.py` — executable helpers, ONLY if the user's intent + implies deterministic computation (e.g. a tax lookup, a formula + solver). Stick to the Python standard library; the loading environment + is unknown. + +## Working method + +1. Read `wiki/index.md` to see what's in the KB. +2. Form a brief mental plan: which concepts and summaries best serve the + user's intent? Which references will you need? +3. Read the relevant `wiki/concepts/*.md` and `wiki/summaries/*.md` files. +4. Write `SKILL.md` first. Then references, then scripts (if any). +5. Self-check: does every `[[references/...]]` link in `SKILL.md` resolve + to a file you actually wrote? Is the description specific? Is the + `name:` frontmatter field exactly `{skill_name}`? +6. Call `done(summary)` with a one-paragraph summary of what you wrote. + +## Anti-hallucination rules + +* Cite sources for non-trivial claims using `[[concepts/]]` or + `[[summaries/]]` wikilinks back to the wiki. +* If the wiki doesn't cover something the user's intent implies, write + what you can and note the gap in `SKILL.md`'s body — do not fabricate. +* Do not copy large verbatim passages from `wiki/sources/`. Summarise and + cite. The skill must be redistributable; bulk-copying source content + could carry copyright risk that the user takes on at submission time. + +Begin. From 6bbfb985fc5b34596172e70c23df3773a85fd332 Mon Sep 17 00:00:00 2001 From: mountain Date: Mon, 18 May 2026 13:10:08 +0800 Subject: [PATCH 06/25] feat(skill): build skill-compile agent + run loop --- openkb/agent/skill_compiler.py | 142 +++++++++++++++++++++++++++++++++ tests/test_skill_compiler.py | 84 +++++++++++++++++++ 2 files changed, 226 insertions(+) create mode 100644 openkb/agent/skill_compiler.py create mode 100644 tests/test_skill_compiler.py diff --git a/openkb/agent/skill_compiler.py b/openkb/agent/skill_compiler.py new file mode 100644 index 00000000..79b64751 --- /dev/null +++ b/openkb/agent/skill_compiler.py @@ -0,0 +1,142 @@ +"""The skill-compile agent. + +Builds on the same openai-agents-SDK + LiteLLM stack as the query agent +in ``openkb.agent.query``. The differences: + +* System prompt comes from ``openkb/prompts/skill_compile.md`` and is + interpolated with the user's intent and the target skill name. +* Tools are scoped: read the wiki, query the wiki, write only within + the target skill directory. +* The runner verifies that ``SKILL.md`` was written before declaring + success — an agent that gets confused and never writes the required + output should fail loudly rather than silently produce an empty dir. +""" +from __future__ import annotations + +import asyncio +from pathlib import Path + +from agents import Agent, Runner, function_tool +from agents.model_settings import ModelSettings + +from openkb.agent.skill_tools import ( + list_wiki_dir, + read_wiki_file_for_skill, + write_skill_file, +) +from openkb.prompts import load_prompt +from openkb.schema import get_agents_md + +MAX_TURNS = 80 # higher than query (50) because compile can write multiple files + + +def build_skill_compile_agent( + *, + wiki_root: str, + skill_root: str, + skill_name: str, + intent: str, + model: str, +) -> Agent: + """Build the openai-agents Agent for compiling one skill. + + Args: + wiki_root: ``/wiki`` absolute path. + skill_root: ``/output/skills/`` absolute path. Will be + created if it does not exist. + skill_name: kebab-case slug, also the ``name:`` frontmatter value. + intent: the user's natural-language description of what this skill + should do. + model: LiteLLM-formatted model name from KB config. + """ + Path(skill_root).mkdir(parents=True, exist_ok=True) + + wiki_schema = get_agents_md(Path(wiki_root)) + instructions = load_prompt("skill_compile").format( + intent=intent, + skill_name=skill_name, + wiki_schema=wiki_schema, + ) + + @function_tool + def list_wiki(directory: str) -> str: + """List .md files in a wiki subdirectory (e.g. 'concepts').""" + return list_wiki_dir(directory, wiki_root) + + @function_tool + def read_wiki(path: str) -> str: + """Read a wiki markdown file by path relative to wiki/ (e.g. 'concepts/attention.md').""" + return read_wiki_file_for_skill(path, wiki_root) + + @function_tool + def query_wiki(question: str) -> str: + """Run a semantic query over the wiki and return the answer. + + Use sparingly — this is itself an LLM call. Prefer reading specific + files when you already know which one you want. + """ + # Lazy import to avoid circular dependency at module import. + from openkb.agent.query import run_query + kb_dir = Path(wiki_root).parent + config_model = model + return asyncio.run(run_query(question, kb_dir, config_model, stream=False)) + + @function_tool + def write_skill(path: str, content: str) -> str: + """Write a file under the skill directory.""" + return write_skill_file(path, content, skill_root) + + @function_tool + def done(summary: str) -> str: + """Signal that the skill is complete. Call exactly once when finished.""" + return f"Compilation marked done: {summary}" + + return Agent( + name="skill-compiler", + instructions=instructions, + tools=[list_wiki, read_wiki, query_wiki, write_skill, done], + model=f"litellm/{model}", + model_settings=ModelSettings(parallel_tool_calls=False), + ) + + +async def run_skill_compile( + *, + kb_dir: Path, + skill_name: str, + intent: str, + model: str, +) -> Path: + """Compile a single skill from the KB's wiki. + + Returns the path to the produced skill directory. Raises + ``RuntimeError`` if the agent finishes without writing ``SKILL.md``. + """ + wiki_root = str(kb_dir / "wiki") + skill_root = kb_dir / "output" / "skills" / skill_name + + agent = build_skill_compile_agent( + wiki_root=wiki_root, + skill_root=str(skill_root), + skill_name=skill_name, + intent=intent, + model=model, + ) + + # Single user message kicks off the compile. The system prompt already + # contains the intent — this just nudges the agent to start. + seed = ( + f"Compile the skill '{skill_name}'. Follow the system prompt's " + f"working method. Read the wiki, then write the skill files." + ) + + await Runner.run(agent, seed, max_turns=MAX_TURNS) + + if not (skill_root / "SKILL.md").exists(): + raise RuntimeError( + f"Skill compilation finished but agent did not write SKILL.md " + f"at {skill_root}. The skill is incomplete; check the wiki has " + f"content related to your intent." + ) + + return skill_root diff --git a/tests/test_skill_compiler.py b/tests/test_skill_compiler.py new file mode 100644 index 00000000..6fd06f22 --- /dev/null +++ b/tests/test_skill_compiler.py @@ -0,0 +1,84 @@ +"""Tests for openkb.agent.skill_compiler. + +The agent itself is mocked (we don't want to spend tokens in unit tests). +What we DO test: + * Tools wire up correctly (write_skill_file is bound to the right path) + * System prompt gets the intent and skill_name interpolated + * The skill output dir is created before the agent starts + * If the agent finishes without writing SKILL.md, we surface an error +""" +from __future__ import annotations + +import pytest +from unittest.mock import AsyncMock, patch + +from openkb.agent.skill_compiler import ( + build_skill_compile_agent, + run_skill_compile, +) + + +def _make_kb(tmp_path): + (tmp_path / "wiki" / "concepts").mkdir(parents=True) + (tmp_path / "wiki" / "summaries").mkdir(parents=True) + (tmp_path / "wiki" / "index.md").write_text("# index\n\nNo concepts yet.\n") + (tmp_path / ".openkb").mkdir() + (tmp_path / ".openkb" / "config.yaml").write_text("model: gpt-4o-mini\n") + return tmp_path + + +def test_build_agent_interpolates_intent_and_name(tmp_path): + kb = _make_kb(tmp_path) + agent = build_skill_compile_agent( + wiki_root=str(kb / "wiki"), + skill_root=str(kb / "output" / "skills" / "demo"), + skill_name="demo", + intent="distill a tax-lookup skill", + model="gpt-4o-mini", + ) + assert "demo" in agent.instructions + assert "distill a tax-lookup skill" in agent.instructions + + +@pytest.mark.asyncio +async def test_run_skill_compile_creates_output_dir(tmp_path): + kb = _make_kb(tmp_path) + target = kb / "output" / "skills" / "demo" + # Fake the agent run: just write a minimal SKILL.md to simulate success. + async def fake_runner(*args, **kwargs): + target.mkdir(parents=True, exist_ok=True) + (target / "SKILL.md").write_text( + "---\nname: demo\ndescription: test\n---\n\n# demo\n" + ) + from types import SimpleNamespace + return SimpleNamespace(final_output="done") + + with patch("openkb.agent.skill_compiler.Runner.run", new=AsyncMock(side_effect=fake_runner)): + await run_skill_compile( + kb_dir=kb, + skill_name="demo", + intent="test intent", + model="gpt-4o-mini", + ) + + assert (target / "SKILL.md").exists() + + +@pytest.mark.asyncio +async def test_run_skill_compile_raises_when_no_skill_md_written(tmp_path): + kb = _make_kb(tmp_path) + target = kb / "output" / "skills" / "demo" + target.mkdir(parents=True, exist_ok=True) + # Agent runs but never writes SKILL.md. + async def fake_runner(*args, **kwargs): + from types import SimpleNamespace + return SimpleNamespace(final_output="done") + + with patch("openkb.agent.skill_compiler.Runner.run", new=AsyncMock(side_effect=fake_runner)): + with pytest.raises(RuntimeError, match="did not write SKILL.md"): + await run_skill_compile( + kb_dir=kb, + skill_name="demo", + intent="test intent", + model="gpt-4o-mini", + ) From 9cbc5c6ca2fda06bc564bb4f3cdcd7636905d0bf Mon Sep 17 00:00:00 2001 From: mountain Date: Mon, 18 May 2026 13:13:57 +0800 Subject: [PATCH 07/25] feat(skill): regenerate per-KB marketplace.json from output/skills/ --- openkb/marketplace.py | 111 ++++++++++++++++++++++++++++++++++++++ tests/test_marketplace.py | 106 ++++++++++++++++++++++++++++++++++++ 2 files changed, 217 insertions(+) create mode 100644 openkb/marketplace.py create mode 100644 tests/test_marketplace.py diff --git a/openkb/marketplace.py b/openkb/marketplace.py new file mode 100644 index 00000000..3e803c4d --- /dev/null +++ b/openkb/marketplace.py @@ -0,0 +1,111 @@ +"""Regenerate the per-KB Claude Code plugin marketplace manifest. + +After every `openkb skill new` (and after any chat-side edit to a SKILL.md +frontmatter), this module scans ``/output/skills/*/SKILL.md`` and +rewrites ``/.claude-plugin/marketplace.json`` listing all currently +compiled skills. + +The schema matches the OpenKB repo's own ``.claude-plugin/marketplace.json``: +one plugin entry per KB, with a ``skills`` array of relative paths. Other +agent CLIs (``npx skills add``, Claude Code ``/plugin marketplace add``) +install from this manifest. + +This is a deterministic step — no LLM calls. +""" +from __future__ import annotations + +import json +import re +from pathlib import Path +from typing import Any + +from openkb.config import load_config + + +_FRONTMATTER_RE = re.compile(r"^---\s*\n(.*?)\n---", re.DOTALL) +_DESCRIPTION_RE = re.compile(r"^description:\s*(.+?)\s*$", re.MULTILINE) + + +def _read_skill_description(skill_md: Path) -> str: + """Pull the ``description:`` field from a SKILL.md frontmatter block. + + Returns an empty string if missing or unparseable — the manifest still + lists the skill, just with a generic plugin-level description. + """ + if not skill_md.exists(): + return "" + text = skill_md.read_text(encoding="utf-8") + fm_match = _FRONTMATTER_RE.match(text) + if not fm_match: + return "" + desc_match = _DESCRIPTION_RE.search(fm_match.group(1)) + if not desc_match: + return "" + return desc_match.group(1).strip() + + +def _kb_name(kb_dir: Path) -> str: + """Use the KB directory name as the marketplace name (sluggable).""" + return kb_dir.name + + +def _list_skill_dirs(kb_dir: Path) -> list[Path]: + """Return skill directories under /output/skills/ that contain a SKILL.md.""" + skills_root = kb_dir / "output" / "skills" + if not skills_root.is_dir(): + return [] + return sorted( + d for d in skills_root.iterdir() + if d.is_dir() and (d / "SKILL.md").exists() + ) + + +def _build_manifest(kb_dir: Path) -> dict[str, Any]: + skills = _list_skill_dirs(kb_dir) + skill_paths = [f"./output/skills/{d.name}" for d in skills] + + # Aggregate description for the manifest metadata + name = _kb_name(kb_dir) + metadata_desc = ( + f"Skills compiled from the '{name}' knowledge base via OpenKB." + ) + if skills: + first_desc = _read_skill_description(skills[0] / "SKILL.md") + if first_desc: + metadata_desc += f" Featured: {first_desc[:200]}" + + # Pull KB config for version if available; default to 0.1.0 + config = load_config(kb_dir / ".openkb" / "config.yaml") + version = str(config.get("version", "0.1.0")) + + return { + "name": name, + "metadata": { + "description": metadata_desc, + "version": version, + }, + "plugins": [ + { + "name": name, + "description": metadata_desc, + "source": "./", + "version": version, + "skills": skill_paths, + } + ], + } + + +def regenerate_marketplace(kb_dir: Path) -> Path: + """Rewrite ``/.claude-plugin/marketplace.json`` from current skills. + + Returns the path to the manifest. Creates ``.claude-plugin/`` if needed. + Safe to call when zero skills exist (manifest lists an empty ``skills`` + array). + """ + manifest = _build_manifest(kb_dir) + out_dir = kb_dir / ".claude-plugin" + out_dir.mkdir(parents=True, exist_ok=True) + out_path = out_dir / "marketplace.json" + out_path.write_text(json.dumps(manifest, indent=2) + "\n", encoding="utf-8") + return out_path diff --git a/tests/test_marketplace.py b/tests/test_marketplace.py new file mode 100644 index 00000000..3a7df48d --- /dev/null +++ b/tests/test_marketplace.py @@ -0,0 +1,106 @@ +"""Tests for openkb.marketplace — regenerate /.claude-plugin/marketplace.json +from /output/skills/*/SKILL.md.""" +from __future__ import annotations + +import json +import textwrap + +from openkb.marketplace import regenerate_marketplace + + +def _make_kb(tmp_path): + (tmp_path / ".openkb").mkdir() + (tmp_path / ".openkb" / "config.yaml").write_text("model: gpt-4o-mini\n") + (tmp_path / "output" / "skills").mkdir(parents=True) + return tmp_path + + +def _make_skill(kb, name, description): + d = kb / "output" / "skills" / name + d.mkdir(parents=True, exist_ok=True) + (d / "SKILL.md").write_text(textwrap.dedent(f"""\ + --- + name: {name} + description: {description} + --- + + # {name} + """)) + + +def test_regenerate_creates_manifest_with_one_skill(tmp_path): + kb = _make_kb(tmp_path) + _make_skill(kb, "karpathy-thinking", "Reason like Karpathy on transformers.") + + regenerate_marketplace(kb) + + manifest = json.loads((kb / ".claude-plugin" / "marketplace.json").read_text()) + assert manifest["plugins"][0]["skills"] == ["./output/skills/karpathy-thinking"] + # The plugin's description carries the SKILL.md description for the latest entry, + # OR we use a fixed manifest-level description — implementation choice; we just + # check the SKILL.md description appears somewhere in the manifest JSON. + assert "Reason like Karpathy" in json.dumps(manifest) + + +def test_regenerate_lists_multiple_skills_alphabetical(tmp_path): + kb = _make_kb(tmp_path) + _make_skill(kb, "zeta-skill", "z") + _make_skill(kb, "alpha-skill", "a") + _make_skill(kb, "middle-skill", "m") + + regenerate_marketplace(kb) + + manifest = json.loads((kb / ".claude-plugin" / "marketplace.json").read_text()) + assert manifest["plugins"][0]["skills"] == [ + "./output/skills/alpha-skill", + "./output/skills/middle-skill", + "./output/skills/zeta-skill", + ] + + +def test_regenerate_replaces_existing_file(tmp_path): + kb = _make_kb(tmp_path) + (kb / ".claude-plugin").mkdir() + (kb / ".claude-plugin" / "marketplace.json").write_text('{"name": "stale"}') + + _make_skill(kb, "demo", "d") + regenerate_marketplace(kb) + + manifest = json.loads((kb / ".claude-plugin" / "marketplace.json").read_text()) + assert manifest["name"] != "stale" + assert manifest["plugins"][0]["skills"] == ["./output/skills/demo"] + + +def test_regenerate_handles_zero_skills(tmp_path): + kb = _make_kb(tmp_path) + regenerate_marketplace(kb) + + manifest = json.loads((kb / ".claude-plugin" / "marketplace.json").read_text()) + assert manifest["plugins"][0]["skills"] == [] + + +def test_regenerate_skips_skill_with_missing_skill_md(tmp_path): + kb = _make_kb(tmp_path) + # Folder exists but no SKILL.md — should not appear in manifest + (kb / "output" / "skills" / "broken").mkdir(parents=True) + _make_skill(kb, "good", "g") + + regenerate_marketplace(kb) + + manifest = json.loads((kb / ".claude-plugin" / "marketplace.json").read_text()) + assert manifest["plugins"][0]["skills"] == ["./output/skills/good"] + + +def test_regenerate_reads_description_from_frontmatter(tmp_path): + kb = _make_kb(tmp_path) + _make_skill(kb, "demo", "the specific description goes here") + regenerate_marketplace(kb) + + manifest = json.loads((kb / ".claude-plugin" / "marketplace.json").read_text()) + # Manifest's metadata.description should at minimum mention the KB context; + # the per-skill description lives in the SKILL.md itself (the manifest just + # points at the skill directories). + assert "marketplace.json" in str(kb / ".claude-plugin" / "marketplace.json") + # Reload SKILL.md to confirm description preserved + skill_md = (kb / "output" / "skills" / "demo" / "SKILL.md").read_text() + assert "the specific description goes here" in skill_md From e12dfd2bfc2d5a1659fb8afcb3918b05688e3df3 Mon Sep 17 00:00:00 2001 From: mountain Date: Mon, 18 May 2026 13:17:43 +0800 Subject: [PATCH 08/25] fix(skill): include owner/author in marketplace.json from git config The plan-verbatim implementation omitted the 'owner' field at the top level. Some marketplace consumers (Claude Code's /plugin marketplace add) expect 'owner' for listing/attribution. Derive from git config; fall back to 'openkb-user' if git isn't configured. Mirror as plugin-level 'author' to match the existing repo convention. --- openkb/marketplace.py | 38 ++++++++++++++++++++++++++--- tests/test_marketplace.py | 51 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 85 insertions(+), 4 deletions(-) diff --git a/openkb/marketplace.py b/openkb/marketplace.py index 3e803c4d..48887afe 100644 --- a/openkb/marketplace.py +++ b/openkb/marketplace.py @@ -5,10 +5,11 @@ rewrites ``/.claude-plugin/marketplace.json`` listing all currently compiled skills. -The schema matches the OpenKB repo's own ``.claude-plugin/marketplace.json``: -one plugin entry per KB, with a ``skills`` array of relative paths. Other -agent CLIs (``npx skills add``, Claude Code ``/plugin marketplace add``) -install from this manifest. +The schema is a subset compatible with the OpenKB repo's own +``.claude-plugin/marketplace.json``: one plugin entry per KB, with a +``skills`` array of relative paths. ``owner`` is derived from git config +so Claude Code's ``/plugin marketplace add`` accepts the manifest. Other +agent CLIs (``npx skills add``) install from the same file. This is a deterministic step — no LLM calls. """ @@ -26,6 +27,32 @@ _DESCRIPTION_RE = re.compile(r"^description:\s*(.+?)\s*$", re.MULTILINE) +def _git_owner() -> dict[str, str]: + """Read user.name and user.email from git config for the manifest owner. + + Falls back to placeholders if git isn't configured — the manifest is + still valid, just less helpful for marketplace listings. + """ + import subprocess + + def _git(key: str) -> str: + try: + result = subprocess.run( + ["git", "config", "--get", key], + capture_output=True, text=True, timeout=2, + ) + return result.stdout.strip() + except (subprocess.SubprocessError, FileNotFoundError): + return "" + + name = _git("user.name") or "openkb-user" + email = _git("user.email") or "" + owner: dict[str, str] = {"name": name} + if email: + owner["email"] = email + return owner + + def _read_skill_description(skill_md: Path) -> str: """Pull the ``description:`` field from a SKILL.md frontmatter block. @@ -78,8 +105,10 @@ def _build_manifest(kb_dir: Path) -> dict[str, Any]: config = load_config(kb_dir / ".openkb" / "config.yaml") version = str(config.get("version", "0.1.0")) + owner = _git_owner() return { "name": name, + "owner": owner, "metadata": { "description": metadata_desc, "version": version, @@ -90,6 +119,7 @@ def _build_manifest(kb_dir: Path) -> dict[str, Any]: "description": metadata_desc, "source": "./", "version": version, + "author": owner, "skills": skill_paths, } ], diff --git a/tests/test_marketplace.py b/tests/test_marketplace.py index 3a7df48d..62c94f6c 100644 --- a/tests/test_marketplace.py +++ b/tests/test_marketplace.py @@ -104,3 +104,54 @@ def test_regenerate_reads_description_from_frontmatter(tmp_path): # Reload SKILL.md to confirm description preserved skill_md = (kb / "output" / "skills" / "demo" / "SKILL.md").read_text() assert "the specific description goes here" in skill_md + + +def test_regenerate_includes_owner_from_git_config(tmp_path, monkeypatch): + """The manifest must include an ``owner`` field at the top level so + that Claude Code's /plugin marketplace add accepts it.""" + kb = _make_kb(tmp_path) + _make_skill(kb, "demo", "d") + + # Patch subprocess.run to return a controlled git output + import subprocess + real_run = subprocess.run + + def fake_run(cmd, **kwargs): + if cmd[:3] == ["git", "config", "--get"]: + key = cmd[3] + if key == "user.name": + return subprocess.CompletedProcess(cmd, 0, stdout="Test User\n", stderr="") + if key == "user.email": + return subprocess.CompletedProcess(cmd, 0, stdout="test@example.com\n", stderr="") + return real_run(cmd, **kwargs) + + monkeypatch.setattr(subprocess, "run", fake_run) + + regenerate_marketplace(kb) + + manifest = json.loads((kb / ".claude-plugin" / "marketplace.json").read_text()) + assert manifest["owner"] == {"name": "Test User", "email": "test@example.com"} + assert manifest["plugins"][0]["author"] == {"name": "Test User", "email": "test@example.com"} + + +def test_regenerate_falls_back_when_no_git_config(tmp_path, monkeypatch): + """If git config is empty, manifest still generates with a placeholder.""" + kb = _make_kb(tmp_path) + _make_skill(kb, "demo", "d") + + import subprocess + real_run = subprocess.run + + def fake_run(cmd, **kwargs): + if cmd[:3] == ["git", "config", "--get"]: + return subprocess.CompletedProcess(cmd, 1, stdout="", stderr="") + return real_run(cmd, **kwargs) + + monkeypatch.setattr(subprocess, "run", fake_run) + + regenerate_marketplace(kb) + + manifest = json.loads((kb / ".claude-plugin" / "marketplace.json").read_text()) + assert manifest["owner"]["name"] == "openkb-user" + # email is optional when missing + assert "email" not in manifest["owner"] or manifest["owner"]["email"] == "" From 0fdf0c4ae6d5753030bdbe7e4aac95d01891b8df Mon Sep 17 00:00:00 2001 From: mountain Date: Mon, 18 May 2026 13:19:03 +0800 Subject: [PATCH 09/25] feat(skill): Generator primitive for output/* artifacts --- openkb/generator.py | 66 +++++++++++++++++++++++++++++++++++++++++ tests/test_generator.py | 60 +++++++++++++++++++++++++++++++++++++ 2 files changed, 126 insertions(+) create mode 100644 openkb/generator.py create mode 100644 tests/test_generator.py diff --git a/openkb/generator.py b/openkb/generator.py new file mode 100644 index 00000000..f24b4b73 --- /dev/null +++ b/openkb/generator.py @@ -0,0 +1,66 @@ +"""Generator primitive — shared abstraction for all `/output//` artifacts. + +v0.1 supports ``target_type="skill"`` only. Future targets (``"ppt"``, +``"podcast"``, ``"report"``, ``"video"``) will register here and reuse the +same: + +* output-path convention: ``/output///`` +* post-run hook: marketplace.json regeneration (so artifacts ride the same + distribution mechanic as skills) + +Each target plugs in its own `run` coroutine. v0.1's only entry calls into +``openkb.agent.skill_compiler.run_skill_compile``. +""" +from __future__ import annotations + +from pathlib import Path +from typing import Literal + +from openkb.agent.skill_compiler import run_skill_compile +from openkb.marketplace import regenerate_marketplace + + +TargetType = Literal["skill"] # extend as new targets land + + +class Generator: + """A v0.1 generator instance. + + Args: + target_type: One of the supported targets. v0.1: ``"skill"``. + name: kebab-case slug; becomes the output directory name. + intent: natural-language description of the desired artifact. + kb_dir: KB root. + model: LiteLLM model name (from KB config). + """ + + def __init__( + self, + *, + target_type: TargetType, + name: str, + intent: str, + kb_dir: Path, + model: str, + ) -> None: + if target_type != "skill": + raise ValueError( + f"Unknown target_type {target_type!r}. v0.1 supports only 'skill'." + ) + self.target_type = target_type + self.name = name + self.intent = intent + self.kb_dir = kb_dir + self.model = model + self.output_dir = kb_dir / "output" / f"{target_type}s" / name + + async def run(self) -> Path: + """Execute the generator. Returns the path to the produced artifact.""" + await run_skill_compile( + kb_dir=self.kb_dir, + skill_name=self.name, + intent=self.intent, + model=self.model, + ) + regenerate_marketplace(self.kb_dir) + return self.output_dir diff --git a/tests/test_generator.py b/tests/test_generator.py new file mode 100644 index 00000000..b2e7d0de --- /dev/null +++ b/tests/test_generator.py @@ -0,0 +1,60 @@ +"""Tests for openkb.generator.Generator — the v0.1 abstraction that will +be reused by future ppt / podcast generators. + +In v0.1, only target_type='skill' is supported. We test the dispatch shape +so future targets slot in cleanly.""" +from __future__ import annotations + +import pytest +from unittest.mock import AsyncMock, patch + +from openkb.generator import Generator + + +def _make_kb(tmp_path): + (tmp_path / ".openkb").mkdir() + (tmp_path / ".openkb" / "config.yaml").write_text("model: gpt-4o-mini\n") + (tmp_path / "wiki").mkdir() + (tmp_path / "wiki" / "index.md").write_text("# index\n") + return tmp_path + + +def test_generator_rejects_unknown_target_type(tmp_path): + kb = _make_kb(tmp_path) + with pytest.raises(ValueError, match="target_type"): + Generator( + target_type="ppt", + name="demo", + intent="x", + kb_dir=kb, + model="gpt-4o-mini", + ) + + +def test_generator_skill_target_constructs_ok(tmp_path): + kb = _make_kb(tmp_path) + g = Generator( + target_type="skill", + name="demo", + intent="x", + kb_dir=kb, + model="gpt-4o-mini", + ) + assert g.output_dir == kb / "output" / "skills" / "demo" + + +@pytest.mark.asyncio +async def test_generator_run_delegates_to_skill_compiler(tmp_path): + kb = _make_kb(tmp_path) + g = Generator( + target_type="skill", + name="demo", + intent="x", + kb_dir=kb, + model="gpt-4o-mini", + ) + with patch("openkb.generator.run_skill_compile", new=AsyncMock()) as runner, \ + patch("openkb.generator.regenerate_marketplace") as regen: + await g.run() + runner.assert_awaited_once() + regen.assert_called_once_with(kb) From 3c55703551ccacb627982e6f8d4a13237d101ab6 Mon Sep 17 00:00:00 2001 From: mountain Date: Mon, 18 May 2026 13:22:55 +0800 Subject: [PATCH 10/25] feat(skill): openkb skill new CLI command --- openkb/cli.py | 111 ++++++++++++++++++++++++++++++++++++++++ tests/test_skill_cli.py | 111 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 222 insertions(+) create mode 100644 tests/test_skill_cli.py diff --git a/openkb/cli.py b/openkb/cli.py index 2f525818..9d7d950c 100644 --- a/openkb/cli.py +++ b/openkb/cli.py @@ -1362,3 +1362,114 @@ def feedback(ctx, message, feedback_type): " (no browser available — copy the URL above to file the issue)", err=True, ) + + +# --------------------------------------------------------------------------- +# `openkb skill ...` — skill factory (v0.1) +# --------------------------------------------------------------------------- + +@cli.group() +def skill(): + """Compile knowledge into a redistributable Anthropic Skill.""" + + +@skill.command("new") +@click.argument("name") +@click.argument("intent") +@click.option( + "-y", "--yes", "yes_flag", + is_flag=True, default=False, + help="Overwrite existing output/skills// without prompting.", +) +@click.pass_context +def skill_new(ctx, name, intent, yes_flag): + """Compile a new skill from this KB's wiki. + + NAME is a kebab-case slug used for the output directory and skill name. + INTENT is a natural-language description of what this skill should do. + + Example: + + openkb skill new karpathy-thinking "Reason about transformers like Karpathy" + """ + import asyncio + import shutil + import sys + + kb_dir = _find_kb_dir(ctx.obj.get("kb_dir_override")) + if kb_dir is None: + click.echo("No knowledge base found. Run `openkb init` first.", err=True) + ctx.exit(1) + + # Validate name + err = _validate_skill_name(name) + if err: + click.echo(f"[ERROR] {err}", err=True) + ctx.exit(2) + + # Validate wiki exists and has content + wiki = kb_dir / "wiki" + if not wiki.is_dir(): + click.echo( + "[ERROR] No wiki found in this KB. Run `openkb add ` " + "to ingest documents first.", + err=True, + ) + ctx.exit(1) + has_content = any(wiki.iterdir()) + if not has_content: + click.echo( + "[ERROR] Wiki has no compiled content yet. Ingest at least one " + "document with `openkb add` first.", + err=True, + ) + ctx.exit(1) + + # Overwrite handling + target = kb_dir / "output" / "skills" / name + if target.exists(): + if yes_flag: + shutil.rmtree(target) + elif sys.stdin.isatty(): + if not click.confirm( + f"output/skills/{name}/ already exists. Overwrite?", + default=False, + ): + click.echo("Aborted.") + ctx.exit(1) + shutil.rmtree(target) + else: + click.echo( + f"[ERROR] output/skills/{name}/ exists. Pass -y to overwrite " + f"in non-interactive contexts.", + err=True, + ) + ctx.exit(1) + + # Load config + key + _setup_llm_key(kb_dir) + config = load_config(kb_dir / ".openkb" / "config.yaml") + model = config.get("model", DEFAULT_CONFIG["model"]) + + # Run the generator + from openkb.generator import Generator + click.echo(f"Compiling skill '{name}'...") + try: + gen = Generator( + target_type="skill", + name=name, + intent=intent, + kb_dir=kb_dir, + model=model, + ) + asyncio.run(gen.run()) + except RuntimeError as exc: + click.echo(f"[ERROR] {exc}", err=True) + ctx.exit(1) + + click.echo(f"\nSaved: output/skills/{name}/") + click.echo(f"Manifest: .claude-plugin/marketplace.json updated") + click.echo(f"\nInstall locally:") + click.echo(f" cp -r output/skills/{name} ~/.claude/skills/") + click.echo(f"\nShare (push KB to GitHub, then):") + click.echo(f" npx skills@latest add /") diff --git a/tests/test_skill_cli.py b/tests/test_skill_cli.py new file mode 100644 index 00000000..c1ef6a30 --- /dev/null +++ b/tests/test_skill_cli.py @@ -0,0 +1,111 @@ +"""End-to-end tests for `openkb skill new` via click.testing.CliRunner. + +The agent runner is patched so these tests don't burn LLM tokens. They +verify the CLI wiring: KB detection, name validation, overwrite logic, +marketplace.json regeneration, exit codes.""" +from __future__ import annotations + +import json +from unittest.mock import AsyncMock, patch + +import pytest +from click.testing import CliRunner + +from openkb.cli import cli + + +def _make_kb(tmp_path): + (tmp_path / ".openkb").mkdir() + (tmp_path / ".openkb" / "config.yaml").write_text("model: gpt-4o-mini\n") + (tmp_path / "wiki" / "concepts").mkdir(parents=True) + (tmp_path / "wiki" / "summaries").mkdir(parents=True) + (tmp_path / "wiki" / "index.md").write_text("# index\n") + return tmp_path + + +def _fake_compile(kb_dir, skill_name, **_kw): + """Side-effect for the patched run_skill_compile: write a minimal SKILL.md.""" + target = kb_dir / "output" / "skills" / skill_name + target.mkdir(parents=True, exist_ok=True) + (target / "SKILL.md").write_text( + f"---\nname: {skill_name}\ndescription: test description\n---\n\n# {skill_name}\n" + ) + + +def test_skill_new_succeeds_and_writes_files(tmp_path): + kb = _make_kb(tmp_path) + runner = CliRunner() + + async def fake_run(kb_dir, skill_name, intent, model): + _fake_compile(kb_dir, skill_name) + + with patch("openkb.cli._find_kb_dir", return_value=kb), \ + patch("openkb.generator.run_skill_compile", new=AsyncMock(side_effect=fake_run)): + result = runner.invoke(cli, ["skill", "new", "demo", "test intent"]) + + assert result.exit_code == 0, result.output + assert (kb / "output" / "skills" / "demo" / "SKILL.md").exists() + assert (kb / ".claude-plugin" / "marketplace.json").exists() + manifest = json.loads((kb / ".claude-plugin" / "marketplace.json").read_text()) + assert manifest["plugins"][0]["skills"] == ["./output/skills/demo"] + + +def test_skill_new_rejects_invalid_name(tmp_path): + kb = _make_kb(tmp_path) + runner = CliRunner() + with patch("openkb.cli._find_kb_dir", return_value=kb): + result = runner.invoke(cli, ["skill", "new", "BadName", "x"]) + assert result.exit_code != 0 + assert "lowercase" in result.output.lower() + + +def test_skill_new_errors_without_kb(tmp_path): + runner = CliRunner() + with patch("openkb.cli._find_kb_dir", return_value=None): + result = runner.invoke(cli, ["skill", "new", "demo", "x"]) + assert result.exit_code != 0 + assert "No knowledge base" in result.output + + +def test_skill_new_errors_with_empty_wiki(tmp_path): + kb = tmp_path + (kb / ".openkb").mkdir() + (kb / ".openkb" / "config.yaml").write_text("model: gpt-4o-mini\n") + # No wiki/ directory + runner = CliRunner() + with patch("openkb.cli._find_kb_dir", return_value=kb): + result = runner.invoke(cli, ["skill", "new", "demo", "x"]) + assert result.exit_code != 0 + assert "wiki" in result.output.lower() + + +def test_skill_new_aborts_when_target_exists_without_yes(tmp_path): + kb = _make_kb(tmp_path) + (kb / "output" / "skills" / "demo").mkdir(parents=True) + runner = CliRunner() + with patch("openkb.cli._find_kb_dir", return_value=kb): + # Simulate non-interactive abort (CliRunner doesn't supply a TTY, + # which our error path treats as "must pass -y"). + result = runner.invoke(cli, ["skill", "new", "demo", "x"], input="n\n") + assert result.exit_code != 0 + # Either it asked and we said no, or it detected non-TTY and errored out. + out = result.output.lower() + assert "exists" in out or "overwrite" in out or "aborted" in out + + +def test_skill_new_overwrites_with_yes_flag(tmp_path): + kb = _make_kb(tmp_path) + (kb / "output" / "skills" / "demo").mkdir(parents=True) + (kb / "output" / "skills" / "demo" / "stale.txt").write_text("old") + runner = CliRunner() + + async def fake_run(kb_dir, skill_name, intent, model): + _fake_compile(kb_dir, skill_name) + + with patch("openkb.cli._find_kb_dir", return_value=kb), \ + patch("openkb.generator.run_skill_compile", new=AsyncMock(side_effect=fake_run)): + result = runner.invoke(cli, ["skill", "new", "demo", "x", "-y"]) + + assert result.exit_code == 0, result.output + assert not (kb / "output" / "skills" / "demo" / "stale.txt").exists() + assert (kb / "output" / "skills" / "demo" / "SKILL.md").exists() From e54fe8dd04d8352211a115d4eec9b0d9b057857d Mon Sep 17 00:00:00 2001 From: mountain Date: Mon, 18 May 2026 13:26:22 +0800 Subject: [PATCH 11/25] fix(skill): restore strict wiki-content check in skill new The plan called for verifying that wiki/concepts/ or wiki/summaries/ has actual files before allowing skill compilation. Earlier impl loosened this to 'any file in wiki/', which silently accepted freshly-init'd KBs (openkb init pre-creates empty concepts/ + summaries/ dirs). Restore the strict check + populate test fixture + add regression test for the freshly-init'd case. --- openkb/cli.py | 5 ++++- tests/test_skill_cli.py | 21 +++++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/openkb/cli.py b/openkb/cli.py index 9d7d950c..b96894ba 100644 --- a/openkb/cli.py +++ b/openkb/cli.py @@ -1416,7 +1416,10 @@ def skill_new(ctx, name, intent, yes_flag): err=True, ) ctx.exit(1) - has_content = any(wiki.iterdir()) + has_content = any( + (wiki / sub).is_dir() and any((wiki / sub).iterdir()) + for sub in ("concepts", "summaries") + ) if not has_content: click.echo( "[ERROR] Wiki has no compiled content yet. Ingest at least one " diff --git a/tests/test_skill_cli.py b/tests/test_skill_cli.py index c1ef6a30..297f9e1d 100644 --- a/tests/test_skill_cli.py +++ b/tests/test_skill_cli.py @@ -20,6 +20,9 @@ def _make_kb(tmp_path): (tmp_path / "wiki" / "concepts").mkdir(parents=True) (tmp_path / "wiki" / "summaries").mkdir(parents=True) (tmp_path / "wiki" / "index.md").write_text("# index\n") + # Populate the wiki with compiled content so the wiki-content gate accepts it. + (tmp_path / "wiki" / "concepts" / "demo.md").write_text("# demo\n") + (tmp_path / "wiki" / "summaries" / "demo.md").write_text("# demo\n") return tmp_path @@ -79,6 +82,24 @@ def test_skill_new_errors_with_empty_wiki(tmp_path): assert "wiki" in result.output.lower() +def test_skill_new_errors_with_freshly_init_wiki(tmp_path): + """A freshly init'd KB has wiki/ + empty concepts/ + summaries/ + index.md. + No documents have been ingested. The skill factory must refuse to compile + rather than spend tokens on an empty wiki.""" + kb = tmp_path + (kb / ".openkb").mkdir() + (kb / ".openkb" / "config.yaml").write_text("model: gpt-4o-mini\n") + # Mirror openkb init's layout: empty concepts + summaries, just index.md + (kb / "wiki" / "concepts").mkdir(parents=True) + (kb / "wiki" / "summaries").mkdir(parents=True) + (kb / "wiki" / "index.md").write_text("# index\n") + runner = CliRunner() + with patch("openkb.cli._find_kb_dir", return_value=kb): + result = runner.invoke(cli, ["skill", "new", "demo", "x"]) + assert result.exit_code != 0 + assert "compiled content" in result.output.lower() or "ingest" in result.output.lower() + + def test_skill_new_aborts_when_target_exists_without_yes(tmp_path): kb = _make_kb(tmp_path) (kb / "output" / "skills" / "demo").mkdir(parents=True) From 44459a1c6785e13f34c84de5722392951f63a17e Mon Sep 17 00:00:00 2001 From: mountain Date: Mon, 18 May 2026 13:30:21 +0800 Subject: [PATCH 12/25] feat(skill): /skill new slash command + chat write tool extension --- openkb/agent/chat.py | 66 +++++++++++++++++++++++++++++++--- openkb/agent/query.py | 44 ++++++++++++++++++++++- openkb/agent/tools.py | 40 +++++++++++++++++++++ tests/test_skill_chat_slash.py | 64 +++++++++++++++++++++++++++++++++ 4 files changed, 209 insertions(+), 5 deletions(-) create mode 100644 tests/test_skill_chat_slash.py diff --git a/openkb/agent/chat.py b/openkb/agent/chat.py index 57802ce4..04ffe4f7 100644 --- a/openkb/agent/chat.py +++ b/openkb/agent/chat.py @@ -23,7 +23,7 @@ from prompt_toolkit.styles import Style from openkb.agent.chat_session import ChatSession -from openkb.agent.query import MAX_TURNS, build_query_agent +from openkb.agent.query import MAX_TURNS, build_chat_agent, build_query_agent from openkb.log import append_log @@ -59,6 +59,7 @@ " /list List all documents in the knowledge base\n" " /lint Lint the knowledge base\n" " /add Add a document or directory to the knowledge base\n" + ' /skill new "" Compile a skill from the wiki\n' " /help Show this" ) @@ -214,6 +215,7 @@ def _bottom_toolbar(session: ChatSession) -> FormattedText: ("/list", "List all documents"), ("/lint", "Lint the knowledge base"), ("/add", "Add a document or directory"), + ("/skill", "compile a skill (try `/skill new \"intent\"`)"), ] @@ -494,6 +496,59 @@ async def _run_add(arg: str, kb_dir: Path, style: Style) -> None: await asyncio.to_thread(add_single_file, target, kb_dir) +async def _handle_slash_skill(arg: str, kb_dir: Path, style: Style) -> None: + """Dispatch ``/skill new ""`` and any future skill subcommands.""" + import shlex + + parts = shlex.split(arg) if arg else [] + if not parts: + _fmt(style, ("class:error", "Usage: /skill new \"\"\n")) + return + + sub = parts[0].lower() + if sub != "new": + _fmt(style, ("class:error", f"Unknown skill subcommand: {sub}. Try /skill new.\n")) + return + + if len(parts) < 3: + _fmt(style, ("class:error", "Usage: /skill new \"\"\n")) + return + + name = parts[1] + intent = " ".join(parts[2:]) + + from openkb.cli import _validate_skill_name + err = _validate_skill_name(name) + if err: + _fmt(style, ("class:error", f"[ERROR] {err}\n")) + return + + # Load model from KB config + from openkb.config import load_config, DEFAULT_CONFIG + config = load_config(kb_dir / ".openkb" / "config.yaml") + model = config.get("model", DEFAULT_CONFIG["model"]) + + from openkb.generator import Generator + _fmt(style, ("class:slash.help", f"Compiling skill '{name}'...\n")) + try: + gen = Generator( + target_type="skill", + name=name, + intent=intent, + kb_dir=kb_dir, + model=model, + ) + await gen.run() + except RuntimeError as exc: + _fmt(style, ("class:error", f"[ERROR] {exc}\n")) + return + + _fmt(style, ("class:slash.ok", f"Saved: output/skills/{name}/\n")) + _fmt(style, ("class:slash.help", + f"Iterate: ask follow-up questions in this chat and the agent can " + f"edit files under output/skills/{name}/ directly.\n")) + + async def _handle_slash( cmd: str, kb_dir: Path, @@ -557,6 +612,10 @@ async def _handle_slash( await _run_add(arg, kb_dir, style) return None + if head == "/skill": + await _handle_slash_skill(arg, kb_dir, style) + return None + _fmt( style, ("class:error", f"Unknown command: {head}. Try /help.\n"), @@ -579,8 +638,7 @@ async def run_chat( config = load_config(kb_dir / ".openkb" / "config.yaml") language = session.language or config.get("language", "en") - wiki_root = str(kb_dir / "wiki") - agent = build_query_agent(wiki_root, session.model, language=language) + agent = build_chat_agent(kb_dir, session.model, language=language) _print_header(session, kb_dir, style) if session.turn_count > 0: @@ -620,7 +678,7 @@ async def run_chat( return if action == "new_session": session = ChatSession.new(kb_dir, session.model, session.language) - agent = build_query_agent(wiki_root, session.model, language=language) + agent = build_chat_agent(kb_dir, session.model, language=language) prompt_session = _make_prompt_session(session, style, use_color, kb_dir) continue diff --git a/openkb/agent/query.py b/openkb/agent/query.py index af3abfe4..790a186c 100644 --- a/openkb/agent/query.py +++ b/openkb/agent/query.py @@ -6,7 +6,12 @@ from agents import Agent, Runner, function_tool from agents import ToolOutputImage, ToolOutputText -from openkb.agent.tools import get_wiki_page_content, read_wiki_file, read_wiki_image +from openkb.agent.tools import ( + get_wiki_page_content, + read_wiki_file, + read_wiki_image, + write_kb_file, +) MAX_TURNS = 50 from openkb.schema import get_agents_md @@ -91,6 +96,43 @@ def get_image(image_path: str) -> ToolOutputImage | ToolOutputText: ) +def build_chat_agent( + kb_dir: Path, + model: str, + language: str = "en", +) -> Agent: + """Build the chat agent: query agent + a write tool restricted to + ``/wiki/explorations/**`` and ``/output/**``. + + This is the variant used by the interactive ``openkb chat`` REPL so users + can iterate on generated artifacts (e.g. ``output/skills//``) via + natural-language follow-ups without giving the agent unrestricted write + access to the wiki. + """ + wiki_root = str(kb_dir / "wiki") + kb_root = str(kb_dir) + base = build_query_agent(wiki_root, model, language=language) + + @function_tool + def write_file(path: str, content: str) -> str: + """Write a text file under the KB. + + Allowed paths (relative to KB root): + * ``wiki/explorations/**`` — chat-derived notes. + * ``output/**`` — generator artifacts (skills, etc.). + + Any other path is rejected. Parent directories are created. + + Args: + path: File path relative to KB root + (e.g. ``"output/skills/demo/SKILL.md"``). + content: Full text content to write (overwrites if file exists). + """ + return write_kb_file(path, content, kb_root) + + return base.clone(tools=[*base.tools, write_file]) + + async def run_query( question: str, kb_dir: Path, diff --git a/openkb/agent/tools.py b/openkb/agent/tools.py index 8071e821..57f105df 100644 --- a/openkb/agent/tools.py +++ b/openkb/agent/tools.py @@ -167,6 +167,46 @@ def read_wiki_image(path: str, wiki_root: str) -> dict: return {"type": "image", "image_url": f"data:{mime};base64,{b64}"} +def write_kb_file(path: str, content: str, kb_root: str) -> str: + """Write a text file under the KB, restricted to safe write zones. + + Allowed prefixes (relative to *kb_root*): + * ``wiki/explorations/**`` — user-saved chat transcripts and notes. + * ``output/**`` — generator artifacts (skills, etc.) the + user iterates on via natural-language chat follow-ups. + + Parent directories are created automatically. Any path outside the + allow-list is rejected. + + Args: + path: File path relative to *kb_root*. + content: Text content to write. + kb_root: Absolute path to the KB root directory. + + Returns: + ``"Written: {path}"`` on success, or an access-denied message. + """ + root = Path(kb_root).resolve() + full_path = (root / path).resolve() + if not full_path.is_relative_to(root): + return "Access denied: path escapes KB root." + rel = full_path.relative_to(root) + parts = rel.parts + allowed = ( + len(parts) >= 2 and parts[0] == "wiki" and parts[1] == "explorations" + ) or ( + len(parts) >= 1 and parts[0] == "output" + ) + if not allowed: + return ( + "Access denied: writes are only allowed under " + "wiki/explorations/** or output/**." + ) + full_path.parent.mkdir(parents=True, exist_ok=True) + full_path.write_text(content, encoding="utf-8") + return f"Written: {path}" + + def write_wiki_file(path: str, content: str, wiki_root: str) -> str: """Write or overwrite a Markdown file in the wiki. diff --git a/tests/test_skill_chat_slash.py b/tests/test_skill_chat_slash.py new file mode 100644 index 00000000..f1eb6dc4 --- /dev/null +++ b/tests/test_skill_chat_slash.py @@ -0,0 +1,64 @@ +"""Tests for the /skill new slash command inside openkb chat.""" +from __future__ import annotations + +import pytest +from unittest.mock import AsyncMock, patch + +from prompt_toolkit.styles import Style + +from openkb.agent.chat import _handle_slash +from openkb.agent.chat_session import ChatSession + + +def _make_kb(tmp_path): + (tmp_path / ".openkb").mkdir() + (tmp_path / ".openkb" / "config.yaml").write_text("model: gpt-4o-mini\n") + (tmp_path / ".openkb" / "chats").mkdir() + (tmp_path / "wiki" / "concepts").mkdir(parents=True) + (tmp_path / "wiki" / "summaries").mkdir(parents=True) + (tmp_path / "wiki" / "index.md").write_text("# index\n") + return tmp_path + + +@pytest.mark.asyncio +async def test_slash_skill_new_calls_generator(tmp_path): + kb = _make_kb(tmp_path) + session = ChatSession.new(kb, "gpt-4o-mini", "en") + style = Style.from_dict({}) + + async def fake_run(kb_dir, skill_name, intent, model): + target = kb_dir / "output" / "skills" / skill_name + target.mkdir(parents=True, exist_ok=True) + (target / "SKILL.md").write_text( + f"---\nname: {skill_name}\ndescription: t\n---\n\n# {skill_name}\n" + ) + + with patch("openkb.generator.run_skill_compile", new=AsyncMock(side_effect=fake_run)): + action = await _handle_slash( + '/skill new demo "test intent"', kb, session, style + ) + + assert action is None # continues chat session + assert (kb / "output" / "skills" / "demo" / "SKILL.md").exists() + assert (kb / ".claude-plugin" / "marketplace.json").exists() + + +@pytest.mark.asyncio +async def test_slash_skill_new_reports_usage_when_args_missing(tmp_path): + kb = _make_kb(tmp_path) + session = ChatSession.new(kb, "gpt-4o-mini", "en") + style = Style.from_dict({}) + + action = await _handle_slash('/skill new', kb, session, style) + assert action is None + # No skill written + assert not (kb / "output").exists() + + +@pytest.mark.asyncio +async def test_slash_skill_unknown_subcommand(tmp_path): + kb = _make_kb(tmp_path) + session = ChatSession.new(kb, "gpt-4o-mini", "en") + style = Style.from_dict({}) + action = await _handle_slash('/skill list', kb, session, style) + assert action is None From 4e99089bd8346cdec8656767924cc7f6b1a61b97 Mon Sep 17 00:00:00 2001 From: mountain Date: Mon, 18 May 2026 13:35:52 +0800 Subject: [PATCH 13/25] fix(skill): harden /skill new slash + write_kb_file edge cases - /skill new: catch shlex.split ValueError on unclosed quotes so a typo doesn't crash the chat REPL - write_kb_file: reject bare directory paths (e.g. 'output') that would otherwise raise IsADirectoryError on write_text - chat.py: drop stale build_query_agent import (chat now uses build_chat_agent) - test_chat_slash_commands.py: update patch target from build_query_agent -> build_chat_agent so the test exercises the right symbol - Add tests/test_write_kb_file.py covering allow-list, traversal, bare-directory rejection --- openkb/agent/chat.py | 8 +++- openkb/agent/tools.py | 13 +++-- tests/test_chat_slash_commands.py | 2 +- tests/test_write_kb_file.py | 79 +++++++++++++++++++++++++++++++ 4 files changed, 95 insertions(+), 7 deletions(-) create mode 100644 tests/test_write_kb_file.py diff --git a/openkb/agent/chat.py b/openkb/agent/chat.py index 04ffe4f7..4b4bb7e9 100644 --- a/openkb/agent/chat.py +++ b/openkb/agent/chat.py @@ -23,7 +23,7 @@ from prompt_toolkit.styles import Style from openkb.agent.chat_session import ChatSession -from openkb.agent.query import MAX_TURNS, build_chat_agent, build_query_agent +from openkb.agent.query import MAX_TURNS, build_chat_agent from openkb.log import append_log @@ -500,7 +500,11 @@ async def _handle_slash_skill(arg: str, kb_dir: Path, style: Style) -> None: """Dispatch ``/skill new ""`` and any future skill subcommands.""" import shlex - parts = shlex.split(arg) if arg else [] + try: + parts = shlex.split(arg) if arg else [] + except ValueError as exc: + _fmt(style, ("class:error", f"[ERROR] Could not parse: {exc}\n")) + return if not parts: _fmt(style, ("class:error", "Usage: /skill new \"\"\n")) return diff --git a/openkb/agent/tools.py b/openkb/agent/tools.py index 57f105df..76520785 100644 --- a/openkb/agent/tools.py +++ b/openkb/agent/tools.py @@ -186,21 +186,26 @@ def write_kb_file(path: str, content: str, kb_root: str) -> str: Returns: ``"Written: {path}"`` on success, or an access-denied message. """ + if not path: + return "Access denied: path must be a file under wiki/explorations/ or output/." root = Path(kb_root).resolve() full_path = (root / path).resolve() if not full_path.is_relative_to(root): return "Access denied: path escapes KB root." rel = full_path.relative_to(root) parts = rel.parts + # Require a file path with at least one component beyond the allow-list + # prefix, so a bare directory name (e.g. "output") does not slip through + # and crash on write_text with IsADirectoryError. allowed = ( - len(parts) >= 2 and parts[0] == "wiki" and parts[1] == "explorations" + len(parts) >= 3 and parts[0] == "wiki" and parts[1] == "explorations" ) or ( - len(parts) >= 1 and parts[0] == "output" + len(parts) >= 2 and parts[0] == "output" ) if not allowed: return ( - "Access denied: writes are only allowed under " - "wiki/explorations/** or output/**." + "Access denied: path must be a file under " + "wiki/explorations/ or output/." ) full_path.parent.mkdir(parents=True, exist_ok=True) full_path.write_text(content, encoding="utf-8") diff --git a/tests/test_chat_slash_commands.py b/tests/test_chat_slash_commands.py index 2a781da5..a77d3bda 100644 --- a/tests/test_chat_slash_commands.py +++ b/tests/test_chat_slash_commands.py @@ -176,7 +176,7 @@ async def prompt_async(self) -> str: p, collected = _collect_fmt() with ( p, - patch("openkb.agent.chat.build_query_agent", return_value=object()), + patch("openkb.agent.chat.build_chat_agent", return_value=object()), patch("openkb.agent.chat._print_header"), patch("openkb.agent.chat._make_prompt_session", return_value=prompt), patch("openkb.agent.chat._handle_slash", new_callable=AsyncMock, side_effect=KeyboardInterrupt), diff --git a/tests/test_write_kb_file.py b/tests/test_write_kb_file.py new file mode 100644 index 00000000..7688b1b6 --- /dev/null +++ b/tests/test_write_kb_file.py @@ -0,0 +1,79 @@ +"""Regression tests for ``openkb.agent.tools.write_kb_file``. + +Covers the allow-list (``wiki/explorations/**`` and ``output/**``), path +traversal rejection, the bare-directory guard (e.g. ``"output"`` alone), +and automatic parent-directory creation. +""" +from __future__ import annotations + +from pathlib import Path + +import pytest + +from openkb.agent.tools import write_kb_file + + +@pytest.fixture +def kb_root(tmp_path: Path) -> str: + return str(tmp_path) + + +def test_rejects_empty_path(kb_root: str) -> None: + result = write_kb_file("", "hi", kb_root) + assert result.startswith("Access denied") + + +def test_rejects_bare_output_directory(kb_root: str) -> None: + # "output" alone resolves to the directory itself and would crash + # write_text with IsADirectoryError. The guard must reject it. + result = write_kb_file("output", "hi", kb_root) + assert result.startswith("Access denied") + # And, critically, no file/dir should have been written at that name. + assert not (Path(kb_root) / "output").exists() or (Path(kb_root) / "output").is_dir() + + +def test_rejects_bare_wiki_explorations_directory(kb_root: str) -> None: + result = write_kb_file("wiki/explorations", "hi", kb_root) + assert result.startswith("Access denied") + + +def test_rejects_outside_allow_list(kb_root: str) -> None: + result = write_kb_file("wiki/summaries/x.md", "hi", kb_root) + assert result.startswith("Access denied") + assert not (Path(kb_root) / "wiki" / "summaries" / "x.md").exists() + + +def test_accepts_output_skill_path(kb_root: str) -> None: + result = write_kb_file("output/skills/demo/SKILL.md", "# demo\n", kb_root) + assert result == "Written: output/skills/demo/SKILL.md" + written = Path(kb_root) / "output" / "skills" / "demo" / "SKILL.md" + assert written.read_text(encoding="utf-8") == "# demo\n" + + +def test_accepts_wiki_exploration(kb_root: str) -> None: + result = write_kb_file("wiki/explorations/transcript.md", "notes", kb_root) + assert result == "Written: wiki/explorations/transcript.md" + written = Path(kb_root) / "wiki" / "explorations" / "transcript.md" + assert written.read_text(encoding="utf-8") == "notes" + + +def test_rejects_path_traversal(kb_root: str) -> None: + result = write_kb_file("../escape.md", "evil", kb_root) + assert result.startswith("Access denied") + # Must not have written outside the KB root. + assert not (Path(kb_root).parent / "escape.md").exists() + + +def test_rejects_traversal_via_allowed_prefix(kb_root: str) -> None: + # Even when starting with an allowed-looking prefix, traversal must escape + # the KB root (Path.resolve normalizes ``..``) and be rejected. + result = write_kb_file("output/../../escape.md", "evil", kb_root) + assert result.startswith("Access denied") + + +def test_creates_parent_directories(kb_root: str) -> None: + # Deeply nested target with no pre-existing parents. + target = "output/skills/new/nested/deep/SKILL.md" + result = write_kb_file(target, "deep", kb_root) + assert result == f"Written: {target}" + assert (Path(kb_root) / target).read_text(encoding="utf-8") == "deep" From c961e9a2861ad4c71bd5bcff5090e8ff222928a6 Mon Sep 17 00:00:00 2001 From: mountain Date: Mon, 18 May 2026 13:37:36 +0800 Subject: [PATCH 14/25] docs(skill): README + CONTRIBUTING + PR template for skill submissions --- .../PULL_REQUEST_TEMPLATE/skill_submission.md | 22 ++++++ CONTRIBUTING.md | 71 +++++++++++++++++++ README.md | 41 +++++++++++ 3 files changed, 134 insertions(+) create mode 100644 .github/PULL_REQUEST_TEMPLATE/skill_submission.md create mode 100644 CONTRIBUTING.md diff --git a/.github/PULL_REQUEST_TEMPLATE/skill_submission.md b/.github/PULL_REQUEST_TEMPLATE/skill_submission.md new file mode 100644 index 00000000..a413744a --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE/skill_submission.md @@ -0,0 +1,22 @@ +## Skill submission + +- **Name:** `` +- **What it does (one sentence):** +- **Source material:** + +### Checklist + +- [ ] I have the right to redistribute the source material this skill is + derived from (public, my own work, properly licensed, or fair-use + methodology only — not bulk-copied copyrighted text). +- [ ] `SKILL.md` has valid YAML frontmatter with `name:` and `description:` + (description ≤ 1024 chars). +- [ ] All `[[references/...]]` links resolve. +- [ ] I added an entry under `plugins[0].skills` in + `.claude-plugin/marketplace.json`. +- [ ] I installed and tested this skill locally in at least one agent CLI + (Claude Code / Codex CLI / Gemini CLI / Cursor). + +### Notes for reviewers + + diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 00000000..dfbb7eb2 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,71 @@ +# Contributing to OpenKB + +Thanks for your interest in OpenKB! There are two ways to contribute: code (bug +fixes, new features) and skills (compiled knowledge artifacts). + +## Code contributions + +Standard GitHub workflow: + +1. Fork the repo and create a feature branch off `main`. +2. Add tests for any new behaviour. Run `uv run pytest` to verify they pass. +3. Open a PR with a clear description and link to any related issues. +4. Address review feedback. A maintainer will merge once everything looks good. + +## Submitting a skill + +OpenKB doubles as a registry for compiled-knowledge skills. The `skills/` +directory in this repo hosts the official `openkb` skill plus +community-contributed skills installable via: + +```bash +npx skills@latest add VectifyAI/OpenKB +``` + +To submit your skill: + +### 1. Compile and test locally + +```bash +cd +openkb skill new "" +cp -r output/skills/ ~/.claude/skills/ +# Verify the skill activates in a Claude Code session on a relevant question +``` + +### 2. Open a PR against this repo + +1. Fork `VectifyAI/OpenKB`. +2. Copy your skill into the fork at `skills//`. +3. Add a new entry under `plugins[0].skills` in `.claude-plugin/marketplace.json`: + ```json + "./skills/" + ``` +4. Open a PR with the "Skill submission" template (auto-applied when you create + the PR). + +### 3. Review + +A maintainer will review your submission against this checklist: + +- `SKILL.md` has a valid YAML frontmatter with `name:` and `description:` + (description ≤ 1024 characters). +- The `description` is specific (not "a skill about X"). +- All `[[references/...]]` links resolve to files you actually included. +- Skill name doesn't clash with an existing entry. +- No obvious copyright-infringing content (large verbatim copies of recent + copyrighted books or papers without authorisation). + +You're responsible for confirming you have rights to redistribute the source +material your skill is derived from. The PR template includes a checkbox for +this. + +### What makes a good community skill + +- **Methodology-focused**, not bulk-copied content. "How to reason about X" + beats "the full text of X". +- **Specific scope** so the `description:` field can be precise enough that + loading agents pick it up at the right time. +- **Tested in at least one agent CLI** before submission. + +Thanks for sharing what you've compiled! diff --git a/README.md b/README.md index 840a051d..41278d29 100644 --- a/README.md +++ b/README.md @@ -150,6 +150,7 @@ A single source might touch 10-15 wiki pages. Knowledge accumulates: each docume |---|---| | `openkb init` | Initialize a new knowledge base (interactive) | | openkb add <file_or_dir_or_URL> | Add documents and compile to wiki. URL ingest auto-detects PDF (saved as `.pdf` → PageIndex / markitdown) vs HTML (trafilatura main-content extract → `.md`) | +| openkb skill new <name> "<intent>" | Compile a skill from this KB's wiki into `output/skills//` and update the marketplace manifest | | openkb remove <doc> | Remove a document and clean up its wiki pages, images, registry, and PageIndex state (use `--dry-run` to preview, `--keep-raw` / `--keep-empty-concepts` to retain artifacts) | | openkb query "question" | Ask a question over the knowledge base (use `--save` to save the answer to `wiki/explorations/`) | | `openkb chat` | Start an interactive multi-turn chat (use `--resume`, `--list`, `--delete` to manage sessions) | @@ -161,6 +162,46 @@ A single source might touch 10-15 wiki pages. Knowledge accumulates: each docume +### Skills — compile your wiki into a redistributable skill + +Once you have a populated wiki, you can compile a subset of it into an +**Anthropic Skill** — a portable folder that Claude Code, Codex CLI, +Gemini CLI, and Cursor all know how to load. + +```bash +openkb skill new karpathy-thinking \ + "Reason about transformers and attention in Karpathy's style" +``` + +This produces `output/skills/karpathy-thinking/` with `SKILL.md`, +optional `references/`, and an auto-updated +`.claude-plugin/marketplace.json` for distribution. + +**Install locally:** + +```bash +cp -r output/skills/karpathy-thinking ~/.claude/skills/ +``` + +**Share with others:** + +Push your KB directory to GitHub, then anyone can install all your skills with one command: + +```bash +npx skills@latest add / +``` + +You can also iterate inside chat: + +``` +/skill new karpathy-thinking "Reason about transformers like Karpathy" +[generation streams] +> description is too generic, make it about transformer implementations specifically +[agent edits SKILL.md frontmatter in place] +``` + +See [CONTRIBUTING.md](CONTRIBUTING.md) for how to submit your compiled skill back to the OpenKB community registry. + ### Interactive Chat `openkb chat` opens an interactive chat session over your wiki knowledge base. Unlike the one-shot `openkb query`, each turn carries the conversation history, so you can dig into a topic without re-typing context. From 439d017b083b40c2232236134a21b3ac579579ea Mon Sep 17 00:00:00 2001 From: mountain Date: Mon, 18 May 2026 13:51:01 +0800 Subject: [PATCH 15/25] fix(skill): align tool names + add safety gates to chat /skill new MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit C1: /skill new in chat had only name validation — no wiki-exists check, no wiki-content check, no overwrite guard. The CLI had all three. Extract the gates into _preflight_skill_new and call from both. Add explicit 'remove existing skill first' message in chat (no -y equivalent there). C2: System prompt advertised tool names (list_wiki_dir, read_wiki_file, write_skill_file) that didn't match what was registered with @function_tool (list_wiki, read_wiki, write_skill). LLM saw the registered names; prompt references would confuse it. Rename the wrappers to match. I1: query_wiki was a sync @function_tool calling asyncio.run() on run_query — works only because openai-agents SDK runs sync tools on worker threads. Convert to async @function_tool so the runner awaits it in the same loop, eliminating the nested-asyncio fragility. Add 2 regression tests for the chat safety gates. --- openkb/agent/chat.py | 14 +++++- openkb/agent/skill_compiler.py | 28 ++++++------ openkb/cli.py | 79 ++++++++++++++++++++++------------ tests/test_skill_chat_slash.py | 41 ++++++++++++++++++ 4 files changed, 118 insertions(+), 44 deletions(-) diff --git a/openkb/agent/chat.py b/openkb/agent/chat.py index 4b4bb7e9..2f523dc3 100644 --- a/openkb/agent/chat.py +++ b/openkb/agent/chat.py @@ -521,12 +521,22 @@ async def _handle_slash_skill(arg: str, kb_dir: Path, style: Style) -> None: name = parts[1] intent = " ".join(parts[2:]) - from openkb.cli import _validate_skill_name - err = _validate_skill_name(name) + # Use the same safety gates as the CLI (name validation, wiki dir, + # wiki content). Chat doesn't have a -y flag, so existing skills + # block with a clear instruction to delete first. + from openkb.cli import _preflight_skill_new + err = _preflight_skill_new(kb_dir, name, yes_flag=False) if err: _fmt(style, ("class:error", f"[ERROR] {err}\n")) return + target = kb_dir / "output" / "skills" / name + if target.exists(): + _fmt(style, ("class:error", + f"[ERROR] output/skills/{name}/ already exists. Remove it first " + f"with `rm -rf output/skills/{name}` and re-run.\n")) + return + # Load model from KB config from openkb.config import load_config, DEFAULT_CONFIG config = load_config(kb_dir / ".openkb" / "config.yaml") diff --git a/openkb/agent/skill_compiler.py b/openkb/agent/skill_compiler.py index 79b64751..0733673a 100644 --- a/openkb/agent/skill_compiler.py +++ b/openkb/agent/skill_compiler.py @@ -13,16 +13,15 @@ """ from __future__ import annotations -import asyncio from pathlib import Path from agents import Agent, Runner, function_tool from agents.model_settings import ModelSettings from openkb.agent.skill_tools import ( - list_wiki_dir, - read_wiki_file_for_skill, - write_skill_file, + list_wiki_dir as _list_wiki_dir_impl, + read_wiki_file_for_skill as _read_wiki_file_impl, + write_skill_file as _write_skill_file_impl, ) from openkb.prompts import load_prompt from openkb.schema import get_agents_md @@ -59,32 +58,31 @@ def build_skill_compile_agent( ) @function_tool - def list_wiki(directory: str) -> str: + def list_wiki_dir(directory: str) -> str: """List .md files in a wiki subdirectory (e.g. 'concepts').""" - return list_wiki_dir(directory, wiki_root) + return _list_wiki_dir_impl(directory, wiki_root) @function_tool - def read_wiki(path: str) -> str: + def read_wiki_file(path: str) -> str: """Read a wiki markdown file by path relative to wiki/ (e.g. 'concepts/attention.md').""" - return read_wiki_file_for_skill(path, wiki_root) + return _read_wiki_file_impl(path, wiki_root) @function_tool - def query_wiki(question: str) -> str: + async def query_wiki(question: str) -> str: """Run a semantic query over the wiki and return the answer. Use sparingly — this is itself an LLM call. Prefer reading specific files when you already know which one you want. """ - # Lazy import to avoid circular dependency at module import. + # Lazy import to avoid a circular dependency at module load time. from openkb.agent.query import run_query kb_dir = Path(wiki_root).parent - config_model = model - return asyncio.run(run_query(question, kb_dir, config_model, stream=False)) + return await run_query(question, kb_dir, model, stream=False) @function_tool - def write_skill(path: str, content: str) -> str: + def write_skill_file(path: str, content: str) -> str: """Write a file under the skill directory.""" - return write_skill_file(path, content, skill_root) + return _write_skill_file_impl(path, content, skill_root) @function_tool def done(summary: str) -> str: @@ -94,7 +92,7 @@ def done(summary: str) -> str: return Agent( name="skill-compiler", instructions=instructions, - tools=[list_wiki, read_wiki, query_wiki, write_skill, done], + tools=[list_wiki_dir, read_wiki_file, query_wiki, write_skill_file, done], model=f"litellm/{model}", model_settings=ModelSettings(parallel_tool_calls=False), ) diff --git a/openkb/cli.py b/openkb/cli.py index b96894ba..95e8d8d1 100644 --- a/openkb/cli.py +++ b/openkb/cli.py @@ -165,6 +165,54 @@ def _validate_skill_name(name: str) -> str | None: return None +def _preflight_skill_new( + kb_dir: Path, name: str, yes_flag: bool, +) -> str | None: + """Run all the safety gates for ``openkb skill new`` / ``/skill new``. + + Returns ``None`` if it's safe to proceed (caller will then either + proceed or, for the overwrite case, call ``_clear_existing_skill_dir`` + after confirming with the user). Returns an error message string if + one of the gates trips. + + NOTE: This intentionally does NOT handle the overwrite confirmation + itself — that's caller-specific (TTY-based ``click.confirm`` in CLI; + explicit ``--force``-style flag in chat). It only returns an error if + the target dir exists AND ``yes_flag`` is False, and the caller is + expected to detect that error message and prompt as appropriate. + """ + err = _validate_skill_name(name) + if err: + return err + + wiki = kb_dir / "wiki" + if not wiki.is_dir(): + return ( + "No wiki found in this KB. Run `openkb add ` to " + "ingest documents first." + ) + + has_content = any( + (wiki / sub).is_dir() and any((wiki / sub).iterdir()) + for sub in ("concepts", "summaries") + ) + if not has_content: + return ( + "Wiki has no compiled content yet. Ingest at least one " + "document with `openkb add` first." + ) + + return None + + +def _clear_existing_skill_dir(kb_dir: Path, name: str) -> None: + """Delete an existing ``/output/skills//`` directory.""" + import shutil + target = kb_dir / "output" / "skills" / name + if target.exists(): + shutil.rmtree(target) + + def add_single_file(file_path: Path, kb_dir: Path) -> Literal["added", "skipped", "failed"]: """Convert, index, and compile a single document into the knowledge base. @@ -1393,7 +1441,6 @@ def skill_new(ctx, name, intent, yes_flag): openkb skill new karpathy-thinking "Reason about transformers like Karpathy" """ import asyncio - import shutil import sys kb_dir = _find_kb_dir(ctx.obj.get("kb_dir_override")) @@ -1401,38 +1448,16 @@ def skill_new(ctx, name, intent, yes_flag): click.echo("No knowledge base found. Run `openkb init` first.", err=True) ctx.exit(1) - # Validate name - err = _validate_skill_name(name) + err = _preflight_skill_new(kb_dir, name, yes_flag) if err: click.echo(f"[ERROR] {err}", err=True) - ctx.exit(2) - - # Validate wiki exists and has content - wiki = kb_dir / "wiki" - if not wiki.is_dir(): - click.echo( - "[ERROR] No wiki found in this KB. Run `openkb add ` " - "to ingest documents first.", - err=True, - ) - ctx.exit(1) - has_content = any( - (wiki / sub).is_dir() and any((wiki / sub).iterdir()) - for sub in ("concepts", "summaries") - ) - if not has_content: - click.echo( - "[ERROR] Wiki has no compiled content yet. Ingest at least one " - "document with `openkb add` first.", - err=True, - ) ctx.exit(1) - # Overwrite handling + # Overwrite handling (CLI-specific) target = kb_dir / "output" / "skills" / name if target.exists(): if yes_flag: - shutil.rmtree(target) + _clear_existing_skill_dir(kb_dir, name) elif sys.stdin.isatty(): if not click.confirm( f"output/skills/{name}/ already exists. Overwrite?", @@ -1440,7 +1465,7 @@ def skill_new(ctx, name, intent, yes_flag): ): click.echo("Aborted.") ctx.exit(1) - shutil.rmtree(target) + _clear_existing_skill_dir(kb_dir, name) else: click.echo( f"[ERROR] output/skills/{name}/ exists. Pass -y to overwrite " diff --git a/tests/test_skill_chat_slash.py b/tests/test_skill_chat_slash.py index f1eb6dc4..377f1a91 100644 --- a/tests/test_skill_chat_slash.py +++ b/tests/test_skill_chat_slash.py @@ -17,6 +17,9 @@ def _make_kb(tmp_path): (tmp_path / "wiki" / "concepts").mkdir(parents=True) (tmp_path / "wiki" / "summaries").mkdir(parents=True) (tmp_path / "wiki" / "index.md").write_text("# index\n") + # Populate so wiki-content gate accepts + (tmp_path / "wiki" / "concepts" / "demo.md").write_text("# demo\n") + (tmp_path / "wiki" / "summaries" / "demo.md").write_text("# demo\n") return tmp_path @@ -62,3 +65,41 @@ async def test_slash_skill_unknown_subcommand(tmp_path): style = Style.from_dict({}) action = await _handle_slash('/skill list', kb, session, style) assert action is None + + +@pytest.mark.asyncio +async def test_slash_skill_new_rejects_empty_wiki(tmp_path): + """Chat / slash command must catch freshly-init'd KBs (no compiled content).""" + kb = tmp_path + (kb / ".openkb").mkdir() + (kb / ".openkb" / "config.yaml").write_text("model: gpt-4o-mini\n") + (kb / ".openkb" / "chats").mkdir() + # Empty wiki/ — exactly what `openkb init` creates + (kb / "wiki" / "concepts").mkdir(parents=True) + (kb / "wiki" / "summaries").mkdir(parents=True) + (kb / "wiki" / "index.md").write_text("# index\n") + + session = ChatSession.new(kb, "gpt-4o-mini", "en") + style = Style.from_dict({}) + + action = await _handle_slash('/skill new demo "intent"', kb, session, style) + assert action is None + assert not (kb / "output").exists() + + +@pytest.mark.asyncio +async def test_slash_skill_new_rejects_when_target_exists(tmp_path): + """Chat / slash command must not silently overwrite an existing skill.""" + kb = _make_kb(tmp_path) + (kb / "wiki" / "concepts" / "x.md").write_text("x") + (kb / "wiki" / "summaries" / "x.md").write_text("x") + (kb / "output" / "skills" / "demo").mkdir(parents=True) + (kb / "output" / "skills" / "demo" / "stale.txt").write_text("old") + + session = ChatSession.new(kb, "gpt-4o-mini", "en") + style = Style.from_dict({}) + + action = await _handle_slash('/skill new demo "intent"', kb, session, style) + assert action is None + # stale.txt must still be there (we didn't overwrite) + assert (kb / "output" / "skills" / "demo" / "stale.txt").read_text() == "old" From 746720098ccce057498840ae9f928e7f8350ef0e Mon Sep 17 00:00:00 2001 From: mountain Date: Mon, 18 May 2026 15:04:53 +0800 Subject: [PATCH 16/25] fix(skill): lock marketplace naming to openkb@vectify convention The previous impl used the KB directory name as both the marketplace 'name' and the plugin 'name', and stitched together a metadata description by truncating the first skill's SKILL.md description at 200 chars (often mid-word). Lock the convention to match skills/openkb/.claude-plugin/marketplace.json from the official skill: - marketplace name: 'vectify' (always) - plugin name: 'openkb' (always) - description: fixed string, no SKILL.md content injection, no truncation Different KBs are distinguished by / URL, not manifest name. Users get one canonical install command (/plugin install openkb@vectify) regardless of which KB they're consuming. Also fix _git_owner to pass cwd=kb_dir so 'openkb --kb-dir ... skill new' run from anywhere reads the KB's git config, not the process CWD. --- openkb/marketplace.py | 36 ++++++++++++++++-------------------- tests/test_marketplace.py | 38 +++++++++++++++++++++++++++++++++----- 2 files changed, 49 insertions(+), 25 deletions(-) diff --git a/openkb/marketplace.py b/openkb/marketplace.py index 48887afe..e78df148 100644 --- a/openkb/marketplace.py +++ b/openkb/marketplace.py @@ -27,11 +27,12 @@ _DESCRIPTION_RE = re.compile(r"^description:\s*(.+?)\s*$", re.MULTILINE) -def _git_owner() -> dict[str, str]: - """Read user.name and user.email from git config for the manifest owner. +def _git_owner(kb_dir: Path) -> dict[str, str]: + """Read user.name and user.email from git config (run in kb_dir context). - Falls back to placeholders if git isn't configured — the manifest is - still valid, just less helpful for marketplace listings. + Falls back to placeholders if git isn't configured. ``cwd=kb_dir`` so + that ``git config`` resolves the KB's local-or-walked-up settings, + not the process's working directory at the time of CLI invocation. """ import subprocess @@ -40,6 +41,7 @@ def _git(key: str) -> str: result = subprocess.run( ["git", "config", "--get", key], capture_output=True, text=True, timeout=2, + cwd=str(kb_dir), ) return result.stdout.strip() except (subprocess.SubprocessError, FileNotFoundError): @@ -71,11 +73,6 @@ def _read_skill_description(skill_md: Path) -> str: return desc_match.group(1).strip() -def _kb_name(kb_dir: Path) -> str: - """Use the KB directory name as the marketplace name (sluggable).""" - return kb_dir.name - - def _list_skill_dirs(kb_dir: Path) -> list[Path]: """Return skill directories under /output/skills/ that contain a SKILL.md.""" skills_root = kb_dir / "output" / "skills" @@ -91,23 +88,22 @@ def _build_manifest(kb_dir: Path) -> dict[str, Any]: skills = _list_skill_dirs(kb_dir) skill_paths = [f"./output/skills/{d.name}" for d in skills] - # Aggregate description for the manifest metadata - name = _kb_name(kb_dir) + # Fixed clean descriptions — no truncation, no SKILL.md interpolation. + # Naming convention is locked to `openkb@vectify` so users get one + # canonical install command regardless of which KB they're consuming; + # different KBs are distinguished by / URL. metadata_desc = ( - f"Skills compiled from the '{name}' knowledge base via OpenKB." + f"Skills compiled from the {kb_dir.name} knowledge base via OpenKB." ) - if skills: - first_desc = _read_skill_description(skills[0] / "SKILL.md") - if first_desc: - metadata_desc += f" Featured: {first_desc[:200]}" + plugin_desc = "Knowledge skills compiled from this OpenKB-managed knowledge base." # Pull KB config for version if available; default to 0.1.0 config = load_config(kb_dir / ".openkb" / "config.yaml") version = str(config.get("version", "0.1.0")) - owner = _git_owner() + owner = _git_owner(kb_dir) return { - "name": name, + "name": "vectify", "owner": owner, "metadata": { "description": metadata_desc, @@ -115,8 +111,8 @@ def _build_manifest(kb_dir: Path) -> dict[str, Any]: }, "plugins": [ { - "name": name, - "description": metadata_desc, + "name": "openkb", + "description": plugin_desc, "source": "./", "version": version, "author": owner, diff --git a/tests/test_marketplace.py b/tests/test_marketplace.py index 62c94f6c..dee118d5 100644 --- a/tests/test_marketplace.py +++ b/tests/test_marketplace.py @@ -36,10 +36,8 @@ def test_regenerate_creates_manifest_with_one_skill(tmp_path): manifest = json.loads((kb / ".claude-plugin" / "marketplace.json").read_text()) assert manifest["plugins"][0]["skills"] == ["./output/skills/karpathy-thinking"] - # The plugin's description carries the SKILL.md description for the latest entry, - # OR we use a fixed manifest-level description — implementation choice; we just - # check the SKILL.md description appears somewhere in the manifest JSON. - assert "Reason like Karpathy" in json.dumps(manifest) + # Naming convention is locked: top-level marketplace name is always "vectify". + assert manifest["name"] == "vectify" def test_regenerate_lists_multiple_skills_alphabetical(tmp_path): @@ -67,7 +65,7 @@ def test_regenerate_replaces_existing_file(tmp_path): regenerate_marketplace(kb) manifest = json.loads((kb / ".claude-plugin" / "marketplace.json").read_text()) - assert manifest["name"] != "stale" + assert manifest["name"] == "vectify" assert manifest["plugins"][0]["skills"] == ["./output/skills/demo"] @@ -155,3 +153,33 @@ def fake_run(cmd, **kwargs): assert manifest["owner"]["name"] == "openkb-user" # email is optional when missing assert "email" not in manifest["owner"] or manifest["owner"]["email"] == "" + + +def test_regenerate_uses_openkb_at_vectify_convention(tmp_path): + """All OpenKB-generated marketplaces must self-identify as 'vectify' + (top level) with a plugin named 'openkb', so users install via the + canonical `openkb@vectify` regardless of which KB they're consuming. + Different KBs are distinguished by / URL, not manifest name.""" + kb = _make_kb(tmp_path) + _make_skill(kb, "demo", "d") + regenerate_marketplace(kb) + + manifest = json.loads((kb / ".claude-plugin" / "marketplace.json").read_text()) + assert manifest["name"] == "vectify" + assert manifest["plugins"][0]["name"] == "openkb" + + +def test_regenerate_description_is_not_truncated(tmp_path): + """Manifest description must be a clean fixed string — no truncation + of SKILL.md content, no '...' mid-word.""" + kb = _make_kb(tmp_path) + _make_skill(kb, "demo", "the specific description goes here") + regenerate_marketplace(kb) + + manifest = json.loads((kb / ".claude-plugin" / "marketplace.json").read_text()) + # Must not contain the per-skill description (we don't inject it anymore) + assert "the specific description goes here" not in manifest["metadata"]["description"] + # Must not end with a truncated word (no trailing space-letter-letter etc.) + desc = manifest["metadata"]["description"] + assert not desc.endswith(" ") + assert desc.endswith(".") or desc.endswith("OpenKB.") From 5d76d58f1a18fa25280075da4a165399afce26a6 Mon Sep 17 00:00:00 2001 From: mountain Date: Mon, 18 May 2026 15:33:34 +0800 Subject: [PATCH 17/25] =?UTF-8?q?refactor(skill):=20address=20PR=20#57=20r?= =?UTF-8?q?eview=20feedback=20+=20rename=20compiler=E2=86=92creator?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Rename openkb/agent/skill_compiler.py → skill_creator.py (and associated symbols + prompt file). The existing openkb/agent/compiler.py owns 'compile' for raw → wiki; this module generates a skill from compiled wiki content, which is closer to 'create' / 'distill'. Disambiguates the verb in one package. - Translate MaxTurnsExceeded into a friendly RuntimeError inside run_skill_create. Both the CLI and chat call sites only catch RuntimeError; the SDK exception leaked a Python traceback before. - Defer the rmtree-on-overwrite until after _setup_llm_key and load_config succeed. Previously an unset API key would wipe the existing skill output with nothing to replace it. - Fix marketplace.py module docstring: don't claim chat-side SKILL.md edits regenerate the manifest (they don't). - Drop unused yes_flag from _preflight_skill_new + rewrite its docstring to match what the function actually does. - Clean up github-code-quality bot findings: unused pytest imports in 2 test files, unused 'manifest' local in test_marketplace.py (replaced with the assertion the test intended), redundant in-function stdlib imports in openkb/cli.py. Add 2 regression tests: - test_run_skill_create_translates_max_turns_to_runtime_error - test_skill_new_keeps_existing_skill_when_key_setup_fails --- openkb/agent/chat.py | 2 +- .../{skill_compiler.py => skill_creator.py} | 31 +++++++---- openkb/agent/skill_tools.py | 6 +-- openkb/cli.py | 54 ++++++++++--------- openkb/generator.py | 6 +-- openkb/marketplace.py | 21 +++++--- openkb/prompts/__init__.py | 2 +- .../{skill_compile.md => skill_create.md} | 2 +- tests/test_generator.py | 4 +- tests/test_marketplace.py | 12 +++-- tests/test_skill_chat_slash.py | 2 +- tests/test_skill_cli.py | 26 +++++++-- ...kill_compiler.py => test_skill_creator.py} | 47 ++++++++++++---- tests/test_skill_tools.py | 4 +- 14 files changed, 141 insertions(+), 78 deletions(-) rename openkb/agent/{skill_compiler.py => skill_creator.py} (81%) rename openkb/prompts/{skill_compile.md => skill_create.md} (97%) rename tests/{test_skill_compiler.py => test_skill_creator.py} (59%) diff --git a/openkb/agent/chat.py b/openkb/agent/chat.py index 2f523dc3..afabc745 100644 --- a/openkb/agent/chat.py +++ b/openkb/agent/chat.py @@ -525,7 +525,7 @@ async def _handle_slash_skill(arg: str, kb_dir: Path, style: Style) -> None: # wiki content). Chat doesn't have a -y flag, so existing skills # block with a clear instruction to delete first. from openkb.cli import _preflight_skill_new - err = _preflight_skill_new(kb_dir, name, yes_flag=False) + err = _preflight_skill_new(kb_dir, name) if err: _fmt(style, ("class:error", f"[ERROR] {err}\n")) return diff --git a/openkb/agent/skill_compiler.py b/openkb/agent/skill_creator.py similarity index 81% rename from openkb/agent/skill_compiler.py rename to openkb/agent/skill_creator.py index 0733673a..2e023c4e 100644 --- a/openkb/agent/skill_compiler.py +++ b/openkb/agent/skill_creator.py @@ -1,9 +1,9 @@ -"""The skill-compile agent. +"""The skill-create agent. Builds on the same openai-agents-SDK + LiteLLM stack as the query agent in ``openkb.agent.query``. The differences: -* System prompt comes from ``openkb/prompts/skill_compile.md`` and is +* System prompt comes from ``openkb/prompts/skill_create.md`` and is interpolated with the user's intent and the target skill name. * Tools are scoped: read the wiki, query the wiki, write only within the target skill directory. @@ -29,7 +29,7 @@ MAX_TURNS = 80 # higher than query (50) because compile can write multiple files -def build_skill_compile_agent( +def build_skill_create_agent( *, wiki_root: str, skill_root: str, @@ -51,7 +51,7 @@ def build_skill_compile_agent( Path(skill_root).mkdir(parents=True, exist_ok=True) wiki_schema = get_agents_md(Path(wiki_root)) - instructions = load_prompt("skill_compile").format( + instructions = load_prompt("skill_create").format( intent=intent, skill_name=skill_name, wiki_schema=wiki_schema, @@ -90,7 +90,7 @@ def done(summary: str) -> str: return f"Compilation marked done: {summary}" return Agent( - name="skill-compiler", + name="skill-creator", instructions=instructions, tools=[list_wiki_dir, read_wiki_file, query_wiki, write_skill_file, done], model=f"litellm/{model}", @@ -98,7 +98,7 @@ def done(summary: str) -> str: ) -async def run_skill_compile( +async def run_skill_create( *, kb_dir: Path, skill_name: str, @@ -108,12 +108,13 @@ async def run_skill_compile( """Compile a single skill from the KB's wiki. Returns the path to the produced skill directory. Raises - ``RuntimeError`` if the agent finishes without writing ``SKILL.md``. + ``RuntimeError`` if the agent finishes without writing ``SKILL.md``, + or if the SDK hits its turn cap before the agent declares done. """ wiki_root = str(kb_dir / "wiki") skill_root = kb_dir / "output" / "skills" / skill_name - agent = build_skill_compile_agent( + agent = build_skill_create_agent( wiki_root=wiki_root, skill_root=str(skill_root), skill_name=skill_name, @@ -128,7 +129,19 @@ async def run_skill_compile( f"working method. Read the wiki, then write the skill files." ) - await Runner.run(agent, seed, max_turns=MAX_TURNS) + # Lazy import: keeps the top of this module independent of the SDK's + # internal exception layout in case its export path moves. + from agents.exceptions import MaxTurnsExceeded + + try: + await Runner.run(agent, seed, max_turns=MAX_TURNS) + except MaxTurnsExceeded as exc: + raise RuntimeError( + f"Skill compilation hit the {MAX_TURNS}-step cap before finishing. " + f"The wiki may be too large for a single skill, or the intent may " + f"be too broad. Try splitting into multiple skills with narrower " + f"intents, or pass a smaller wiki subset." + ) from exc if not (skill_root / "SKILL.md").exists(): raise RuntimeError( diff --git a/openkb/agent/skill_tools.py b/openkb/agent/skill_tools.py index ef24d94e..3b7c736e 100644 --- a/openkb/agent/skill_tools.py +++ b/openkb/agent/skill_tools.py @@ -1,8 +1,8 @@ -"""Path-scoped IO tools for the skill-compile agent. +"""Path-scoped IO tools for the skill-create agent. -The skill-compile agent runs with three capabilities: +The skill-create agent runs with three capabilities: * READ from /wiki/** (the substrate) - * QUERY the wiki via openkb query (semantic retrieval, see skill_compiler.py) + * QUERY the wiki via openkb query (semantic retrieval, see skill_creator.py) * WRITE under /output/skills//** (the only output destination) These helpers enforce those boundaries at the Python level — every tool diff --git a/openkb/cli.py b/openkb/cli.py index 95e8d8d1..5e1a26be 100644 --- a/openkb/cli.py +++ b/openkb/cli.py @@ -165,21 +165,20 @@ def _validate_skill_name(name: str) -> str | None: return None -def _preflight_skill_new( - kb_dir: Path, name: str, yes_flag: bool, -) -> str | None: - """Run all the safety gates for ``openkb skill new`` / ``/skill new``. - - Returns ``None`` if it's safe to proceed (caller will then either - proceed or, for the overwrite case, call ``_clear_existing_skill_dir`` - after confirming with the user). Returns an error message string if - one of the gates trips. - - NOTE: This intentionally does NOT handle the overwrite confirmation - itself — that's caller-specific (TTY-based ``click.confirm`` in CLI; - explicit ``--force``-style flag in chat). It only returns an error if - the target dir exists AND ``yes_flag`` is False, and the caller is - expected to detect that error message and prompt as appropriate. +def _preflight_skill_new(kb_dir: Path, name: str) -> str | None: + """Run shared safety gates for ``openkb skill new`` / ``/skill new``. + + Checks (in order): + * skill name is a valid kebab-case slug + * ``/wiki`` exists + * ``/wiki/concepts`` or ``/wiki/summaries`` has at least + one file (i.e. some document has been ingested + compiled) + + Returns ``None`` if all gates pass, else a single-line error message + suitable to print to the user. + + Overwrite handling is NOT done here — the CLI handles it with + ``-y`` + ``click.confirm``; chat refuses overwrite outright. """ err = _validate_skill_name(name) if err: @@ -207,7 +206,6 @@ def _preflight_skill_new( def _clear_existing_skill_dir(kb_dir: Path, name: str) -> None: """Delete an existing ``/output/skills//`` directory.""" - import shutil target = kb_dir / "output" / "skills" / name if target.exists(): shutil.rmtree(target) @@ -1440,20 +1438,29 @@ def skill_new(ctx, name, intent, yes_flag): openkb skill new karpathy-thinking "Reason about transformers like Karpathy" """ - import asyncio - import sys - kb_dir = _find_kb_dir(ctx.obj.get("kb_dir_override")) if kb_dir is None: click.echo("No knowledge base found. Run `openkb init` first.", err=True) ctx.exit(1) - err = _preflight_skill_new(kb_dir, name, yes_flag) + err = _preflight_skill_new(kb_dir, name) if err: click.echo(f"[ERROR] {err}", err=True) ctx.exit(1) - # Overwrite handling (CLI-specific) + # Verify LLM key + load config BEFORE touching existing output. Any + # failure here (missing API key, malformed config) must leave the old + # skill directory intact — we can't replace it if we can't proceed. + try: + _setup_llm_key(kb_dir) + except RuntimeError as exc: + click.echo(f"[ERROR] {exc}", err=True) + ctx.exit(1) + config = load_config(kb_dir / ".openkb" / "config.yaml") + model = config.get("model", DEFAULT_CONFIG["model"]) + + # Overwrite handling (CLI-specific). Done AFTER key/config so a + # missing key doesn't wipe the user's existing skill output. target = kb_dir / "output" / "skills" / name if target.exists(): if yes_flag: @@ -1474,11 +1481,6 @@ def skill_new(ctx, name, intent, yes_flag): ) ctx.exit(1) - # Load config + key - _setup_llm_key(kb_dir) - config = load_config(kb_dir / ".openkb" / "config.yaml") - model = config.get("model", DEFAULT_CONFIG["model"]) - # Run the generator from openkb.generator import Generator click.echo(f"Compiling skill '{name}'...") diff --git a/openkb/generator.py b/openkb/generator.py index f24b4b73..843a1b06 100644 --- a/openkb/generator.py +++ b/openkb/generator.py @@ -9,14 +9,14 @@ distribution mechanic as skills) Each target plugs in its own `run` coroutine. v0.1's only entry calls into -``openkb.agent.skill_compiler.run_skill_compile``. +``openkb.agent.skill_creator.run_skill_create``. """ from __future__ import annotations from pathlib import Path from typing import Literal -from openkb.agent.skill_compiler import run_skill_compile +from openkb.agent.skill_creator import run_skill_create from openkb.marketplace import regenerate_marketplace @@ -56,7 +56,7 @@ def __init__( async def run(self) -> Path: """Execute the generator. Returns the path to the produced artifact.""" - await run_skill_compile( + await run_skill_create( kb_dir=self.kb_dir, skill_name=self.name, intent=self.intent, diff --git a/openkb/marketplace.py b/openkb/marketplace.py index e78df148..394952e7 100644 --- a/openkb/marketplace.py +++ b/openkb/marketplace.py @@ -1,17 +1,22 @@ """Regenerate the per-KB Claude Code plugin marketplace manifest. -After every `openkb skill new` (and after any chat-side edit to a SKILL.md -frontmatter), this module scans ``/output/skills/*/SKILL.md`` and -rewrites ``/.claude-plugin/marketplace.json`` listing all currently +After every successful skill generation (CLI ``openkb skill new`` or +chat ``/skill new``, both via ``Generator.run``), this module scans +``/output/skills/*/SKILL.md`` and rewrites +``/.claude-plugin/marketplace.json`` listing all currently compiled skills. The schema is a subset compatible with the OpenKB repo's own ``.claude-plugin/marketplace.json``: one plugin entry per KB, with a -``skills`` array of relative paths. ``owner`` is derived from git config -so Claude Code's ``/plugin marketplace add`` accepts the manifest. Other -agent CLIs (``npx skills add``) install from the same file. - -This is a deterministic step — no LLM calls. +``skills`` array of relative paths. ``owner`` is derived from git +config (run with cwd=kb_dir) so Claude Code's ``/plugin marketplace +add`` accepts the manifest. Other agent CLIs (``npx skills add``) +install from the same file. + +This is a deterministic step — no LLM calls. If a chat-session edit +to a SKILL.md changes the description after compile, the manifest is +NOT auto-regenerated; re-run ``openkb skill new`` or +``/skill new`` to refresh it. """ from __future__ import annotations diff --git a/openkb/prompts/__init__.py b/openkb/prompts/__init__.py index c28b2b1d..b5beda88 100644 --- a/openkb/prompts/__init__.py +++ b/openkb/prompts/__init__.py @@ -15,7 +15,7 @@ def load_prompt(name: str) -> str: """Return the contents of ``openkb/prompts/.md`` as a string. Args: - name: Filename without the ``.md`` suffix (e.g. ``"skill_compile"``). + name: Filename without the ``.md`` suffix (e.g. ``"skill_create"``). """ path = _PROMPTS_DIR / f"{name}.md" return path.read_text(encoding="utf-8") diff --git a/openkb/prompts/skill_compile.md b/openkb/prompts/skill_create.md similarity index 97% rename from openkb/prompts/skill_compile.md rename to openkb/prompts/skill_create.md index 7023bf80..bc222ef5 100644 --- a/openkb/prompts/skill_compile.md +++ b/openkb/prompts/skill_create.md @@ -1,4 +1,4 @@ -You are the OpenKB skill-compile agent. Your job: read the knowledge base +You are the OpenKB skill-create agent. Your job: read the knowledge base wiki at `/wiki/` and produce a redistributable Anthropic Skill at `/output/skills/{skill_name}/`. Other agents — Claude Code, Codex CLI, Gemini CLI, Cursor — will install this skill and load it on demand, so the diff --git a/tests/test_generator.py b/tests/test_generator.py index b2e7d0de..1ef179f5 100644 --- a/tests/test_generator.py +++ b/tests/test_generator.py @@ -44,7 +44,7 @@ def test_generator_skill_target_constructs_ok(tmp_path): @pytest.mark.asyncio -async def test_generator_run_delegates_to_skill_compiler(tmp_path): +async def test_generator_run_delegates_to_skill_creator(tmp_path): kb = _make_kb(tmp_path) g = Generator( target_type="skill", @@ -53,7 +53,7 @@ async def test_generator_run_delegates_to_skill_compiler(tmp_path): kb_dir=kb, model="gpt-4o-mini", ) - with patch("openkb.generator.run_skill_compile", new=AsyncMock()) as runner, \ + with patch("openkb.generator.run_skill_create", new=AsyncMock()) as runner, \ patch("openkb.generator.regenerate_marketplace") as regen: await g.run() runner.assert_awaited_once() diff --git a/tests/test_marketplace.py b/tests/test_marketplace.py index dee118d5..a34c34ba 100644 --- a/tests/test_marketplace.py +++ b/tests/test_marketplace.py @@ -95,11 +95,13 @@ def test_regenerate_reads_description_from_frontmatter(tmp_path): regenerate_marketplace(kb) manifest = json.loads((kb / ".claude-plugin" / "marketplace.json").read_text()) - # Manifest's metadata.description should at minimum mention the KB context; - # the per-skill description lives in the SKILL.md itself (the manifest just - # points at the skill directories). - assert "marketplace.json" in str(kb / ".claude-plugin" / "marketplace.json") - # Reload SKILL.md to confirm description preserved + # Per-skill descriptions live in the SKILL.md frontmatter, NOT in the + # marketplace manifest. The manifest just points at skill directories; + # the loading agent (Claude Code, npx skills) reads each SKILL.md to + # discover the per-skill description. Locking this in: the description + # must NOT leak into the top-level manifest metadata. + assert "the specific description goes here" not in manifest["metadata"]["description"] + # And the description IS preserved on disk in SKILL.md skill_md = (kb / "output" / "skills" / "demo" / "SKILL.md").read_text() assert "the specific description goes here" in skill_md diff --git a/tests/test_skill_chat_slash.py b/tests/test_skill_chat_slash.py index 377f1a91..100135a2 100644 --- a/tests/test_skill_chat_slash.py +++ b/tests/test_skill_chat_slash.py @@ -36,7 +36,7 @@ async def fake_run(kb_dir, skill_name, intent, model): f"---\nname: {skill_name}\ndescription: t\n---\n\n# {skill_name}\n" ) - with patch("openkb.generator.run_skill_compile", new=AsyncMock(side_effect=fake_run)): + with patch("openkb.generator.run_skill_create", new=AsyncMock(side_effect=fake_run)): action = await _handle_slash( '/skill new demo "test intent"', kb, session, style ) diff --git a/tests/test_skill_cli.py b/tests/test_skill_cli.py index 297f9e1d..4004450d 100644 --- a/tests/test_skill_cli.py +++ b/tests/test_skill_cli.py @@ -8,7 +8,6 @@ import json from unittest.mock import AsyncMock, patch -import pytest from click.testing import CliRunner from openkb.cli import cli @@ -27,7 +26,7 @@ def _make_kb(tmp_path): def _fake_compile(kb_dir, skill_name, **_kw): - """Side-effect for the patched run_skill_compile: write a minimal SKILL.md.""" + """Side-effect for the patched run_skill_create: write a minimal SKILL.md.""" target = kb_dir / "output" / "skills" / skill_name target.mkdir(parents=True, exist_ok=True) (target / "SKILL.md").write_text( @@ -43,7 +42,7 @@ async def fake_run(kb_dir, skill_name, intent, model): _fake_compile(kb_dir, skill_name) with patch("openkb.cli._find_kb_dir", return_value=kb), \ - patch("openkb.generator.run_skill_compile", new=AsyncMock(side_effect=fake_run)): + patch("openkb.generator.run_skill_create", new=AsyncMock(side_effect=fake_run)): result = runner.invoke(cli, ["skill", "new", "demo", "test intent"]) assert result.exit_code == 0, result.output @@ -124,9 +123,28 @@ async def fake_run(kb_dir, skill_name, intent, model): _fake_compile(kb_dir, skill_name) with patch("openkb.cli._find_kb_dir", return_value=kb), \ - patch("openkb.generator.run_skill_compile", new=AsyncMock(side_effect=fake_run)): + patch("openkb.generator.run_skill_create", new=AsyncMock(side_effect=fake_run)): result = runner.invoke(cli, ["skill", "new", "demo", "x", "-y"]) assert result.exit_code == 0, result.output assert not (kb / "output" / "skills" / "demo" / "stale.txt").exists() assert (kb / "output" / "skills" / "demo" / "SKILL.md").exists() + + +def test_skill_new_keeps_existing_skill_when_key_setup_fails(tmp_path): + """If LLM key setup raises (e.g. no API key), the old skill output + must be preserved — don't rmtree before key setup is verified.""" + kb = _make_kb(tmp_path) + target = kb / "output" / "skills" / "demo" + target.mkdir(parents=True) + (target / "stale.txt").write_text("priceless") + + runner = CliRunner() + with patch("openkb.cli._find_kb_dir", return_value=kb), \ + patch("openkb.cli._setup_llm_key", + side_effect=RuntimeError("no API key configured")): + result = runner.invoke(cli, ["skill", "new", "demo", "x", "-y"]) + + assert result.exit_code != 0 + # Old skill must still be there + assert (target / "stale.txt").read_text() == "priceless" diff --git a/tests/test_skill_compiler.py b/tests/test_skill_creator.py similarity index 59% rename from tests/test_skill_compiler.py rename to tests/test_skill_creator.py index 6fd06f22..e166c418 100644 --- a/tests/test_skill_compiler.py +++ b/tests/test_skill_creator.py @@ -1,4 +1,4 @@ -"""Tests for openkb.agent.skill_compiler. +"""Tests for openkb.agent.skill_creator. The agent itself is mocked (we don't want to spend tokens in unit tests). What we DO test: @@ -6,15 +6,18 @@ * System prompt gets the intent and skill_name interpolated * The skill output dir is created before the agent starts * If the agent finishes without writing SKILL.md, we surface an error + * If the SDK hits its max-turns cap, we translate to RuntimeError so + the CLI/chat call sites (which only catch RuntimeError) print a + friendly message instead of leaking a traceback. """ from __future__ import annotations import pytest from unittest.mock import AsyncMock, patch -from openkb.agent.skill_compiler import ( - build_skill_compile_agent, - run_skill_compile, +from openkb.agent.skill_creator import ( + build_skill_create_agent, + run_skill_create, ) @@ -29,7 +32,7 @@ def _make_kb(tmp_path): def test_build_agent_interpolates_intent_and_name(tmp_path): kb = _make_kb(tmp_path) - agent = build_skill_compile_agent( + agent = build_skill_create_agent( wiki_root=str(kb / "wiki"), skill_root=str(kb / "output" / "skills" / "demo"), skill_name="demo", @@ -41,7 +44,7 @@ def test_build_agent_interpolates_intent_and_name(tmp_path): @pytest.mark.asyncio -async def test_run_skill_compile_creates_output_dir(tmp_path): +async def test_run_skill_create_creates_output_dir(tmp_path): kb = _make_kb(tmp_path) target = kb / "output" / "skills" / "demo" # Fake the agent run: just write a minimal SKILL.md to simulate success. @@ -53,8 +56,8 @@ async def fake_runner(*args, **kwargs): from types import SimpleNamespace return SimpleNamespace(final_output="done") - with patch("openkb.agent.skill_compiler.Runner.run", new=AsyncMock(side_effect=fake_runner)): - await run_skill_compile( + with patch("openkb.agent.skill_creator.Runner.run", new=AsyncMock(side_effect=fake_runner)): + await run_skill_create( kb_dir=kb, skill_name="demo", intent="test intent", @@ -65,7 +68,7 @@ async def fake_runner(*args, **kwargs): @pytest.mark.asyncio -async def test_run_skill_compile_raises_when_no_skill_md_written(tmp_path): +async def test_run_skill_create_raises_when_no_skill_md_written(tmp_path): kb = _make_kb(tmp_path) target = kb / "output" / "skills" / "demo" target.mkdir(parents=True, exist_ok=True) @@ -74,11 +77,33 @@ async def fake_runner(*args, **kwargs): from types import SimpleNamespace return SimpleNamespace(final_output="done") - with patch("openkb.agent.skill_compiler.Runner.run", new=AsyncMock(side_effect=fake_runner)): + with patch("openkb.agent.skill_creator.Runner.run", new=AsyncMock(side_effect=fake_runner)): with pytest.raises(RuntimeError, match="did not write SKILL.md"): - await run_skill_compile( + await run_skill_create( kb_dir=kb, skill_name="demo", intent="test intent", model="gpt-4o-mini", ) + + +@pytest.mark.asyncio +async def test_run_skill_create_translates_max_turns_to_runtime_error(tmp_path): + """MaxTurnsExceeded from the SDK should be re-raised as a RuntimeError + with a user-friendly message — otherwise both CLI and chat call sites + (which only catch RuntimeError) leak a Python traceback.""" + from agents.exceptions import MaxTurnsExceeded + kb = _make_kb(tmp_path) + + async def fake_runner(*args, **kwargs): + raise MaxTurnsExceeded("agent ran out of turns") + + with patch("openkb.agent.skill_creator.Runner.run", + new=AsyncMock(side_effect=fake_runner)): + with pytest.raises(RuntimeError, match="step cap"): + await run_skill_create( + kb_dir=kb, + skill_name="demo", + intent="x", + model="gpt-4o-mini", + ) diff --git a/tests/test_skill_tools.py b/tests/test_skill_tools.py index 3c620c97..f5db686d 100644 --- a/tests/test_skill_tools.py +++ b/tests/test_skill_tools.py @@ -1,8 +1,6 @@ -"""Tests for openkb.agent.skill_tools — path-scoped IO for the skill compile agent.""" +"""Tests for openkb.agent.skill_tools — path-scoped IO for the skill-create agent.""" from __future__ import annotations -import pytest - from openkb.agent.skill_tools import ( list_wiki_dir, read_wiki_file_for_skill, From dbd78e69799b4ae34442f6b86660912b8198f44f Mon Sep 17 00:00:00 2001 From: mountain Date: Mon, 18 May 2026 15:39:34 +0800 Subject: [PATCH 18/25] docs(readme): restructure into Wiki Foundation + Generators layers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reframe the project mental model: the wiki is the substrate, and several primitives (query, chat, skill new) generate output from it. Add 'Drop in a book. Out comes a digital expert.' slogan to the Skill Factory subsection. - Features list split into 'Wiki foundation' (compile + maintain) and 'Generators' (query / chat / Skill Factory) - Quick Start adds step 6 — distill a skill - Architecture diagram extended to show the wiki branching into query/chat + Skill Factory + future generators (ppt/podcast/…) - Usage section regrouped under 🧱 Wiki Foundation and ✨ Generators - Skill Factory subsection promoted with the slogan as its heading and a concrete example folder tree - Chat slash command list updated to include /skill new --- README.md | 121 ++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 77 insertions(+), 44 deletions(-) diff --git a/README.md b/README.md index 41278d29..a8010e02 100644 --- a/README.md +++ b/README.md @@ -24,16 +24,22 @@ Traditional RAG rediscovers knowledge from scratch on every query. Nothing accum ### Features +OpenKB has two layers: a **wiki foundation** that compiles and maintains your knowledge, and **generators** that turn that wiki into useful artifacts. + +**Wiki foundation** — compile and maintain - **Broad format support** — PDF, Word, Markdown, PowerPoint, HTML, Excel, text, and more via markitdown -- **Scale to long documents** — Long and complex documents are handled via [PageIndex](https://github.com/VectifyAI/PageIndex) tree indexing, enabling accurate, vectorless long-context retrieval +- **Scale to long documents** — Handled via [PageIndex](https://github.com/VectifyAI/PageIndex) tree indexing, enabling accurate, vectorless long-context retrieval - **Native multi-modality** — Retrieves and understands figures, tables, and images, not just text -- **Compiled Wiki** — LLM manages and compiles your documents into summaries, concept pages, and cross-links, all kept in sync -- **Query** — Ask questions (one-off) against your wiki. The LLM navigates your compiled knowledge to answer -- **Interactive Chat** — Multi-turn conversations with persisted sessions you can resume across runs +- **Compiled Wiki** — LLM compiles documents into summaries, concept pages, and cross-links, all kept in sync - **Lint** — Health checks find contradictions, gaps, orphans, and stale content - **Watch mode** — Drop files into `raw/`, wiki updates automatically - **Obsidian compatible** — Wiki is plain `.md` files with `[[wikilinks]]`. Open in Obsidian for graph view and browsing +**Generators** — turn the wiki into something useful +- **Query** — Ask questions (one-off) against your wiki. The LLM navigates compiled knowledge to answer +- **Chat** — Multi-turn conversations with persisted sessions you can resume across runs +- **Skill Factory** — *Drop in a book. Out comes a digital expert.* Compile any subset of your wiki into a redistributable [Anthropic Skill](https://docs.claude.com/en/docs/build-with-claude/skills) installable in Claude Code / Codex / Gemini CLI / Cursor + # 🚀 Getting Started ### Install @@ -80,6 +86,9 @@ openkb query "What are the main findings?" # 5. Or chat interactively openkb chat + +# 6. Or distill your wiki into a redistributable skill +openkb skill new my-expert "Reason like an expert on " ``` ### Set up your LLM @@ -109,7 +118,7 @@ raw/ You drop files here │ Wiki Compilation (using LLM) │ │ ▼ ▼ -wiki/ +wiki/ │ ← the foundation ├── index.md Knowledge base overview ├── log.md Operations timeline ├── AGENTS.md Wiki schema (LLM instructions) @@ -118,6 +127,13 @@ wiki/ ├── concepts/ Cross-document synthesis ← the good stuff ├── explorations/ Saved query results └── reports/ Lint reports + │ + ┌──────────────────────┼──────────────────────┐ + ▼ ▼ ▼ + query / chat Skill Factory (future) + (LLM answers from openkb skill new ppt / podcast / + the wiki) → output/skills/ report / … + + marketplace.json ``` ### Short vs. Long Document Handling @@ -144,16 +160,15 @@ A single source might touch 10-15 wiki pages. Knowledge accumulates: each docume # ⚙️ Usage -### Commands +OpenKB commands fall into two layers: the **wiki foundation** (compile + manage your knowledge) and **generators** (turn that wiki into useful output). + +## 🧱 Wiki Foundation — compile and maintain | Command | Description | |---|---| | `openkb init` | Initialize a new knowledge base (interactive) | | openkb add <file_or_dir_or_URL> | Add documents and compile to wiki. URL ingest auto-detects PDF (saved as `.pdf` → PageIndex / markitdown) vs HTML (trafilatura main-content extract → `.md`) | -| openkb skill new <name> "<intent>" | Compile a skill from this KB's wiki into `output/skills//` and update the marketplace manifest | | openkb remove <doc> | Remove a document and clean up its wiki pages, images, registry, and PageIndex state (use `--dry-run` to preview, `--keep-raw` / `--keep-empty-concepts` to retain artifacts) | -| openkb query "question" | Ask a question over the knowledge base (use `--save` to save the answer to `wiki/explorations/`) | -| `openkb chat` | Start an interactive multi-turn chat (use `--resume`, `--list`, `--delete` to manage sessions) | | `openkb watch` | Watch `raw/` and auto-compile new files | | `openkb lint` | Run structural + knowledge health checks | | `openkb list` | List indexed documents and concepts | @@ -162,20 +177,63 @@ A single source might touch 10-15 wiki pages. Knowledge accumulates: each docume -### Skills — compile your wiki into a redistributable skill +## ✨ Generators — turn the wiki into output + +A "generator" reads from the compiled wiki and produces something usable: an answer, a conversation, a skill folder. The wiki is the substrate; generators are the surfaces. + +| Command | Output | +|---|---| +| openkb query "question" | A grounded answer with citations (use `--save` to persist to `wiki/explorations/`) | +| `openkb chat` | Interactive multi-turn session over the wiki (use `--resume`, `--list`, `--delete` to manage sessions) | +| openkb skill new <name> "<intent>" | A redistributable Anthropic Skill at `/output/skills//` + auto-updated `marketplace.json` | + +### Query & Chat — ask the wiki + +`openkb query "..."` answers a single question. `openkb chat` is interactive — each turn carries history, so you can dig into a topic without re-typing context. Both use the same underlying wiki and the same retrieval primitives (PageIndex for long docs, direct concept reads for short). + +```bash +openkb query "What does the literature say about attention scaling?" + +openkb chat # start a new session +openkb chat --resume # resume the most recent session +openkb chat --resume 20260411 # resume by id (unique prefix works) +openkb chat --list # list all sessions +openkb chat --delete # delete a session +``` + +Inside a chat, type `/` to access slash commands (Tab to complete): + +- `/help` — list available commands +- `/status` — show knowledge base status +- `/list` — list all documents +- `/add ` — add a document or directory without leaving the chat +- `/skill new ""` — compile a skill from this chat (see below) +- `/save [name]` — export the transcript to `wiki/explorations/` +- `/clear` — start a fresh session (the current one stays on disk) +- `/lint` — run knowledge base lint +- `/exit` — exit (Ctrl-D also works) + +### 🛠 Skill Factory — *Drop in a book. Out comes a digital expert.* -Once you have a populated wiki, you can compile a subset of it into an -**Anthropic Skill** — a portable folder that Claude Code, Codex CLI, -Gemini CLI, and Cursor all know how to load. +The newest generator. `openkb skill new` distills any subset of your wiki into an [Anthropic Skill](https://docs.claude.com/en/docs/build-with-claude/skills) — a portable folder that **Claude Code, Codex CLI, Gemini CLI, and Cursor** all install and load natively. Drop in a book's worth of papers; out comes a specialist that other agents can call on. ```bash openkb skill new karpathy-thinking \ "Reason about transformers and attention in Karpathy's style" ``` -This produces `output/skills/karpathy-thinking/` with `SKILL.md`, -optional `references/`, and an auto-updated -`.claude-plugin/marketplace.json` for distribution. +This produces: + +``` +/output/skills/karpathy-thinking/ +├── SKILL.md # YAML frontmatter + when-to-use + approach +├── references/ # depth material the agent loads on demand +│ ├── methodology.md +│ └── key-quotes.md +└── (scripts/) # optional, only if intent implies computation +``` + +…plus an auto-updated `/.claude-plugin/marketplace.json` so the whole KB is one-line installable. **Install locally:** @@ -183,15 +241,13 @@ optional `references/`, and an auto-updated cp -r output/skills/karpathy-thinking ~/.claude/skills/ ``` -**Share with others:** - -Push your KB directory to GitHub, then anyone can install all your skills with one command: +**Share with others** — push your KB to GitHub, then anyone runs: ```bash npx skills@latest add / ``` -You can also iterate inside chat: +**Iterate from chat** — compilation is one-shot, but follow-up edits aren't. Inside `openkb chat`, you can refine without re-running the whole pipeline: ``` /skill new karpathy-thinking "Reason about transformers like Karpathy" @@ -200,30 +256,7 @@ You can also iterate inside chat: [agent edits SKILL.md frontmatter in place] ``` -See [CONTRIBUTING.md](CONTRIBUTING.md) for how to submit your compiled skill back to the OpenKB community registry. - -### Interactive Chat - -`openkb chat` opens an interactive chat session over your wiki knowledge base. Unlike the one-shot `openkb query`, each turn carries the conversation history, so you can dig into a topic without re-typing context. - -```bash -openkb chat # start a new session -openkb chat --resume # resume the most recent session -openkb chat --resume 20260411 # resume by id (unique prefix works) -openkb chat --list # list all sessions -openkb chat --delete # delete a session -``` - -Inside a chat, type `/` to access slash commands (Tab to complete): - -- `/help` — list available commands -- `/status` — show knowledge base status -- `/list` — list all documents -- `/add ` — add a document or directory without leaving the chat -- `/save [name]` — export the transcript to `wiki/explorations/` -- `/clear` — start a fresh session (the current one stays on disk) -- `/lint` — run knowledge base lint -- `/exit` — exit (Ctrl-D also works) +See [CONTRIBUTING.md](CONTRIBUTING.md) for how to submit your compiled skill back to the community registry at [VectifyAI/OpenKB](https://github.com/VectifyAI/OpenKB). ### Configuration From 10c19d89c855c59d89a545134d0402287233e239 Mon Sep 17 00:00:00 2001 From: mountain Date: Mon, 18 May 2026 16:09:15 +0800 Subject: [PATCH 19/25] =?UTF-8?q?feat(skill):=20iteration=20workspace=20?= =?UTF-8?q?=E2=80=94=20preserve=20history,=20rollback=20support?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Borrows from Anthropic's skill-creator: each time 'openkb skill new -y' would overwrite an existing skill, the old version is copied to /output/skills/-workspace/iteration-N/ instead of being destroyed. Iteration numbers monotonically increase. Each iteration carries a diff.md showing description + reference-file delta vs the previous version. New commands: openkb skill history list past iterations openkb skill rollback restore latest iteration openkb skill rollback --to N restore specific iteration Tests cover: iteration numbering, restore-from-N, restore-from-latest, diff content (description / added refs / removed refs). --- openkb/cli.py | 151 +++++++++++++++++++++++ openkb/skill_workspace.py | 219 ++++++++++++++++++++++++++++++++++ tests/test_skill_cli.py | 121 +++++++++++++++++++ tests/test_skill_workspace.py | 153 ++++++++++++++++++++++++ 4 files changed, 644 insertions(+) create mode 100644 openkb/skill_workspace.py create mode 100644 tests/test_skill_workspace.py diff --git a/openkb/cli.py b/openkb/cli.py index 5e1a26be..66496a3d 100644 --- a/openkb/cli.py +++ b/openkb/cli.py @@ -1461,9 +1461,18 @@ def skill_new(ctx, name, intent, yes_flag): # Overwrite handling (CLI-specific). Done AFTER key/config so a # missing key doesn't wipe the user's existing skill output. + # + # When overwriting, we don't destroy the old skill — we copy it + # into /output/skills/-workspace/iteration-N/ first, so + # the user can roll back via `openkb skill rollback`. See + # ``openkb/skill_workspace.py``. + from openkb.skill_workspace import save_iteration, write_diff + target = kb_dir / "output" / "skills" / name + saved_iteration: Path | None = None if target.exists(): if yes_flag: + saved_iteration = save_iteration(kb_dir, name) _clear_existing_skill_dir(kb_dir, name) elif sys.stdin.isatty(): if not click.confirm( @@ -1472,6 +1481,7 @@ def skill_new(ctx, name, intent, yes_flag): ): click.echo("Aborted.") ctx.exit(1) + saved_iteration = save_iteration(kb_dir, name) _clear_existing_skill_dir(kb_dir, name) else: click.echo( @@ -1497,9 +1507,150 @@ def skill_new(ctx, name, intent, yes_flag): click.echo(f"[ERROR] {exc}", err=True) ctx.exit(1) + # Drop a structural diff inside the saved iteration so the user + # can see what changed since the previous compile. + if saved_iteration is not None: + try: + write_diff(saved_iteration, target, saved_iteration / "diff.md") + except Exception as exc: # diff is best-effort; never block success + logging.getLogger(__name__).debug( + "diff generation failed: %s", exc, exc_info=True + ) + click.echo(f"\nSaved: output/skills/{name}/") + if saved_iteration is not None: + rel = saved_iteration.relative_to(kb_dir) + click.echo(f"Previous version: {rel}/ (run `openkb skill rollback {name}` to restore)") click.echo(f"Manifest: .claude-plugin/marketplace.json updated") click.echo(f"\nInstall locally:") click.echo(f" cp -r output/skills/{name} ~/.claude/skills/") click.echo(f"\nShare (push KB to GitHub, then):") click.echo(f" npx skills@latest add /") + + +@skill.command("history") +@click.argument("name") +@click.pass_context +def skill_history(ctx, name): + """List previous iterations of a skill.""" + import datetime as _dt + + from openkb.skill_workspace import list_iterations + + kb_dir = _find_kb_dir(ctx.obj.get("kb_dir_override")) + if kb_dir is None: + click.echo("No knowledge base found. Run `openkb init` first.", err=True) + ctx.exit(1) + + err = _validate_skill_name(name) + if err: + click.echo(f"[ERROR] {err}", err=True) + ctx.exit(1) + + iters = list_iterations(kb_dir, name) + if not iters: + click.echo(f"No previous iterations for '{name}'.") + return + + click.echo(f"Iterations of '{name}' ({len(iters)} total):\n") + click.echo(" N Path Created") + click.echo(" - -------------------------------------------------- -------") + for path in iters: + n = int(path.name.split("-", 1)[1]) + rel = path.relative_to(kb_dir) + try: + mtime = _dt.datetime.fromtimestamp(path.stat().st_mtime) + stamp = mtime.strftime("%Y-%m-%d %H:%M") + except OSError: + stamp = "-" + click.echo(f" {n} {rel} {stamp}") + + current = kb_dir / "output" / "skills" / name + if current.is_dir(): + rel_curr = current.relative_to(kb_dir) + click.echo(f"\n Current: {rel_curr}/") + + latest_n = int(iters[-1].name.split("-", 1)[1]) + click.echo("\nRestore an iteration:") + click.echo( + f" openkb skill rollback {name} # restore latest (iteration-{latest_n})" + ) + click.echo( + f" openkb skill rollback {name} --to 1 # restore iteration-1" + ) + + +@skill.command("rollback") +@click.argument("name") +@click.option( + "--to", "to_n", + default=None, type=int, + help="Iteration number to restore. Defaults to latest.", +) +@click.option( + "-y", "--yes", "yes_flag", + is_flag=True, default=False, + help="Skip confirmation.", +) +@click.pass_context +def skill_rollback(ctx, name, to_n, yes_flag): + """Restore a previous iteration as the current skill.""" + from openkb.marketplace import regenerate_marketplace + from openkb.skill_workspace import list_iterations, restore_iteration + + kb_dir = _find_kb_dir(ctx.obj.get("kb_dir_override")) + if kb_dir is None: + click.echo("No knowledge base found. Run `openkb init` first.", err=True) + ctx.exit(1) + + err = _validate_skill_name(name) + if err: + click.echo(f"[ERROR] {err}", err=True) + ctx.exit(1) + + iters = list_iterations(kb_dir, name) + if not iters: + click.echo( + f"[ERROR] No iterations exist for '{name}'. Nothing to roll back.", + err=True, + ) + ctx.exit(1) + + target_n = to_n if to_n is not None else int(iters[-1].name.split("-", 1)[1]) + target_label = f"iteration-{target_n}" + if not any(p.name == target_label for p in iters): + click.echo( + f"[ERROR] Iteration {target_n} not found for '{name}'. " + f"Run `openkb skill history {name}` to see available iterations.", + err=True, + ) + ctx.exit(1) + + current = kb_dir / "output" / "skills" / name + if current.exists(): + prompt = ( + f"This will overwrite output/skills/{name}/ with {target_label}. Continue?" + ) + if yes_flag: + pass + elif sys.stdin.isatty(): + if not click.confirm(prompt, default=False): + click.echo("Aborted.") + ctx.exit(1) + else: + click.echo( + f"[ERROR] output/skills/{name}/ exists. Pass -y to overwrite " + f"in non-interactive contexts.", + err=True, + ) + ctx.exit(1) + + try: + restore_iteration(kb_dir, name, n=to_n) + except FileNotFoundError as exc: + click.echo(f"[ERROR] {exc}", err=True) + ctx.exit(1) + + regenerate_marketplace(kb_dir) + click.echo(f"Restored output/skills/{name}/ from {target_label}.") + click.echo("Manifest: .claude-plugin/marketplace.json updated") diff --git a/openkb/skill_workspace.py b/openkb/skill_workspace.py new file mode 100644 index 00000000..f82a42c7 --- /dev/null +++ b/openkb/skill_workspace.py @@ -0,0 +1,219 @@ +"""Skill iteration workspace — preserve history, enable rollback. + +When ``openkb skill new -y`` would overwrite an existing skill, +the CLI calls :func:`save_iteration` first to copy the current skill +directory into a parallel ``/output/skills/-workspace/`` tree +under ``iteration-N/``. Iteration numbers monotonically increase across +overwrites, so no work is destroyed by a new compile. + +After the new skill is generated, :func:`write_diff` drops a +``diff.md`` inside the saved iteration capturing the structural delta +(description change, ref/script add/remove, SKILL.md line-count delta). + +Borrowed from Anthropic's `skill-creator` iteration pattern: +https://github.com/anthropics/skills/blob/main/skills/skill-creator/SKILL.md +""" +from __future__ import annotations + +import re +import shutil +from pathlib import Path + +__all__ = [ + "save_iteration", + "list_iterations", + "restore_iteration", + "write_diff", +] + + +def _skill_dir(kb_dir: Path, skill_name: str) -> Path: + return kb_dir / "output" / "skills" / skill_name + + +def _workspace_dir(kb_dir: Path, skill_name: str) -> Path: + return kb_dir / "output" / "skills" / f"{skill_name}-workspace" + + +_ITER_RE = re.compile(r"^iteration-(\d+)$") + + +def _iter_number(path: Path) -> int | None: + m = _ITER_RE.match(path.name) + return int(m.group(1)) if m else None + + +def list_iterations(kb_dir: Path, skill_name: str) -> list[Path]: + """List existing iteration directories for a skill, sorted by N ascending. + + Returns an empty list if the workspace doesn't exist. + """ + ws = _workspace_dir(kb_dir, skill_name) + if not ws.is_dir(): + return [] + iters: list[tuple[int, Path]] = [] + for child in ws.iterdir(): + if not child.is_dir(): + continue + n = _iter_number(child) + if n is None: + continue + iters.append((n, child)) + iters.sort(key=lambda t: t[0]) + return [p for _, p in iters] + + +def save_iteration(kb_dir: Path, skill_name: str) -> Path | None: + """Copy current ``/output/skills//`` to the next iteration slot. + + Returns the saved iteration path, or ``None`` if there's no current + skill to save (first compile of a new name). + """ + src = _skill_dir(kb_dir, skill_name) + if not src.is_dir(): + return None + + existing = list_iterations(kb_dir, skill_name) + next_n = (max((_iter_number(p) for p in existing), default=0) or 0) + 1 + + ws = _workspace_dir(kb_dir, skill_name) + ws.mkdir(parents=True, exist_ok=True) + dest = ws / f"iteration-{next_n}" + shutil.copytree(src, dest) + return dest + + +def restore_iteration( + kb_dir: Path, skill_name: str, n: int | None = None +) -> Path: + """Restore an iteration as the current skill. + + If ``n`` is ``None``, restore the highest-numbered iteration. Raises + ``FileNotFoundError`` if the requested iteration doesn't exist. + + Returns the path to the restored skill directory. + """ + iters = list_iterations(kb_dir, skill_name) + if not iters: + raise FileNotFoundError( + f"No iterations exist for skill {skill_name!r}." + ) + + if n is None: + src = iters[-1] + else: + match = next( + (p for p in iters if _iter_number(p) == n), + None, + ) + if match is None: + raise FileNotFoundError( + f"Iteration {n} not found for skill {skill_name!r}." + ) + src = match + + dest = _skill_dir(kb_dir, skill_name) + if dest.exists(): + shutil.rmtree(dest) + shutil.copytree(src, dest) + return dest + + +# --------------------------------------------------------------------------- +# Diff +# --------------------------------------------------------------------------- + + +_DESC_RE = re.compile( + r"^description:\s*(.*?)\s*$", + re.MULTILINE, +) + + +def _extract_description(skill_md: Path) -> str: + """Return the ``description:`` line from a SKILL.md frontmatter, + or empty string if absent.""" + if not skill_md.is_file(): + return "" + text = skill_md.read_text(encoding="utf-8", errors="replace") + # Only scan the frontmatter block to avoid matching body text. + if text.startswith("---"): + end = text.find("\n---", 3) + head = text[: end if end != -1 else len(text)] + else: + head = text[:2000] + m = _DESC_RE.search(head) + return m.group(1) if m else "" + + +def _list_files(root: Path, subdir: str) -> set[str]: + base = root / subdir + if not base.is_dir(): + return set() + return { + str(p.relative_to(root)).replace("\\", "/") + for p in base.rglob("*") + if p.is_file() + } + + +def _line_count(path: Path) -> int: + if not path.is_file(): + return 0 + return len(path.read_text(encoding="utf-8", errors="replace").splitlines()) + + +def write_diff(prev: Path, curr: Path, diff_path: Path) -> None: + """Write a human-readable structural diff from ``prev`` to ``curr``. + + Covers: + * description: line changes (from SKILL.md frontmatter) + * files added/removed under ``references/`` and ``scripts/`` + * line-count delta on SKILL.md + """ + prev_desc = _extract_description(prev / "SKILL.md") + curr_desc = _extract_description(curr / "SKILL.md") + + prev_refs = _list_files(prev, "references") + curr_refs = _list_files(curr, "references") + prev_scripts = _list_files(prev, "scripts") + curr_scripts = _list_files(curr, "scripts") + + added = sorted((curr_refs - prev_refs) | (curr_scripts - prev_scripts)) + removed = sorted((prev_refs - curr_refs) | (prev_scripts - curr_scripts)) + + prev_lc = _line_count(prev / "SKILL.md") + curr_lc = _line_count(curr / "SKILL.md") + delta = curr_lc - prev_lc + + lines: list[str] = [] + lines.append(f"# Skill diff: {prev.name} -> current\n") + + lines.append("## description\n") + if prev_desc == curr_desc: + lines.append("_unchanged_\n") + else: + lines.append(f"- before: {prev_desc or '(none)'}") + lines.append(f"- after: {curr_desc or '(none)'}\n") + + lines.append("## files\n") + if not added and not removed: + lines.append("_no files added or removed_\n") + else: + for p in added: + lines.append(f"- added: {p}") + for p in removed: + lines.append(f"- removed: {p}") + lines.append("") + + lines.append("## SKILL.md line count\n") + sign = "+" if delta >= 0 else "" + lines.append( + f"- before: {prev_lc} lines" + ) + lines.append( + f"- after: {curr_lc} lines ({sign}{delta})\n" + ) + + diff_path.parent.mkdir(parents=True, exist_ok=True) + diff_path.write_text("\n".join(lines), encoding="utf-8") diff --git a/tests/test_skill_cli.py b/tests/test_skill_cli.py index 4004450d..e570d000 100644 --- a/tests/test_skill_cli.py +++ b/tests/test_skill_cli.py @@ -131,6 +131,127 @@ async def fake_run(kb_dir, skill_name, intent, model): assert (kb / "output" / "skills" / "demo" / "SKILL.md").exists() +def test_skill_new_saves_iteration_when_overwriting(tmp_path): + """Overwriting with -y must copy the old skill into the workspace + under iteration-1/ before the generator runs.""" + kb = _make_kb(tmp_path) + target = kb / "output" / "skills" / "demo" + target.mkdir(parents=True) + (target / "SKILL.md").write_text( + "---\nname: demo\ndescription: original description\n---\n\n# demo\n" + ) + + runner = CliRunner() + + async def fake_run(kb_dir, skill_name, intent, model): + _fake_compile(kb_dir, skill_name) + + with patch("openkb.cli._find_kb_dir", return_value=kb), \ + patch("openkb.generator.run_skill_create", new=AsyncMock(side_effect=fake_run)): + result = runner.invoke(cli, ["skill", "new", "demo", "x", "-y"]) + + assert result.exit_code == 0, result.output + iter1 = kb / "output" / "skills" / "demo-workspace" / "iteration-1" + assert iter1.is_dir() + assert (iter1 / "SKILL.md").exists() + assert "original description" in (iter1 / "SKILL.md").read_text() + # And a diff.md is dropped against the previous version + assert (iter1 / "diff.md").exists() + + +def test_skill_history_command_lists_iterations(tmp_path): + """`openkb skill history ` lists existing iteration directories.""" + kb = _make_kb(tmp_path) + ws = kb / "output" / "skills" / "demo-workspace" + (ws / "iteration-1").mkdir(parents=True) + (ws / "iteration-1" / "SKILL.md").write_text( + "---\nname: demo\ndescription: v1\n---\n" + ) + (ws / "iteration-2").mkdir(parents=True) + (ws / "iteration-2" / "SKILL.md").write_text( + "---\nname: demo\ndescription: v2\n---\n" + ) + + runner = CliRunner() + with patch("openkb.cli._find_kb_dir", return_value=kb): + result = runner.invoke(cli, ["skill", "history", "demo"]) + + assert result.exit_code == 0, result.output + assert "iteration-1" in result.output + assert "iteration-2" in result.output + + +def test_skill_history_command_when_no_iterations(tmp_path): + kb = _make_kb(tmp_path) + runner = CliRunner() + with patch("openkb.cli._find_kb_dir", return_value=kb): + result = runner.invoke(cli, ["skill", "history", "demo"]) + assert result.exit_code == 0, result.output + assert "No previous iterations" in result.output + + +def test_skill_rollback_restores_from_workspace(tmp_path): + """`openkb skill rollback ` copies the latest iteration back + into output/skills//.""" + kb = _make_kb(tmp_path) + ws = kb / "output" / "skills" / "demo-workspace" + (ws / "iteration-1").mkdir(parents=True) + (ws / "iteration-1" / "SKILL.md").write_text( + "---\nname: demo\ndescription: restored\n---\n\n# demo\n" + ) + # Current skill is "broken" + current = kb / "output" / "skills" / "demo" + current.mkdir(parents=True) + (current / "SKILL.md").write_text( + "---\nname: demo\ndescription: broken\n---\n" + ) + + runner = CliRunner() + with patch("openkb.cli._find_kb_dir", return_value=kb): + result = runner.invoke(cli, ["skill", "rollback", "demo", "-y"]) + + assert result.exit_code == 0, result.output + text = (current / "SKILL.md").read_text() + assert "restored" in text + assert "broken" not in text + # Marketplace manifest regenerated + assert (kb / ".claude-plugin" / "marketplace.json").exists() + + +def test_skill_rollback_to_specific_iteration(tmp_path): + kb = _make_kb(tmp_path) + ws = kb / "output" / "skills" / "demo-workspace" + (ws / "iteration-1").mkdir(parents=True) + (ws / "iteration-1" / "SKILL.md").write_text( + "---\nname: demo\ndescription: v1\n---\n" + ) + (ws / "iteration-2").mkdir(parents=True) + (ws / "iteration-2" / "SKILL.md").write_text( + "---\nname: demo\ndescription: v2\n---\n" + ) + current = kb / "output" / "skills" / "demo" + current.mkdir(parents=True) + (current / "SKILL.md").write_text("placeholder") + + runner = CliRunner() + with patch("openkb.cli._find_kb_dir", return_value=kb): + result = runner.invoke( + cli, ["skill", "rollback", "demo", "--to", "1", "-y"] + ) + + assert result.exit_code == 0, result.output + assert "v1" in (current / "SKILL.md").read_text() + + +def test_skill_rollback_errors_when_no_iterations(tmp_path): + kb = _make_kb(tmp_path) + runner = CliRunner() + with patch("openkb.cli._find_kb_dir", return_value=kb): + result = runner.invoke(cli, ["skill", "rollback", "demo", "-y"]) + assert result.exit_code != 0 + assert "No iterations" in result.output + + def test_skill_new_keeps_existing_skill_when_key_setup_fails(tmp_path): """If LLM key setup raises (e.g. no API key), the old skill output must be preserved — don't rmtree before key setup is verified.""" diff --git a/tests/test_skill_workspace.py b/tests/test_skill_workspace.py new file mode 100644 index 00000000..ac4b5af7 --- /dev/null +++ b/tests/test_skill_workspace.py @@ -0,0 +1,153 @@ +"""Tests for :mod:`openkb.skill_workspace` — iteration save/restore + diff.""" +from __future__ import annotations + +from pathlib import Path + +import pytest + +from openkb.skill_workspace import ( + list_iterations, + restore_iteration, + save_iteration, + write_diff, +) + + +def _make_skill(kb_dir: Path, name: str, *, description: str = "demo desc", + refs: list[str] | None = None, + skill_md_lines: int = 5) -> Path: + target = kb_dir / "output" / "skills" / name + target.mkdir(parents=True, exist_ok=True) + body = "\n".join(f"line {i}" for i in range(1, max(1, skill_md_lines) + 1)) + (target / "SKILL.md").write_text( + f"---\nname: {name}\ndescription: {description}\n---\n\n{body}\n", + encoding="utf-8", + ) + if refs: + refs_dir = target / "references" + refs_dir.mkdir(parents=True, exist_ok=True) + for r in refs: + (refs_dir / r).write_text("# ref\n", encoding="utf-8") + return target + + +# --------------------------------------------------------------------------- +# save_iteration +# --------------------------------------------------------------------------- + + +def test_save_iteration_returns_none_when_no_current_skill(tmp_path): + assert save_iteration(tmp_path, "missing") is None + + +def test_save_iteration_writes_iteration_1_then_2(tmp_path): + _make_skill(tmp_path, "demo", description="v1") + first = save_iteration(tmp_path, "demo") + assert first is not None + assert first.name == "iteration-1" + assert (first / "SKILL.md").exists() + assert "v1" in (first / "SKILL.md").read_text() + + # Mutate, then save again + _make_skill(tmp_path, "demo", description="v2") + second = save_iteration(tmp_path, "demo") + assert second is not None + assert second.name == "iteration-2" + assert "v2" in (second / "SKILL.md").read_text() + # First survives untouched + assert "v1" in (first / "SKILL.md").read_text() + + +# --------------------------------------------------------------------------- +# list_iterations +# --------------------------------------------------------------------------- + + +def test_list_iterations_returns_empty_when_no_workspace(tmp_path): + assert list_iterations(tmp_path, "nothing") == [] + + +def test_list_iterations_returns_sorted(tmp_path): + _make_skill(tmp_path, "demo") + save_iteration(tmp_path, "demo") + _make_skill(tmp_path, "demo", description="v2") + save_iteration(tmp_path, "demo") + iters = list_iterations(tmp_path, "demo") + assert [p.name for p in iters] == ["iteration-1", "iteration-2"] + + +# --------------------------------------------------------------------------- +# restore_iteration +# --------------------------------------------------------------------------- + + +def test_restore_iteration_latest_when_n_is_none(tmp_path): + _make_skill(tmp_path, "demo", description="v1") + save_iteration(tmp_path, "demo") + _make_skill(tmp_path, "demo", description="v2") + save_iteration(tmp_path, "demo") + # Simulate current skill being something else / broken + _make_skill(tmp_path, "demo", description="broken") + + restored = restore_iteration(tmp_path, "demo", n=None) + text = (restored / "SKILL.md").read_text() + assert "v2" in text + assert "broken" not in text + + +def test_restore_iteration_specific_n(tmp_path): + _make_skill(tmp_path, "demo", description="v1") + save_iteration(tmp_path, "demo") + _make_skill(tmp_path, "demo", description="v2") + save_iteration(tmp_path, "demo") + + restored = restore_iteration(tmp_path, "demo", n=1) + assert "v1" in (restored / "SKILL.md").read_text() + + +def test_restore_iteration_missing_raises(tmp_path): + _make_skill(tmp_path, "demo") + save_iteration(tmp_path, "demo") + with pytest.raises(FileNotFoundError): + restore_iteration(tmp_path, "demo", n=99) + + +def test_restore_iteration_no_workspace_raises(tmp_path): + with pytest.raises(FileNotFoundError): + restore_iteration(tmp_path, "demo") + + +# --------------------------------------------------------------------------- +# write_diff +# --------------------------------------------------------------------------- + + +def test_write_diff_records_description_change(tmp_path): + prev = _make_skill(tmp_path / "prev", "demo", description="old description") + curr = _make_skill(tmp_path / "curr", "demo", description="new description") + out = tmp_path / "diff.md" + write_diff(prev, curr, out) + content = out.read_text() + assert "description" in content.lower() + assert "old description" in content + assert "new description" in content + + +def test_write_diff_records_added_reference(tmp_path): + prev = _make_skill(tmp_path / "prev", "demo", refs=[]) + curr = _make_skill(tmp_path / "curr", "demo", refs=["new.md"]) + out = tmp_path / "diff.md" + write_diff(prev, curr, out) + content = out.read_text() + assert "added:" in content + assert "references/new.md" in content + + +def test_write_diff_records_removed_reference(tmp_path): + prev = _make_skill(tmp_path / "prev", "demo", refs=["old.md"]) + curr = _make_skill(tmp_path / "curr", "demo", refs=[]) + out = tmp_path / "diff.md" + write_diff(prev, curr, out) + content = out.read_text() + assert "removed:" in content + assert "references/old.md" in content From 7066e8351970d505bca531e6fe69aa39d2ddc871 Mon Sep 17 00:00:00 2001 From: mountain Date: Mon, 18 May 2026 16:13:42 +0800 Subject: [PATCH 20/25] feat(skill): pure-Python structural validator + auto-run on compile MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Borrows from Codex skill-creator's quick_validate.py: catches the common failure modes that would make a skill unloadable before distribution — missing/malformed frontmatter, name/dir mismatch, oversized files, broken references/* wikilinks, non-stdlib imports in scripts/* (strict mode). New CLI: openkb skill validate validate all compiled skills openkb skill validate validate one openkb skill validate --strict treat warnings as failures openkb skill new auto-runs validation after compile and surfaces errors + warnings so the user knows whether the freshly-compiled skill is well-formed. Doesn't block marketplace.json regeneration — the files are on disk and the user can fix or rollback. --- openkb/cli.py | 66 ++++++++ openkb/skill_validator.py | 208 +++++++++++++++++++++++ tests/test_skill_cli.py | 37 +++++ tests/test_skill_validator.py | 305 ++++++++++++++++++++++++++++++++++ 4 files changed, 616 insertions(+) create mode 100644 openkb/skill_validator.py create mode 100644 tests/test_skill_validator.py diff --git a/openkb/cli.py b/openkb/cli.py index 66496a3d..6e248f0f 100644 --- a/openkb/cli.py +++ b/openkb/cli.py @@ -1517,6 +1517,22 @@ def skill_new(ctx, name, intent, yes_flag): "diff generation failed: %s", exc, exc_info=True ) + # Auto-validate the freshly compiled skill. Surface issues but don't + # block — files are on disk and the user can fix or rollback. + from openkb.skill_validator import validate_skill + skill_dir = kb_dir / "output" / "skills" / name + result = validate_skill(skill_dir) + if result.errors or result.warnings: + click.echo("\n[WARN] Validation found issues:") + for err in result.errors: + click.echo(f" ERROR: {err}") + for warn in result.warnings: + click.echo(f" WARN: {warn}") + click.echo( + f"\nRun `openkb skill validate {name}` to re-check, or " + f"`openkb skill rollback {name}` to revert." + ) + click.echo(f"\nSaved: output/skills/{name}/") if saved_iteration is not None: rel = saved_iteration.relative_to(kb_dir) @@ -1654,3 +1670,53 @@ def skill_rollback(ctx, name, to_n, yes_flag): regenerate_marketplace(kb_dir) click.echo(f"Restored output/skills/{name}/ from {target_label}.") click.echo("Manifest: .claude-plugin/marketplace.json updated") + + +@skill.command("validate") +@click.argument("name", required=False) +@click.option( + "--strict", is_flag=True, default=False, + help="Treat warnings as failures (exit non-zero).", +) +@click.pass_context +def skill_validate(ctx, name, strict): + """Validate one skill (by name) or all compiled skills in this KB.""" + from openkb.skill_validator import validate_skill + + kb_dir = _find_kb_dir(ctx.obj.get("kb_dir_override")) + if kb_dir is None: + click.echo("No knowledge base found.", err=True) + ctx.exit(1) + + skills_root = kb_dir / "output" / "skills" + if not skills_root.is_dir(): + click.echo("No skills found. Compile one with `openkb skill new`.") + return + + if name: + target = skills_root / name + if not target.is_dir(): + click.echo(f"[ERROR] Skill '{name}' not found.", err=True) + ctx.exit(1) + targets = [target] + else: + targets = sorted( + d for d in skills_root.iterdir() + if d.is_dir() and not d.name.endswith("-workspace") + ) + + any_failed = False + for t in targets: + result = validate_skill(t, strict=strict) + passed = result.passed_strict if strict else result.passed + prefix = "[OK]" if passed else "[FAIL]" + click.echo(f"{prefix} {t.name}") + for err in result.errors: + click.echo(f" ERROR: {err}") + for warn in result.warnings: + click.echo(f" WARN: {warn}") + if not passed: + any_failed = True + + if any_failed: + ctx.exit(1) diff --git a/openkb/skill_validator.py b/openkb/skill_validator.py new file mode 100644 index 00000000..82de55d4 --- /dev/null +++ b/openkb/skill_validator.py @@ -0,0 +1,208 @@ +"""Deterministic structural validator for compiled skills. + +Pure Python — no LLM calls. Catches the common failure modes that would +make a skill un-loadable or misleading to the agents that install it: + + * SKILL.md missing or unparseable + * frontmatter missing required fields + * name field doesn't match directory or violates the slug rule + * description too long (> 1024 chars per Anthropic spec) + * files too big (SKILL.md > 50 KB / references/*.md > 100 KB) + * `[[references/...]]` wikilinks pointing at files that don't exist + * (strict mode) scripts/*.py importing non-stdlib modules + +This is the deterministic counterpart to ``openkb skill eval`` — eval +measures whether the description fires; validate ensures the structure +is well-formed. +""" +from __future__ import annotations + +import ast +import re +import sys +from dataclasses import dataclass, field +from pathlib import Path + +import yaml # already a project dep (pyyaml) + + +SKILL_NAME_RE = re.compile(r"^[a-z0-9]+(-[a-z0-9]+)*$") +DESCRIPTION_MAX_CHARS = 1024 +SKILL_MD_MAX_BYTES = 50 * 1024 +REFERENCE_MAX_BYTES = 100 * 1024 +NAME_MAX_LEN = 64 +WIKILINK_RE = re.compile(r"\[\[references/([a-z0-9._/-]+)\]\]", re.IGNORECASE) + + +@dataclass +class ValidationResult: + errors: list[str] = field(default_factory=list) + warnings: list[str] = field(default_factory=list) + + @property + def passed(self) -> bool: + return not self.errors + + @property + def passed_strict(self) -> bool: + return not self.errors and not self.warnings + + +def validate_skill(skill_dir: Path, *, strict: bool = False) -> ValidationResult: + """Run all structural checks on a single compiled skill directory. + + Args: + skill_dir: ``/output/skills/`` + strict: if True, warnings are surfaced; otherwise only errors + + Returns a ValidationResult. Use ``result.passed`` for the default + semantics (errors block, warnings don't); use ``result.passed_strict`` + when running ``--strict``. + """ + result = ValidationResult() + skill_dir = Path(skill_dir) + + if not skill_dir.is_dir(): + result.errors.append(f"Skill directory does not exist: {skill_dir}") + return result + + skill_md = skill_dir / "SKILL.md" + if not skill_md.exists(): + result.errors.append("Missing required file: SKILL.md") + return result + + # File size + skill_size = skill_md.stat().st_size + if skill_size > SKILL_MD_MAX_BYTES: + result.errors.append( + f"SKILL.md is {skill_size} bytes; max is {SKILL_MD_MAX_BYTES} bytes." + ) + + text = skill_md.read_text(encoding="utf-8") + + # Frontmatter + fm = _extract_frontmatter(text) + if fm is None: + result.errors.append( + "SKILL.md has no YAML frontmatter (must start with `---` ... `---`)." + ) + return result + + try: + meta = yaml.safe_load(fm) or {} + except yaml.YAMLError as exc: + result.errors.append(f"Frontmatter is not valid YAML: {exc}") + return result + + if not isinstance(meta, dict): + result.errors.append("Frontmatter must be a YAML mapping.") + return result + + # name field + name = meta.get("name") + if not name: + result.errors.append("Frontmatter is missing required field 'name:'.") + elif not isinstance(name, str): + result.errors.append("Frontmatter 'name:' must be a string.") + else: + if name != skill_dir.name: + result.errors.append( + f"Frontmatter 'name: {name}' doesn't match directory name " + f"'{skill_dir.name}'." + ) + if not SKILL_NAME_RE.match(name) or len(name) > NAME_MAX_LEN: + result.errors.append( + f"Frontmatter 'name: {name}' must be kebab-case " + f"(lowercase a-z0-9 + dashes), 1-{NAME_MAX_LEN} chars." + ) + + # description field + desc = meta.get("description") + if not desc: + result.errors.append("Frontmatter is missing required field 'description:'.") + elif not isinstance(desc, str): + result.errors.append("Frontmatter 'description:' must be a string.") + else: + if len(desc) > DESCRIPTION_MAX_CHARS: + result.errors.append( + f"Frontmatter 'description:' is {len(desc)} chars; " + f"max is {DESCRIPTION_MAX_CHARS} chars." + ) + if len(desc) < 20: + result.warnings.append( + f"Frontmatter 'description:' is only {len(desc)} chars — " + f"too short to be a useful activation signal." + ) + + # references/ wikilink resolution + wikilinks = WIKILINK_RE.findall(text) + refs_dir = skill_dir / "references" + for link in wikilinks: + # link may or may not include .md suffix + target = refs_dir / link + if not target.suffix: + target = target.with_suffix(".md") + if not target.exists(): + result.errors.append( + f"SKILL.md references [[references/{link}]] but " + f"{target.relative_to(skill_dir)} doesn't exist." + ) + + # references/*.md file sizes + if refs_dir.is_dir(): + for ref in refs_dir.rglob("*.md"): + size = ref.stat().st_size + if size > REFERENCE_MAX_BYTES: + result.errors.append( + f"{ref.relative_to(skill_dir)} is {size} bytes; " + f"max is {REFERENCE_MAX_BYTES} bytes." + ) + + # scripts/*.py imports — strict only + if strict: + scripts_dir = skill_dir / "scripts" + if scripts_dir.is_dir(): + for script in scripts_dir.rglob("*.py"): + bad = _non_stdlib_imports(script) + if bad: + result.warnings.append( + f"{script.relative_to(skill_dir)} imports non-stdlib " + f"modules: {', '.join(sorted(bad))}. Skill scripts run " + f"in unknown environments — prefer stdlib only." + ) + + return result + + +def _extract_frontmatter(text: str) -> str | None: + """Return the YAML body between the first two `---` lines, or None.""" + lines = text.splitlines() + if not lines or lines[0].strip() != "---": + return None + try: + end = lines.index("---", 1) + except ValueError: + return None + return "\n".join(lines[1:end]) + + +def _non_stdlib_imports(script: Path) -> set[str]: + """Return imported module names that aren't in the Python stdlib.""" + try: + tree = ast.parse(script.read_text(encoding="utf-8")) + except SyntaxError: + return {""} + stdlib = set(sys.stdlib_module_names) if hasattr(sys, "stdlib_module_names") else set() + bad: set[str] = set() + for node in ast.walk(tree): + if isinstance(node, ast.Import): + for alias in node.names: + root = alias.name.split(".")[0] + if stdlib and root not in stdlib: + bad.add(root) + elif isinstance(node, ast.ImportFrom): + if node.module: + root = node.module.split(".")[0] + if stdlib and root not in stdlib: + bad.add(root) + return bad diff --git a/tests/test_skill_cli.py b/tests/test_skill_cli.py index e570d000..6b2ea87e 100644 --- a/tests/test_skill_cli.py +++ b/tests/test_skill_cli.py @@ -252,6 +252,43 @@ def test_skill_rollback_errors_when_no_iterations(tmp_path): assert "No iterations" in result.output +def test_skill_validate_passes_on_valid_skill(tmp_path): + """`openkb skill validate ` exits 0 and prints OK for a well-formed skill.""" + kb = _make_kb(tmp_path) + target = kb / "output" / "skills" / "demo" + target.mkdir(parents=True) + (target / "SKILL.md").write_text( + "---\nname: demo\n" + "description: A useful and descriptive activation signal for this skill.\n" + "---\n\n# demo\n" + ) + + runner = CliRunner() + with patch("openkb.cli._find_kb_dir", return_value=kb): + result = runner.invoke(cli, ["skill", "validate", "demo"]) + + assert result.exit_code == 0, result.output + assert "[OK]" in result.output + assert "demo" in result.output + + +def test_skill_validate_fails_on_invalid_frontmatter(tmp_path): + """`openkb skill validate ` exits non-zero on malformed YAML.""" + kb = _make_kb(tmp_path) + target = kb / "output" / "skills" / "broken" + target.mkdir(parents=True) + # No frontmatter at all — must error out. + (target / "SKILL.md").write_text("# just a body, no frontmatter\n") + + runner = CliRunner() + with patch("openkb.cli._find_kb_dir", return_value=kb): + result = runner.invoke(cli, ["skill", "validate", "broken"]) + + assert result.exit_code != 0, result.output + assert "ERROR" in result.output + assert "[FAIL]" in result.output + + def test_skill_new_keeps_existing_skill_when_key_setup_fails(tmp_path): """If LLM key setup raises (e.g. no API key), the old skill output must be preserved — don't rmtree before key setup is verified.""" diff --git a/tests/test_skill_validator.py b/tests/test_skill_validator.py new file mode 100644 index 00000000..06fad87a --- /dev/null +++ b/tests/test_skill_validator.py @@ -0,0 +1,305 @@ +"""Unit tests for openkb.skill_validator — pure-Python structural checks +on a compiled skill directory. No LLM, no network.""" +from __future__ import annotations + +from pathlib import Path + +from openkb.skill_validator import ( + DESCRIPTION_MAX_CHARS, + REFERENCE_MAX_BYTES, + SKILL_MD_MAX_BYTES, + validate_skill, +) + + +# --------------------------------------------------------------------------- +# helpers +# --------------------------------------------------------------------------- + +def _write_skill( + parent: Path, + name: str, + *, + frontmatter: str | None = "default", + body: str = "# body\n", + refs: dict[str, str] | None = None, + scripts: dict[str, str] | None = None, + skill_md_bytes: int | None = None, +) -> Path: + """Create a skill directory with the requested contents. + + frontmatter: + "default" -> a valid minimal frontmatter with matching name + long desc + str -> use that exact frontmatter block (between --- markers) + None -> write the body with NO frontmatter at all + """ + skill_dir = parent / name + skill_dir.mkdir(parents=True, exist_ok=True) + if frontmatter == "default": + fm = f"name: {name}\ndescription: A useful and descriptive skill activation signal." + else: + fm = frontmatter + + if fm is None: + text = body + else: + text = f"---\n{fm}\n---\n\n{body}" + + if skill_md_bytes is not None: + text = text + ("x" * skill_md_bytes) + (skill_dir / "SKILL.md").write_text(text, encoding="utf-8") + + if refs: + refs_dir = skill_dir / "references" + refs_dir.mkdir(exist_ok=True) + for relpath, content in refs.items(): + target = refs_dir / relpath + target.parent.mkdir(parents=True, exist_ok=True) + target.write_text(content, encoding="utf-8") + + if scripts: + scripts_dir = skill_dir / "scripts" + scripts_dir.mkdir(exist_ok=True) + for relpath, content in scripts.items(): + target = scripts_dir / relpath + target.parent.mkdir(parents=True, exist_ok=True) + target.write_text(content, encoding="utf-8") + + return skill_dir + + +# --------------------------------------------------------------------------- +# happy path +# --------------------------------------------------------------------------- + +def test_minimal_valid_skill_passes(tmp_path): + sd = _write_skill(tmp_path, "demo-skill") + result = validate_skill(sd) + assert result.passed, result.errors + assert result.errors == [] + assert result.warnings == [] + + +# --------------------------------------------------------------------------- +# missing/structural errors +# --------------------------------------------------------------------------- + +def test_skill_directory_missing(tmp_path): + result = validate_skill(tmp_path / "nope") + assert not result.passed + assert any("does not exist" in e for e in result.errors) + + +def test_missing_skill_md(tmp_path): + sd = tmp_path / "empty-skill" + sd.mkdir() + result = validate_skill(sd) + assert not result.passed + assert any("SKILL.md" in e for e in result.errors) + + +def test_no_frontmatter(tmp_path): + sd = _write_skill(tmp_path, "no-fm", frontmatter=None, body="# just a body\n") + result = validate_skill(sd) + assert not result.passed + assert any("frontmatter" in e.lower() for e in result.errors) + + +def test_malformed_yaml(tmp_path): + sd = _write_skill(tmp_path, "bad-yaml", frontmatter="name: : : bad\n - oops\n[") + result = validate_skill(sd) + assert not result.passed + assert any("YAML" in e or "yaml" in e for e in result.errors) + + +def test_frontmatter_not_mapping(tmp_path): + sd = _write_skill(tmp_path, "list-fm", frontmatter="- one\n- two") + result = validate_skill(sd) + assert not result.passed + assert any("mapping" in e for e in result.errors) + + +# --------------------------------------------------------------------------- +# name field +# --------------------------------------------------------------------------- + +def test_name_mismatch_with_directory(tmp_path): + sd = _write_skill( + tmp_path, "dir-name", + frontmatter="name: other-name\ndescription: A nice long description here.", + ) + result = validate_skill(sd) + assert not result.passed + assert any("doesn't match directory" in e for e in result.errors) + + +def test_name_invalid_uppercase(tmp_path): + sd = tmp_path / "BadName" + sd.mkdir() + (sd / "SKILL.md").write_text( + "---\nname: BadName\ndescription: A nice long description here.\n---\n" + ) + result = validate_skill(sd) + assert not result.passed + assert any("kebab-case" in e for e in result.errors) + + +def test_name_invalid_underscore(tmp_path): + sd = tmp_path / "bad_name" + sd.mkdir() + (sd / "SKILL.md").write_text( + "---\nname: bad_name\ndescription: A nice long description here.\n---\n" + ) + result = validate_skill(sd) + assert not result.passed + assert any("kebab-case" in e for e in result.errors) + + +def test_name_missing(tmp_path): + sd = _write_skill( + tmp_path, "no-name-field", + frontmatter="description: A nice long description here.", + ) + result = validate_skill(sd) + assert not result.passed + assert any("name" in e for e in result.errors) + + +# --------------------------------------------------------------------------- +# description field +# --------------------------------------------------------------------------- + +def test_description_missing(tmp_path): + sd = _write_skill(tmp_path, "no-desc", frontmatter="name: no-desc") + result = validate_skill(sd) + assert not result.passed + assert any("description" in e for e in result.errors) + + +def test_description_too_long(tmp_path): + long_desc = "x" * (DESCRIPTION_MAX_CHARS + 1) + sd = _write_skill( + tmp_path, "long-desc", + frontmatter=f"name: long-desc\ndescription: {long_desc}", + ) + result = validate_skill(sd) + assert not result.passed + assert any("description" in e and "chars" in e for e in result.errors) + + +def test_description_too_short_is_warning_not_error(tmp_path): + sd = _write_skill( + tmp_path, "short-desc", + frontmatter="name: short-desc\ndescription: too short", + ) + result = validate_skill(sd) + # Errors should be empty — short desc is a warning, not blocking. + assert result.errors == [] + assert result.passed # passes default semantics + assert any("too short" in w for w in result.warnings) + assert not result.passed_strict # but fails under --strict + + +# --------------------------------------------------------------------------- +# file sizes +# --------------------------------------------------------------------------- + +def test_skill_md_too_big(tmp_path): + sd = _write_skill( + tmp_path, "big-skill", + skill_md_bytes=SKILL_MD_MAX_BYTES + 1, + ) + result = validate_skill(sd) + assert not result.passed + assert any("SKILL.md" in e and "bytes" in e for e in result.errors) + + +def test_reference_too_big(tmp_path): + big = "y" * (REFERENCE_MAX_BYTES + 1) + sd = _write_skill( + tmp_path, "big-ref", + refs={"huge.md": big}, + ) + result = validate_skill(sd) + assert not result.passed + assert any("huge.md" in e and "bytes" in e for e in result.errors) + + +# --------------------------------------------------------------------------- +# wikilinks +# --------------------------------------------------------------------------- + +def test_wikilink_resolves(tmp_path): + sd = _write_skill( + tmp_path, "with-ref", + body="See [[references/topic.md]] for details.\n", + refs={"topic.md": "# topic\n"}, + ) + result = validate_skill(sd) + assert result.passed, result.errors + + +def test_wikilink_missing_target(tmp_path): + sd = _write_skill( + tmp_path, "broken-ref", + body="See [[references/missing.md]] for details.\n", + ) + result = validate_skill(sd) + assert not result.passed + assert any("missing.md" in e for e in result.errors) + + +def test_wikilink_without_md_suffix_resolves(tmp_path): + sd = _write_skill( + tmp_path, "ref-no-suffix", + body="See [[references/topic]] for details.\n", + refs={"topic.md": "# topic\n"}, + ) + result = validate_skill(sd) + assert result.passed, result.errors + + +# --------------------------------------------------------------------------- +# scripts/ imports — strict mode only +# --------------------------------------------------------------------------- + +def test_scripts_stdlib_only_no_warning(tmp_path): + sd = _write_skill( + tmp_path, "stdlib-script", + scripts={"do.py": "import os\nimport sys\nfrom pathlib import Path\n"}, + ) + result = validate_skill(sd, strict=True) + assert result.passed_strict, (result.errors, result.warnings) + + +def test_scripts_non_stdlib_warning_only_in_strict(tmp_path): + sd = _write_skill( + tmp_path, "requests-script", + scripts={"fetch.py": "import requests\nimport os\n"}, + ) + + # Non-strict: no warning surfaced (we still pass). + result = validate_skill(sd, strict=False) + assert result.passed + assert result.warnings == [] + + # Strict: warning surfaced, fails strict. + result = validate_skill(sd, strict=True) + assert result.passed # no errors + assert not result.passed_strict + assert any("requests" in w for w in result.warnings) + + +# --------------------------------------------------------------------------- +# passed vs passed_strict semantics +# --------------------------------------------------------------------------- + +def test_passed_vs_passed_strict_semantics(tmp_path): + # Skill that has a warning (short desc) but no errors. + sd = _write_skill( + tmp_path, "warn-only", + frontmatter="name: warn-only\ndescription: short", + ) + result = validate_skill(sd) + assert result.passed is True + assert result.passed_strict is False From 6f70d022a92dcd88321382822ea3a60e913aae8d Mon Sep 17 00:00:00 2001 From: mountain Date: Mon, 18 May 2026 16:19:00 +0800 Subject: [PATCH 21/25] feat(skill): trigger-accuracy evaluator (skill eval) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Borrows from Anthropic skill-creator's evaluation loop, simplified for v0.3: measure whether a skill's description: field actually fires when it should and stays quiet when it shouldn't. The description is the activation signal other agents read; a vague description silently fails to load when it ought to. Flow: 1. LLM generates N should-trigger + N should-not prompts from the description only 2. Grader LLM scores each: 'should this description activate this skill for this question?' 3. Compare to ground truth, print pass rate + misses New CLI: openkb skill eval run eval (10+10 default) openkb skill eval --save persist prompts to disk openkb skill eval --eval-set X reuse saved prompts openkb skill eval --count N override prompt count Tests mock Runner.run for both generator and grader — no real LLM calls in CI. Saved eval sets live at .openkb/eval-sets/.json for reproducibility. --- openkb/cli.py | 74 +++++++++++ openkb/skill_evaluator.py | 220 ++++++++++++++++++++++++++++++++ tests/test_skill_cli.py | 80 ++++++++++++ tests/test_skill_evaluator.py | 232 ++++++++++++++++++++++++++++++++++ 4 files changed, 606 insertions(+) create mode 100644 openkb/skill_evaluator.py create mode 100644 tests/test_skill_evaluator.py diff --git a/openkb/cli.py b/openkb/cli.py index 6e248f0f..75e2dce7 100644 --- a/openkb/cli.py +++ b/openkb/cli.py @@ -1720,3 +1720,77 @@ def skill_validate(ctx, name, strict): if any_failed: ctx.exit(1) + + +@skill.command("eval") +@click.argument("name") +@click.option( + "--save", "save_flag", is_flag=True, default=False, + help="Persist the generated eval set to .openkb/eval-sets/.json", +) +@click.option( + "--eval-set", "eval_set_path", default=None, type=click.Path(), + help="Use a saved eval set instead of generating fresh prompts.", +) +@click.option( + "--count", default=10, type=int, + help="Number of should-trigger + should-not prompts (each).", +) +@click.pass_context +def skill_eval(ctx, name, save_flag, eval_set_path, count): + """Measure how accurately a compiled skill's description fires. + + Generates trigger-eval prompts via LLM, then asks a grader LLM whether + the description should activate the skill for each prompt. Prints pass + rate + miss list. + """ + import asyncio + from openkb.skill_evaluator import ( + run_eval, save_eval_set, load_eval_set, EvalPrompt, + ) + + kb_dir = _find_kb_dir(ctx.obj.get("kb_dir_override")) + if kb_dir is None: + click.echo("No knowledge base found.", err=True) + ctx.exit(1) + + skill_dir = kb_dir / "output" / "skills" / name + if not skill_dir.is_dir(): + click.echo(f"[ERROR] Skill '{name}' not found.", err=True) + ctx.exit(1) + + _setup_llm_key(kb_dir) + config = load_config(kb_dir / ".openkb" / "config.yaml") + model = config.get("model", DEFAULT_CONFIG["model"]) + + eval_set: list[EvalPrompt] | None = None + if eval_set_path: + eval_set = load_eval_set(Path(eval_set_path)) + click.echo(f"Loaded eval set from {eval_set_path} ({len(eval_set)} prompts).") + else: + click.echo(f"Generating eval set for '{name}' (count={count} per side)...") + + try: + result = asyncio.run(run_eval( + skill_dir, model=model, eval_set=eval_set, count=count, + )) + except RuntimeError as exc: + click.echo(f"[ERROR] {exc}", err=True) + ctx.exit(1) + + click.echo(f"\nEval set: {result.total} prompts") + click.echo( + f"Pass rate: {result.passed}/{result.total} " + f"({result.pass_rate * 100:.0f}%)" + ) + + if result.misses: + click.echo(f"\nMisses ({len(result.misses)}):") + for miss in result.misses: + click.echo(f" - {miss.label} {miss.prompt.question}") + else: + click.echo("\nAll prompts graded correctly.") + + if save_flag and eval_set is None: + path = save_eval_set(kb_dir, name, result.prompts) + click.echo(f"\nEval set persisted to {path}") diff --git a/openkb/skill_evaluator.py b/openkb/skill_evaluator.py new file mode 100644 index 00000000..6aa9a15d --- /dev/null +++ b/openkb/skill_evaluator.py @@ -0,0 +1,220 @@ +"""Trigger-accuracy evaluator for compiled skills. + +The description: field in SKILL.md is the activation signal — it's what +other agents read to decide whether to load the skill for a given user +question. A vague or off-target description fails to fire when it should +(false negatives) or fires when it shouldn't (false positives). This +module catches both. + +Flow: + 1. Read description from the skill's SKILL.md frontmatter + 2. Ask a generator LLM: produce N should-trigger + N should-not prompts + based purely on the description (no other context) + 3. For each prompt, ask a grader LLM: "given just this description, + should an agent load this skill for this question? yes/no" + 4. Compare against expected labels (the ground truth from step 2) + 5. Report pass rate + the specific misses + +Uses the same LiteLLM model the rest of the KB uses (config.yaml). No +real LLM calls in tests — both generator and grader are patched. +""" +from __future__ import annotations + +import json +from dataclasses import dataclass, field +from pathlib import Path +from typing import Literal + +import yaml + +from agents import Agent, Runner +from agents.model_settings import ModelSettings + + +EVAL_DEFAULT_COUNT = 10 # 10 trigger + 10 no-trigger = 20 prompts + + +@dataclass +class EvalPrompt: + question: str + expected: Literal["trigger", "no-trigger"] + + +@dataclass +class EvalMiss: + prompt: EvalPrompt + graded: Literal["trigger", "no-trigger"] + + @property + def label(self) -> str: + return f"[{self.prompt.expected} -> graded {self.graded}]" + + +@dataclass +class EvalResult: + prompts: list[EvalPrompt] = field(default_factory=list) + misses: list[EvalMiss] = field(default_factory=list) + + @property + def total(self) -> int: + return len(self.prompts) + + @property + def passed(self) -> int: + return self.total - len(self.misses) + + @property + def pass_rate(self) -> float: + return self.passed / self.total if self.total else 0.0 + + +def _read_description(skill_dir: Path) -> str: + """Extract the description: field from SKILL.md frontmatter.""" + skill_md = skill_dir / "SKILL.md" + text = skill_md.read_text(encoding="utf-8") + lines = text.splitlines() + if not lines or lines[0].strip() != "---": + raise RuntimeError(f"{skill_md} has no YAML frontmatter.") + try: + end = lines.index("---", 1) + except ValueError: + raise RuntimeError(f"{skill_md} has no YAML frontmatter.") + meta = yaml.safe_load("\n".join(lines[1:end])) or {} + desc = meta.get("description") + if not isinstance(desc, str) or not desc: + raise RuntimeError(f"{skill_md} has no description: field.") + return desc + + +async def generate_eval_set( + skill_dir: Path, + *, + model: str, + count: int = EVAL_DEFAULT_COUNT, +) -> list[EvalPrompt]: + """Use an LLM to generate ``count`` should-trigger + ``count`` should-not + eval prompts based on the skill's description. + """ + desc = _read_description(skill_dir) + + instructions = ( + "You are designing an evaluation set for a knowledge-base skill. " + f"The skill's activation description is:\n\n" + f" {desc}\n\n" + f"Produce exactly {count} 'should-trigger' user questions (questions where " + f"an agent SHOULD load this skill to answer well) and exactly {count} " + f"'should-not' user questions (plausible-sounding questions about other " + f"topics where this skill is NOT the right tool).\n\n" + f"Output ONLY a JSON object with this exact shape:\n" + f' {{"should_trigger": [...{count} strings...], ' + f'"should_not": [...{count} strings...]}}\n\n' + f"No prose. No markdown. Just the JSON object." + ) + + agent = Agent( + name="eval-set-generator", + instructions=instructions, + model=f"litellm/{model}", + model_settings=ModelSettings(parallel_tool_calls=False), + ) + result = await Runner.run(agent, "Generate the eval set now.", max_turns=3) + raw = (result.final_output or "").strip() + + # Strip optional code fence + if raw.startswith("```"): + raw = raw.split("\n", 1)[1].rsplit("```", 1)[0].strip() + if raw.startswith("json"): + raw = raw[4:].lstrip() + + data = json.loads(raw) + prompts: list[EvalPrompt] = [] + for q in data.get("should_trigger", []): + prompts.append(EvalPrompt(question=q, expected="trigger")) + for q in data.get("should_not", []): + prompts.append(EvalPrompt(question=q, expected="no-trigger")) + return prompts + + +async def grade_one( + description: str, + question: str, + *, + model: str, +) -> Literal["trigger", "no-trigger"]: + """Ask the grader LLM whether the description suggests this skill + should be loaded for the given question.""" + instructions = ( + "You are deciding whether an agent should load a specific skill to " + "answer a user question. You will be given the skill's activation " + "description and a single user question. Answer with one word: " + "TRIGGER (load the skill) or NO-TRIGGER (don't load).\n\n" + f"Skill description:\n {description}\n\n" + "Reply with exactly one of: TRIGGER, NO-TRIGGER." + ) + agent = Agent( + name="trigger-grader", + instructions=instructions, + model=f"litellm/{model}", + model_settings=ModelSettings(parallel_tool_calls=False), + ) + result = await Runner.run(agent, f"Question: {question}", max_turns=2) + raw = (result.final_output or "").strip().upper() + if "NO-TRIGGER" in raw or "NO TRIGGER" in raw: + return "no-trigger" + if "TRIGGER" in raw: + return "trigger" + # Default: assume no-trigger on ambiguous output + return "no-trigger" + + +async def run_eval( + skill_dir: Path, + *, + model: str, + eval_set: list[EvalPrompt] | None = None, + count: int = EVAL_DEFAULT_COUNT, +) -> EvalResult: + """Run a trigger-accuracy evaluation. + + Args: + skill_dir: ``/output/skills/`` + model: LiteLLM model string from KB config + eval_set: pre-generated prompts; if None, generate fresh + count: how many should-trigger + should-not prompts to generate + """ + if eval_set is None: + eval_set = await generate_eval_set(skill_dir, model=model, count=count) + + desc = _read_description(skill_dir) + result = EvalResult(prompts=eval_set) + for prompt in eval_set: + graded = await grade_one(desc, prompt.question, model=model) + if graded != prompt.expected: + result.misses.append(EvalMiss(prompt=prompt, graded=graded)) + return result + + +def save_eval_set( + kb_dir: Path, skill_name: str, prompts: list[EvalPrompt], +) -> Path: + """Persist an eval set to ``/.openkb/eval-sets/.json``.""" + out_dir = kb_dir / ".openkb" / "eval-sets" + out_dir.mkdir(parents=True, exist_ok=True) + out_path = out_dir / f"{skill_name}.json" + data = { + "should_trigger": [p.question for p in prompts if p.expected == "trigger"], + "should_not": [p.question for p in prompts if p.expected == "no-trigger"], + } + out_path.write_text(json.dumps(data, indent=2) + "\n", encoding="utf-8") + return out_path + + +def load_eval_set(path: Path) -> list[EvalPrompt]: + """Load an eval set previously saved via ``save_eval_set``.""" + data = json.loads(Path(path).read_text(encoding="utf-8")) + out: list[EvalPrompt] = [] + for q in data.get("should_trigger", []): + out.append(EvalPrompt(question=q, expected="trigger")) + for q in data.get("should_not", []): + out.append(EvalPrompt(question=q, expected="no-trigger")) + return out diff --git a/tests/test_skill_cli.py b/tests/test_skill_cli.py index 6b2ea87e..7d42297a 100644 --- a/tests/test_skill_cli.py +++ b/tests/test_skill_cli.py @@ -306,3 +306,83 @@ def test_skill_new_keeps_existing_skill_when_key_setup_fails(tmp_path): assert result.exit_code != 0 # Old skill must still be there assert (target / "stale.txt").read_text() == "priceless" + + +# -------------------------------------------------------------------------- +# `openkb skill eval` — trigger-accuracy evaluator +# -------------------------------------------------------------------------- + +def _make_skill_dir(kb_dir, name="demo", description="Triggers for demo questions."): + """Create a minimal compiled skill on disk under /output/skills/.""" + skill_dir = kb_dir / "output" / "skills" / name + skill_dir.mkdir(parents=True, exist_ok=True) + (skill_dir / "SKILL.md").write_text( + f"---\nname: {name}\ndescription: {description}\n---\n\n# {name}\n", + encoding="utf-8", + ) + return skill_dir + + +def test_skill_eval_runs_with_provided_eval_set(tmp_path): + """Pass a pre-saved eval set + a perfect-grader mock — expect 100% pass.""" + kb = _make_kb(tmp_path) + _make_skill_dir(kb, "demo") + + # Save an eval set we can point --eval-set at. + eval_dir = kb / ".openkb" / "eval-sets" + eval_dir.mkdir(parents=True) + eval_path = eval_dir / "demo.json" + eval_path.write_text(json.dumps({ + "should_trigger": ["t0", "t1"], + "should_not": ["n0", "n1"], + })) + + async def perfect_grader(description, question, *, model): + return "trigger" if question.startswith("t") else "no-trigger" + + runner = CliRunner() + with patch("openkb.cli._find_kb_dir", return_value=kb), \ + patch("openkb.cli._setup_llm_key", return_value=None), \ + patch("openkb.skill_evaluator.grade_one", side_effect=perfect_grader): + result = runner.invoke(cli, [ + "skill", "eval", "demo", "--eval-set", str(eval_path), + ]) + + assert result.exit_code == 0, result.output + assert "Pass rate" in result.output + assert "4/4" in result.output + assert "100%" in result.output + assert "All prompts graded correctly" in result.output + + +def test_skill_eval_reports_misses(tmp_path): + """Grader always returns 'trigger' — the no-trigger half must show as misses.""" + kb = _make_kb(tmp_path) + _make_skill_dir(kb, "demo") + + eval_dir = kb / ".openkb" / "eval-sets" + eval_dir.mkdir(parents=True) + eval_path = eval_dir / "demo.json" + eval_path.write_text(json.dumps({ + "should_trigger": ["t0", "t1"], + "should_not": ["n0", "n1"], + })) + + async def biased_grader(description, question, *, model): + return "trigger" + + runner = CliRunner() + with patch("openkb.cli._find_kb_dir", return_value=kb), \ + patch("openkb.cli._setup_llm_key", return_value=None), \ + patch("openkb.skill_evaluator.grade_one", side_effect=biased_grader): + result = runner.invoke(cli, [ + "skill", "eval", "demo", "--eval-set", str(eval_path), + ]) + + assert result.exit_code == 0, result.output + assert "Pass rate" in result.output + assert "2/4" in result.output + assert "Misses (2)" in result.output + # Each missed prompt must be listed in the output + assert "n0" in result.output + assert "n1" in result.output diff --git a/tests/test_skill_evaluator.py b/tests/test_skill_evaluator.py new file mode 100644 index 00000000..3863051e --- /dev/null +++ b/tests/test_skill_evaluator.py @@ -0,0 +1,232 @@ +"""Tests for openkb.skill_evaluator. + +The Runner.run call is mocked everywhere — no real LLM tokens spent. +What we DO verify: + * Description extraction from SKILL.md frontmatter + * Generator output parsing (with + without code fences) + * Grader response handling (uppercase / lowercase / ambiguous) + * End-to-end run_eval with mocked grading + * Save/load round-trip for persisted eval sets +""" +from __future__ import annotations + +import json +from pathlib import Path +from types import SimpleNamespace +from unittest.mock import AsyncMock, patch + +import pytest + +from openkb.skill_evaluator import ( + EvalMiss, + EvalPrompt, + EvalResult, + _read_description, + generate_eval_set, + grade_one, + load_eval_set, + run_eval, + save_eval_set, +) + + +def _make_skill(tmp_path: Path, *, description: str | None = "Triggers for foo questions.") -> Path: + """Create a minimal SKILL.md and return the skill directory.""" + skill_dir = tmp_path / "output" / "skills" / "demo" + skill_dir.mkdir(parents=True) + if description is None: + body = "---\nname: demo\n---\n\n# demo\n" + else: + body = f"---\nname: demo\ndescription: {description}\n---\n\n# demo\n" + (skill_dir / "SKILL.md").write_text(body, encoding="utf-8") + return skill_dir + + +# -------- _read_description --------------------------------------------------- + + +def test_read_description_extracts_field(tmp_path): + skill_dir = _make_skill(tmp_path, description="Distill thoughts about transformers.") + assert _read_description(skill_dir) == "Distill thoughts about transformers." + + +def test_read_description_raises_on_missing_frontmatter(tmp_path): + skill_dir = tmp_path / "output" / "skills" / "demo" + skill_dir.mkdir(parents=True) + (skill_dir / "SKILL.md").write_text("# demo\n\nNo frontmatter at all.\n") + with pytest.raises(RuntimeError, match="frontmatter"): + _read_description(skill_dir) + + +def test_read_description_raises_on_missing_description_field(tmp_path): + skill_dir = _make_skill(tmp_path, description=None) + with pytest.raises(RuntimeError, match="description"): + _read_description(skill_dir) + + +# -------- generate_eval_set --------------------------------------------------- + + +def _fake_generator_payload(count: int = 10) -> str: + trig = [f"trigger question {i}" for i in range(count)] + no = [f"unrelated question {i}" for i in range(count)] + return json.dumps({"should_trigger": trig, "should_not": no}) + + +@pytest.mark.asyncio +async def test_generate_eval_set_parses_plain_json(tmp_path): + skill_dir = _make_skill(tmp_path) + + async def fake_runner(*args, **kwargs): + return SimpleNamespace(final_output=_fake_generator_payload(10)) + + with patch("openkb.skill_evaluator.Runner.run", new=AsyncMock(side_effect=fake_runner)): + prompts = await generate_eval_set(skill_dir, model="gpt-4o-mini", count=10) + + assert len(prompts) == 20 + assert sum(1 for p in prompts if p.expected == "trigger") == 10 + assert sum(1 for p in prompts if p.expected == "no-trigger") == 10 + assert prompts[0].question == "trigger question 0" + assert prompts[10].question == "unrelated question 0" + + +@pytest.mark.asyncio +async def test_generate_eval_set_strips_code_fences(tmp_path): + skill_dir = _make_skill(tmp_path) + fenced = "```json\n" + _fake_generator_payload(3) + "\n```" + + async def fake_runner(*args, **kwargs): + return SimpleNamespace(final_output=fenced) + + with patch("openkb.skill_evaluator.Runner.run", new=AsyncMock(side_effect=fake_runner)): + prompts = await generate_eval_set(skill_dir, model="gpt-4o-mini", count=3) + + assert len(prompts) == 6 + + +# -------- grade_one ----------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_grade_one_returns_trigger_for_trigger_response(): + async def fake_runner(*args, **kwargs): + return SimpleNamespace(final_output="TRIGGER") + + with patch("openkb.skill_evaluator.Runner.run", new=AsyncMock(side_effect=fake_runner)): + out = await grade_one("desc", "question?", model="gpt-4o-mini") + assert out == "trigger" + + +@pytest.mark.asyncio +async def test_grade_one_returns_no_trigger_for_negative_response(): + async def fake_runner(*args, **kwargs): + return SimpleNamespace(final_output="NO-TRIGGER") + + with patch("openkb.skill_evaluator.Runner.run", new=AsyncMock(side_effect=fake_runner)): + out = await grade_one("desc", "question?", model="gpt-4o-mini") + assert out == "no-trigger" + + +@pytest.mark.asyncio +async def test_grade_one_handles_mixed_case(): + async def fake_runner(*args, **kwargs): + return SimpleNamespace(final_output="trigger") + + with patch("openkb.skill_evaluator.Runner.run", new=AsyncMock(side_effect=fake_runner)): + out = await grade_one("desc", "question?", model="gpt-4o-mini") + assert out == "trigger" + + +@pytest.mark.asyncio +async def test_grade_one_handles_space_variant(): + async def fake_runner(*args, **kwargs): + return SimpleNamespace(final_output="No Trigger") + + with patch("openkb.skill_evaluator.Runner.run", new=AsyncMock(side_effect=fake_runner)): + out = await grade_one("desc", "question?", model="gpt-4o-mini") + assert out == "no-trigger" + + +@pytest.mark.asyncio +async def test_grade_one_defaults_to_no_trigger_on_ambiguous_output(): + async def fake_runner(*args, **kwargs): + return SimpleNamespace(final_output="hmm not sure") + + with patch("openkb.skill_evaluator.Runner.run", new=AsyncMock(side_effect=fake_runner)): + out = await grade_one("desc", "question?", model="gpt-4o-mini") + assert out == "no-trigger" + + +# -------- run_eval ------------------------------------------------------------ + + +def _build_eval_set(n_trig: int = 3, n_no: int = 3) -> list[EvalPrompt]: + prompts: list[EvalPrompt] = [] + for i in range(n_trig): + prompts.append(EvalPrompt(question=f"trig {i}", expected="trigger")) + for i in range(n_no): + prompts.append(EvalPrompt(question=f"no {i}", expected="no-trigger")) + return prompts + + +@pytest.mark.asyncio +async def test_run_eval_happy_path_all_correct(tmp_path): + skill_dir = _make_skill(tmp_path) + eval_set = _build_eval_set(3, 3) + + async def fake_grade(description, question, *, model): + # Return the ground truth label — perfect grader. + match = next(p for p in eval_set if p.question == question) + return match.expected + + with patch("openkb.skill_evaluator.grade_one", side_effect=fake_grade): + result = await run_eval(skill_dir, model="gpt-4o-mini", eval_set=eval_set) + + assert isinstance(result, EvalResult) + assert result.total == 6 + assert result.passed == 6 + assert result.pass_rate == 1.0 + assert result.misses == [] + + +@pytest.mark.asyncio +async def test_run_eval_reports_misses(tmp_path): + skill_dir = _make_skill(tmp_path) + eval_set = _build_eval_set(3, 3) + + # Grader always says "trigger" — so the 3 no-trigger prompts will miss. + async def fake_grade(description, question, *, model): + return "trigger" + + with patch("openkb.skill_evaluator.grade_one", side_effect=fake_grade): + result = await run_eval(skill_dir, model="gpt-4o-mini", eval_set=eval_set) + + assert result.total == 6 + assert result.passed == 3 + assert len(result.misses) == 3 + assert all(m.prompt.expected == "no-trigger" for m in result.misses) + assert all(m.graded == "trigger" for m in result.misses) + assert result.pass_rate == pytest.approx(0.5) + # EvalMiss.label sanity check + assert "no-trigger" in result.misses[0].label + assert "trigger" in result.misses[0].label + + +# -------- save/load round-trip ------------------------------------------------ + + +def test_save_and_load_eval_set_round_trip(tmp_path): + prompts = _build_eval_set(2, 2) + path = save_eval_set(tmp_path, "demo", prompts) + + assert path == tmp_path / ".openkb" / "eval-sets" / "demo.json" + assert path.is_file() + + data = json.loads(path.read_text()) + assert data["should_trigger"] == ["trig 0", "trig 1"] + assert data["should_not"] == ["no 0", "no 1"] + + loaded = load_eval_set(path) + assert len(loaded) == 4 + assert [p.question for p in loaded if p.expected == "trigger"] == ["trig 0", "trig 1"] + assert [p.question for p in loaded if p.expected == "no-trigger"] == ["no 0", "no 1"] From a5fe56715cda7a70a2f007849c1b448b2ddf0c31 Mon Sep 17 00:00:00 2001 From: mountain Date: Mon, 18 May 2026 16:20:18 +0800 Subject: [PATCH 22/25] docs(readme): document validate / eval / history / rollback commands MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Skill Factory section now lists the 4 quality-gate commands borrowed from Codex (validate) and Anthropic skill-creator (eval + iteration): openkb skill validate structural lint (frontmatter, sizes, refs) openkb skill eval trigger-accuracy test of the description openkb skill history list past iterations openkb skill rollback restore a previous iteration The slogan promised a 'digital expert' — these commands are what makes the output worth that label. --- README.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/README.md b/README.md index a8010e02..3b0f0052 100644 --- a/README.md +++ b/README.md @@ -186,6 +186,9 @@ A "generator" reads from the compiled wiki and produces something usable: an ans | openkb query "question" | A grounded answer with citations (use `--save` to persist to `wiki/explorations/`) | | `openkb chat` | Interactive multi-turn session over the wiki (use `--resume`, `--list`, `--delete` to manage sessions) | | openkb skill new <name> "<intent>" | A redistributable Anthropic Skill at `/output/skills//` + auto-updated `marketplace.json` | +| openkb skill validate [name] | Structural lint of compiled skills (frontmatter, file sizes, wikilinks, scripts/ stdlib check with `--strict`). Auto-runs at end of `skill new` | +| openkb skill eval <name> | Trigger-accuracy evaluation — does the `description:` field actually fire? LLM generates eval prompts; grader LLM scores activation. `--save` persists the eval set | +| openkb skill history <name> / openkb skill rollback <name> | Iteration workspace — every overwrite saves the previous version to `output/skills/-workspace/iteration-N/` with a structural diff. Rollback restores any iteration | ### Query & Chat — ask the wiki @@ -256,6 +259,21 @@ npx skills@latest add / [agent edits SKILL.md frontmatter in place] ``` +**Quality gates** — borrowing from [Codex skill-creator](https://github.com/openai/skills) (structural validation) and [Anthropic skill-creator](https://github.com/anthropics/skills/tree/main/skills/skill-creator) (trigger-accuracy evals): + +```bash +# Lint structure (auto-runs at end of `skill new`) +openkb skill validate karpathy-thinking +openkb skill validate --strict # treat warnings as failures + +# Does the description actually fire when it should? +openkb skill eval karpathy-thinking --save + +# History + rollback if a new iteration regresses +openkb skill history karpathy-thinking +openkb skill rollback karpathy-thinking --to 2 +``` + See [CONTRIBUTING.md](CONTRIBUTING.md) for how to submit your compiled skill back to the community registry at [VectifyAI/OpenKB](https://github.com/VectifyAI/OpenKB). ### Configuration From bc916bc2cb25a091a3172335f49209de2671c93d Mon Sep 17 00:00:00 2001 From: mountain Date: Mon, 18 May 2026 16:39:45 +0800 Subject: [PATCH 23/25] fix(skill): round-2 review fixes + drop community docs Code review round-2 flagged the eval pipeline reintroducing the MaxTurnsExceeded/JSONDecodeError traceback leak that round-1 caught for skill new. Apply the same shim inside skill_evaluator + 4 other carryover items: - Translate MaxTurnsExceeded and json.JSONDecodeError to RuntimeError inside generate_eval_set and grade_one. CLI catch (RuntimeError) now covers both. - Wrap _setup_llm_key in skill_eval with the same try/except/exit pattern as skill_new / query / chat. - Move openkb/skill_evaluator.py -> openkb/agent/skill_evaluator.py. Modules that construct Agent live under openkb/agent/ per repo convention; top-level openkb/ keeps marketplace + generator (no agents SDK). - Validator: reject '<' / '>' in description (Anthropic parser requirement); warn on unknown frontmatter keys (Anthropic spec allows a fixed set). - Drop redundant in-function 'import asyncio' from skill_eval (already at module top). - Drop unused EvalMiss import from tests. - Validator module docstring updated to enumerate all checks. Also delete community contribution scaffolding (CONTRIBUTING.md + .github/PULL_REQUEST_TEMPLATE/skill_submission.md) - premature for the project's current stage; will revisit when real contributors arrive. --- .../PULL_REQUEST_TEMPLATE/skill_submission.md | 22 ------ CONTRIBUTING.md | 71 ------------------- README.md | 2 - openkb/{ => agent}/skill_evaluator.py | 27 ++++++- openkb/cli.py | 9 ++- openkb/skill_validator.py | 32 +++++++-- tests/test_skill_cli.py | 4 +- tests/test_skill_evaluator.py | 56 +++++++++++---- tests/test_skill_validator.py | 32 +++++++++ 9 files changed, 134 insertions(+), 121 deletions(-) delete mode 100644 .github/PULL_REQUEST_TEMPLATE/skill_submission.md delete mode 100644 CONTRIBUTING.md rename openkb/{ => agent}/skill_evaluator.py (87%) diff --git a/.github/PULL_REQUEST_TEMPLATE/skill_submission.md b/.github/PULL_REQUEST_TEMPLATE/skill_submission.md deleted file mode 100644 index a413744a..00000000 --- a/.github/PULL_REQUEST_TEMPLATE/skill_submission.md +++ /dev/null @@ -1,22 +0,0 @@ -## Skill submission - -- **Name:** `` -- **What it does (one sentence):** -- **Source material:** - -### Checklist - -- [ ] I have the right to redistribute the source material this skill is - derived from (public, my own work, properly licensed, or fair-use - methodology only — not bulk-copied copyrighted text). -- [ ] `SKILL.md` has valid YAML frontmatter with `name:` and `description:` - (description ≤ 1024 chars). -- [ ] All `[[references/...]]` links resolve. -- [ ] I added an entry under `plugins[0].skills` in - `.claude-plugin/marketplace.json`. -- [ ] I installed and tested this skill locally in at least one agent CLI - (Claude Code / Codex CLI / Gemini CLI / Cursor). - -### Notes for reviewers - - diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md deleted file mode 100644 index dfbb7eb2..00000000 --- a/CONTRIBUTING.md +++ /dev/null @@ -1,71 +0,0 @@ -# Contributing to OpenKB - -Thanks for your interest in OpenKB! There are two ways to contribute: code (bug -fixes, new features) and skills (compiled knowledge artifacts). - -## Code contributions - -Standard GitHub workflow: - -1. Fork the repo and create a feature branch off `main`. -2. Add tests for any new behaviour. Run `uv run pytest` to verify they pass. -3. Open a PR with a clear description and link to any related issues. -4. Address review feedback. A maintainer will merge once everything looks good. - -## Submitting a skill - -OpenKB doubles as a registry for compiled-knowledge skills. The `skills/` -directory in this repo hosts the official `openkb` skill plus -community-contributed skills installable via: - -```bash -npx skills@latest add VectifyAI/OpenKB -``` - -To submit your skill: - -### 1. Compile and test locally - -```bash -cd -openkb skill new "" -cp -r output/skills/ ~/.claude/skills/ -# Verify the skill activates in a Claude Code session on a relevant question -``` - -### 2. Open a PR against this repo - -1. Fork `VectifyAI/OpenKB`. -2. Copy your skill into the fork at `skills//`. -3. Add a new entry under `plugins[0].skills` in `.claude-plugin/marketplace.json`: - ```json - "./skills/" - ``` -4. Open a PR with the "Skill submission" template (auto-applied when you create - the PR). - -### 3. Review - -A maintainer will review your submission against this checklist: - -- `SKILL.md` has a valid YAML frontmatter with `name:` and `description:` - (description ≤ 1024 characters). -- The `description` is specific (not "a skill about X"). -- All `[[references/...]]` links resolve to files you actually included. -- Skill name doesn't clash with an existing entry. -- No obvious copyright-infringing content (large verbatim copies of recent - copyrighted books or papers without authorisation). - -You're responsible for confirming you have rights to redistribute the source -material your skill is derived from. The PR template includes a checkbox for -this. - -### What makes a good community skill - -- **Methodology-focused**, not bulk-copied content. "How to reason about X" - beats "the full text of X". -- **Specific scope** so the `description:` field can be precise enough that - loading agents pick it up at the right time. -- **Tested in at least one agent CLI** before submission. - -Thanks for sharing what you've compiled! diff --git a/README.md b/README.md index 3b0f0052..5497ec4a 100644 --- a/README.md +++ b/README.md @@ -274,8 +274,6 @@ openkb skill history karpathy-thinking openkb skill rollback karpathy-thinking --to 2 ``` -See [CONTRIBUTING.md](CONTRIBUTING.md) for how to submit your compiled skill back to the community registry at [VectifyAI/OpenKB](https://github.com/VectifyAI/OpenKB). - ### Configuration Settings are initialized by `openkb init`, and stored in `.openkb/config.yaml`: diff --git a/openkb/skill_evaluator.py b/openkb/agent/skill_evaluator.py similarity index 87% rename from openkb/skill_evaluator.py rename to openkb/agent/skill_evaluator.py index 6aa9a15d..979df4f0 100644 --- a/openkb/skill_evaluator.py +++ b/openkb/agent/skill_evaluator.py @@ -117,7 +117,14 @@ async def generate_eval_set( model=f"litellm/{model}", model_settings=ModelSettings(parallel_tool_calls=False), ) - result = await Runner.run(agent, "Generate the eval set now.", max_turns=3) + from agents.exceptions import MaxTurnsExceeded + try: + result = await Runner.run(agent, "Generate the eval set now.", max_turns=3) + except MaxTurnsExceeded as exc: + raise RuntimeError( + "Eval set generation hit the max-turn cap. The model may be " + "looping; try a different model or a smaller --count." + ) from exc raw = (result.final_output or "").strip() # Strip optional code fence @@ -126,7 +133,14 @@ async def generate_eval_set( if raw.startswith("json"): raw = raw[4:].lstrip() - data = json.loads(raw) + try: + data = json.loads(raw) + except json.JSONDecodeError as exc: + raise RuntimeError( + f"Eval set generator returned non-JSON output: {exc.msg}. " + f"Try a more capable model — small models often ignore " + f"'output only JSON' instructions. First 200 chars: {raw[:200]!r}" + ) from exc prompts: list[EvalPrompt] = [] for q in data.get("should_trigger", []): prompts.append(EvalPrompt(question=q, expected="trigger")) @@ -157,7 +171,14 @@ async def grade_one( model=f"litellm/{model}", model_settings=ModelSettings(parallel_tool_calls=False), ) - result = await Runner.run(agent, f"Question: {question}", max_turns=2) + from agents.exceptions import MaxTurnsExceeded + try: + result = await Runner.run(agent, f"Question: {question}", max_turns=2) + except MaxTurnsExceeded as exc: + raise RuntimeError( + f"Trigger grader hit the max-turn cap on question: {question!r}. " + f"Try a more capable model." + ) from exc raw = (result.final_output or "").strip().upper() if "NO-TRIGGER" in raw or "NO TRIGGER" in raw: return "no-trigger" diff --git a/openkb/cli.py b/openkb/cli.py index 75e2dce7..84718543 100644 --- a/openkb/cli.py +++ b/openkb/cli.py @@ -1744,8 +1744,7 @@ def skill_eval(ctx, name, save_flag, eval_set_path, count): the description should activate the skill for each prompt. Prints pass rate + miss list. """ - import asyncio - from openkb.skill_evaluator import ( + from openkb.agent.skill_evaluator import ( run_eval, save_eval_set, load_eval_set, EvalPrompt, ) @@ -1759,7 +1758,11 @@ def skill_eval(ctx, name, save_flag, eval_set_path, count): click.echo(f"[ERROR] Skill '{name}' not found.", err=True) ctx.exit(1) - _setup_llm_key(kb_dir) + try: + _setup_llm_key(kb_dir) + except RuntimeError as exc: + click.echo(f"[ERROR] {exc}", err=True) + ctx.exit(1) config = load_config(kb_dir / ".openkb" / "config.yaml") model = config.get("model", DEFAULT_CONFIG["model"]) diff --git a/openkb/skill_validator.py b/openkb/skill_validator.py index 82de55d4..aecff89f 100644 --- a/openkb/skill_validator.py +++ b/openkb/skill_validator.py @@ -4,12 +4,15 @@ make a skill un-loadable or misleading to the agents that install it: * SKILL.md missing or unparseable - * frontmatter missing required fields - * name field doesn't match directory or violates the slug rule - * description too long (> 1024 chars per Anthropic spec) - * files too big (SKILL.md > 50 KB / references/*.md > 100 KB) - * `[[references/...]]` wikilinks pointing at files that don't exist - * (strict mode) scripts/*.py importing non-stdlib modules + * frontmatter present, parses as YAML, is a mapping + * required fields: name (matches dir + slug regex), description + * description length within bounds (warns < 20 chars, errors > 1024) + * description must not contain '<' or '>' (breaks activation parser) + * frontmatter keys limited to the Anthropic Skills allowed set + (warns on unknown keys; matches Anthropic's quick_validate.py) + * files within size limits (SKILL.md ≤ 50 KB / references/*.md ≤ 100 KB) + * `[[references/...]]` wikilinks resolve to actual files + * (strict mode) scripts/*.py imports only stdlib modules This is the deterministic counterpart to ``openkb skill eval`` — eval measures whether the description fires; validate ensures the structure @@ -32,6 +35,9 @@ REFERENCE_MAX_BYTES = 100 * 1024 NAME_MAX_LEN = 64 WIKILINK_RE = re.compile(r"\[\[references/([a-z0-9._/-]+)\]\]", re.IGNORECASE) +ALLOWED_FRONTMATTER_KEYS = { + "name", "description", "license", "allowed-tools", "metadata", "compatibility", +} @dataclass @@ -98,6 +104,15 @@ def validate_skill(skill_dir: Path, *, strict: bool = False) -> ValidationResult result.errors.append("Frontmatter must be a YAML mapping.") return result + extras = set(meta.keys()) - ALLOWED_FRONTMATTER_KEYS + if extras: + # Treat as warning, not error — keeps strict mode user-controllable + result.warnings.append( + f"Frontmatter contains unknown keys: {sorted(extras)}. " + f"Anthropic Skills spec only allows: " + f"{sorted(ALLOWED_FRONTMATTER_KEYS)}." + ) + # name field name = meta.get("name") if not name: @@ -133,6 +148,11 @@ def validate_skill(skill_dir: Path, *, strict: bool = False) -> ValidationResult f"Frontmatter 'description:' is only {len(desc)} chars — " f"too short to be a useful activation signal." ) + if "<" in desc or ">" in desc: + result.errors.append( + "Frontmatter 'description:' must not contain '<' or '>' " + "characters — they break the activation parser in Claude Code." + ) # references/ wikilink resolution wikilinks = WIKILINK_RE.findall(text) diff --git a/tests/test_skill_cli.py b/tests/test_skill_cli.py index 7d42297a..086eacb9 100644 --- a/tests/test_skill_cli.py +++ b/tests/test_skill_cli.py @@ -343,7 +343,7 @@ async def perfect_grader(description, question, *, model): runner = CliRunner() with patch("openkb.cli._find_kb_dir", return_value=kb), \ patch("openkb.cli._setup_llm_key", return_value=None), \ - patch("openkb.skill_evaluator.grade_one", side_effect=perfect_grader): + patch("openkb.agent.skill_evaluator.grade_one", side_effect=perfect_grader): result = runner.invoke(cli, [ "skill", "eval", "demo", "--eval-set", str(eval_path), ]) @@ -374,7 +374,7 @@ async def biased_grader(description, question, *, model): runner = CliRunner() with patch("openkb.cli._find_kb_dir", return_value=kb), \ patch("openkb.cli._setup_llm_key", return_value=None), \ - patch("openkb.skill_evaluator.grade_one", side_effect=biased_grader): + patch("openkb.agent.skill_evaluator.grade_one", side_effect=biased_grader): result = runner.invoke(cli, [ "skill", "eval", "demo", "--eval-set", str(eval_path), ]) diff --git a/tests/test_skill_evaluator.py b/tests/test_skill_evaluator.py index 3863051e..01826f76 100644 --- a/tests/test_skill_evaluator.py +++ b/tests/test_skill_evaluator.py @@ -1,4 +1,4 @@ -"""Tests for openkb.skill_evaluator. +"""Tests for openkb.agent.skill_evaluator. The Runner.run call is mocked everywhere — no real LLM tokens spent. What we DO verify: @@ -17,8 +17,7 @@ import pytest -from openkb.skill_evaluator import ( - EvalMiss, +from openkb.agent.skill_evaluator import ( EvalPrompt, EvalResult, _read_description, @@ -80,7 +79,7 @@ async def test_generate_eval_set_parses_plain_json(tmp_path): async def fake_runner(*args, **kwargs): return SimpleNamespace(final_output=_fake_generator_payload(10)) - with patch("openkb.skill_evaluator.Runner.run", new=AsyncMock(side_effect=fake_runner)): + with patch("openkb.agent.skill_evaluator.Runner.run", new=AsyncMock(side_effect=fake_runner)): prompts = await generate_eval_set(skill_dir, model="gpt-4o-mini", count=10) assert len(prompts) == 20 @@ -98,7 +97,7 @@ async def test_generate_eval_set_strips_code_fences(tmp_path): async def fake_runner(*args, **kwargs): return SimpleNamespace(final_output=fenced) - with patch("openkb.skill_evaluator.Runner.run", new=AsyncMock(side_effect=fake_runner)): + with patch("openkb.agent.skill_evaluator.Runner.run", new=AsyncMock(side_effect=fake_runner)): prompts = await generate_eval_set(skill_dir, model="gpt-4o-mini", count=3) assert len(prompts) == 6 @@ -112,7 +111,7 @@ async def test_grade_one_returns_trigger_for_trigger_response(): async def fake_runner(*args, **kwargs): return SimpleNamespace(final_output="TRIGGER") - with patch("openkb.skill_evaluator.Runner.run", new=AsyncMock(side_effect=fake_runner)): + with patch("openkb.agent.skill_evaluator.Runner.run", new=AsyncMock(side_effect=fake_runner)): out = await grade_one("desc", "question?", model="gpt-4o-mini") assert out == "trigger" @@ -122,7 +121,7 @@ async def test_grade_one_returns_no_trigger_for_negative_response(): async def fake_runner(*args, **kwargs): return SimpleNamespace(final_output="NO-TRIGGER") - with patch("openkb.skill_evaluator.Runner.run", new=AsyncMock(side_effect=fake_runner)): + with patch("openkb.agent.skill_evaluator.Runner.run", new=AsyncMock(side_effect=fake_runner)): out = await grade_one("desc", "question?", model="gpt-4o-mini") assert out == "no-trigger" @@ -132,7 +131,7 @@ async def test_grade_one_handles_mixed_case(): async def fake_runner(*args, **kwargs): return SimpleNamespace(final_output="trigger") - with patch("openkb.skill_evaluator.Runner.run", new=AsyncMock(side_effect=fake_runner)): + with patch("openkb.agent.skill_evaluator.Runner.run", new=AsyncMock(side_effect=fake_runner)): out = await grade_one("desc", "question?", model="gpt-4o-mini") assert out == "trigger" @@ -142,7 +141,7 @@ async def test_grade_one_handles_space_variant(): async def fake_runner(*args, **kwargs): return SimpleNamespace(final_output="No Trigger") - with patch("openkb.skill_evaluator.Runner.run", new=AsyncMock(side_effect=fake_runner)): + with patch("openkb.agent.skill_evaluator.Runner.run", new=AsyncMock(side_effect=fake_runner)): out = await grade_one("desc", "question?", model="gpt-4o-mini") assert out == "no-trigger" @@ -152,7 +151,7 @@ async def test_grade_one_defaults_to_no_trigger_on_ambiguous_output(): async def fake_runner(*args, **kwargs): return SimpleNamespace(final_output="hmm not sure") - with patch("openkb.skill_evaluator.Runner.run", new=AsyncMock(side_effect=fake_runner)): + with patch("openkb.agent.skill_evaluator.Runner.run", new=AsyncMock(side_effect=fake_runner)): out = await grade_one("desc", "question?", model="gpt-4o-mini") assert out == "no-trigger" @@ -179,7 +178,7 @@ async def fake_grade(description, question, *, model): match = next(p for p in eval_set if p.question == question) return match.expected - with patch("openkb.skill_evaluator.grade_one", side_effect=fake_grade): + with patch("openkb.agent.skill_evaluator.grade_one", side_effect=fake_grade): result = await run_eval(skill_dir, model="gpt-4o-mini", eval_set=eval_set) assert isinstance(result, EvalResult) @@ -198,7 +197,7 @@ async def test_run_eval_reports_misses(tmp_path): async def fake_grade(description, question, *, model): return "trigger" - with patch("openkb.skill_evaluator.grade_one", side_effect=fake_grade): + with patch("openkb.agent.skill_evaluator.grade_one", side_effect=fake_grade): result = await run_eval(skill_dir, model="gpt-4o-mini", eval_set=eval_set) assert result.total == 6 @@ -230,3 +229,36 @@ def test_save_and_load_eval_set_round_trip(tmp_path): assert len(loaded) == 4 assert [p.question for p in loaded if p.expected == "trigger"] == ["trig 0", "trig 1"] assert [p.question for p in loaded if p.expected == "no-trigger"] == ["no 0", "no 1"] + + +# -------- RuntimeError translation for CLI catch ------------------------------- + + +@pytest.mark.asyncio +async def test_generate_eval_set_translates_max_turns_to_runtime_error(tmp_path): + """MaxTurnsExceeded from Runner.run should become RuntimeError.""" + from agents.exceptions import MaxTurnsExceeded + + skill_dir = _make_skill(tmp_path) + + async def fake_runner(*args, **kwargs): + raise MaxTurnsExceeded("ran out") + + with patch("openkb.agent.skill_evaluator.Runner.run", + new=AsyncMock(side_effect=fake_runner)): + with pytest.raises(RuntimeError, match="max-turn cap"): + await generate_eval_set(skill_dir, model="gpt-4o-mini") + + +@pytest.mark.asyncio +async def test_generate_eval_set_translates_malformed_json_to_runtime_error(tmp_path): + """Non-JSON LLM output should produce a friendly RuntimeError.""" + skill_dir = _make_skill(tmp_path) + + async def fake_runner(*args, **kwargs): + return SimpleNamespace(final_output="this is not json at all") + + with patch("openkb.agent.skill_evaluator.Runner.run", + new=AsyncMock(side_effect=fake_runner)): + with pytest.raises(RuntimeError, match="non-JSON output"): + await generate_eval_set(skill_dir, model="gpt-4o-mini") diff --git a/tests/test_skill_validator.py b/tests/test_skill_validator.py index 06fad87a..b59b6d31 100644 --- a/tests/test_skill_validator.py +++ b/tests/test_skill_validator.py @@ -303,3 +303,35 @@ def test_passed_vs_passed_strict_semantics(tmp_path): result = validate_skill(sd) assert result.passed is True assert result.passed_strict is False + + +# --------------------------------------------------------------------------- +# new round-2 checks: angle brackets in description + unknown frontmatter keys +# --------------------------------------------------------------------------- + +def test_validator_rejects_angle_brackets_in_description(tmp_path): + """Anthropic's activation parser breaks on < or > in description.""" + sd = _write_skill( + tmp_path, "demo", + frontmatter="name: demo\ndescription: Reason about here.", + ) + result = validate_skill(sd) + assert not result.passed + assert any("'<' or '>'" in e for e in result.errors) + + +def test_validator_warns_on_unknown_frontmatter_keys(tmp_path): + """Anthropic spec only allows a fixed set of frontmatter keys.""" + sd = _write_skill( + tmp_path, "demo", + frontmatter=( + "name: demo\ndescription: A valid description string here.\n" + "random_key: foo\nanother_one: bar" + ), + ) + result = validate_skill(sd) + # Passes in default (warnings only) + assert result.passed + # But strict mode catches it + assert not result.passed_strict + assert any("unknown keys" in w for w in result.warnings) From a4e29b1c4007197ef989594876c31f2bf9b6e803 Mon Sep 17 00:00:00 2001 From: mountain Date: Mon, 18 May 2026 16:47:20 +0800 Subject: [PATCH 24/25] refactor(skill): consolidate skill modules under openkb/skill/ All skill-related code now lives in a single openkb/skill/ subpackage (7 modules + __init__). Drop the redundant 'skill_' prefix on filenames since the package qualifies them already. Moves: openkb/generator.py -> openkb/skill/generator.py openkb/marketplace.py -> openkb/skill/marketplace.py openkb/skill_validator.py -> openkb/skill/validator.py openkb/skill_workspace.py -> openkb/skill/workspace.py openkb/agent/skill_creator.py -> openkb/skill/creator.py openkb/agent/skill_tools.py -> openkb/skill/tools.py openkb/agent/skill_evaluator.py -> openkb/skill/evaluator.py generator.py + marketplace.py go under skill/ for now (v0.x only has skills); they're nominally generic primitives but YAGNI -- when a second artifact type (ppt / podcast / report) actually lands, those two will move back out to openkb//. No behavioral changes. All imports + test patch targets updated. Test suite stays at 494 passed. --- openkb/agent/chat.py | 2 +- openkb/cli.py | 16 ++++++------ openkb/skill/__init__.py | 8 ++++++ .../skill_creator.py => skill/creator.py} | 2 +- .../skill_evaluator.py => skill/evaluator.py} | 0 openkb/{ => skill}/generator.py | 6 ++--- openkb/{ => skill}/marketplace.py | 0 .../{agent/skill_tools.py => skill/tools.py} | 2 +- .../validator.py} | 0 .../workspace.py} | 0 tests/test_generator.py | 8 +++--- tests/test_marketplace.py | 4 +-- tests/test_skill_chat_slash.py | 2 +- tests/test_skill_cli.py | 10 +++---- tests/test_skill_creator.py | 10 +++---- tests/test_skill_evaluator.py | 26 +++++++++---------- tests/test_skill_tools.py | 4 +-- tests/test_skill_validator.py | 4 +-- tests/test_skill_workspace.py | 4 +-- 19 files changed, 58 insertions(+), 50 deletions(-) create mode 100644 openkb/skill/__init__.py rename openkb/{agent/skill_creator.py => skill/creator.py} (99%) rename openkb/{agent/skill_evaluator.py => skill/evaluator.py} (100%) rename openkb/{ => skill}/generator.py (92%) rename openkb/{ => skill}/marketplace.py (100%) rename openkb/{agent/skill_tools.py => skill/tools.py} (96%) rename openkb/{skill_validator.py => skill/validator.py} (100%) rename openkb/{skill_workspace.py => skill/workspace.py} (100%) diff --git a/openkb/agent/chat.py b/openkb/agent/chat.py index afabc745..a2763a73 100644 --- a/openkb/agent/chat.py +++ b/openkb/agent/chat.py @@ -542,7 +542,7 @@ async def _handle_slash_skill(arg: str, kb_dir: Path, style: Style) -> None: config = load_config(kb_dir / ".openkb" / "config.yaml") model = config.get("model", DEFAULT_CONFIG["model"]) - from openkb.generator import Generator + from openkb.skill.generator import Generator _fmt(style, ("class:slash.help", f"Compiling skill '{name}'...\n")) try: gen = Generator( diff --git a/openkb/cli.py b/openkb/cli.py index 84718543..79370741 100644 --- a/openkb/cli.py +++ b/openkb/cli.py @@ -1466,7 +1466,7 @@ def skill_new(ctx, name, intent, yes_flag): # into /output/skills/-workspace/iteration-N/ first, so # the user can roll back via `openkb skill rollback`. See # ``openkb/skill_workspace.py``. - from openkb.skill_workspace import save_iteration, write_diff + from openkb.skill.workspace import save_iteration, write_diff target = kb_dir / "output" / "skills" / name saved_iteration: Path | None = None @@ -1492,7 +1492,7 @@ def skill_new(ctx, name, intent, yes_flag): ctx.exit(1) # Run the generator - from openkb.generator import Generator + from openkb.skill.generator import Generator click.echo(f"Compiling skill '{name}'...") try: gen = Generator( @@ -1519,7 +1519,7 @@ def skill_new(ctx, name, intent, yes_flag): # Auto-validate the freshly compiled skill. Surface issues but don't # block — files are on disk and the user can fix or rollback. - from openkb.skill_validator import validate_skill + from openkb.skill.validator import validate_skill skill_dir = kb_dir / "output" / "skills" / name result = validate_skill(skill_dir) if result.errors or result.warnings: @@ -1551,7 +1551,7 @@ def skill_history(ctx, name): """List previous iterations of a skill.""" import datetime as _dt - from openkb.skill_workspace import list_iterations + from openkb.skill.workspace import list_iterations kb_dir = _find_kb_dir(ctx.obj.get("kb_dir_override")) if kb_dir is None: @@ -1611,8 +1611,8 @@ def skill_history(ctx, name): @click.pass_context def skill_rollback(ctx, name, to_n, yes_flag): """Restore a previous iteration as the current skill.""" - from openkb.marketplace import regenerate_marketplace - from openkb.skill_workspace import list_iterations, restore_iteration + from openkb.skill.marketplace import regenerate_marketplace + from openkb.skill.workspace import list_iterations, restore_iteration kb_dir = _find_kb_dir(ctx.obj.get("kb_dir_override")) if kb_dir is None: @@ -1681,7 +1681,7 @@ def skill_rollback(ctx, name, to_n, yes_flag): @click.pass_context def skill_validate(ctx, name, strict): """Validate one skill (by name) or all compiled skills in this KB.""" - from openkb.skill_validator import validate_skill + from openkb.skill.validator import validate_skill kb_dir = _find_kb_dir(ctx.obj.get("kb_dir_override")) if kb_dir is None: @@ -1744,7 +1744,7 @@ def skill_eval(ctx, name, save_flag, eval_set_path, count): the description should activate the skill for each prompt. Prints pass rate + miss list. """ - from openkb.agent.skill_evaluator import ( + from openkb.skill.evaluator import ( run_eval, save_eval_set, load_eval_set, EvalPrompt, ) diff --git a/openkb/skill/__init__.py b/openkb/skill/__init__.py new file mode 100644 index 00000000..53b8c9c3 --- /dev/null +++ b/openkb/skill/__init__.py @@ -0,0 +1,8 @@ +"""All skill-related code (generator, marketplace, agent runtime, validator, +workspace, evaluator) lives in this subpackage. + +Today's only artifact type is ``skill``; the generator + marketplace +abstractions are nominally generic, but in v0.x they only serve skill +artifacts. If/when ppt / podcast / report targets land, factor the +generic primitives back out to ``openkb//`` at that time. +""" diff --git a/openkb/agent/skill_creator.py b/openkb/skill/creator.py similarity index 99% rename from openkb/agent/skill_creator.py rename to openkb/skill/creator.py index 2e023c4e..d8db0314 100644 --- a/openkb/agent/skill_creator.py +++ b/openkb/skill/creator.py @@ -18,7 +18,7 @@ from agents import Agent, Runner, function_tool from agents.model_settings import ModelSettings -from openkb.agent.skill_tools import ( +from openkb.skill.tools import ( list_wiki_dir as _list_wiki_dir_impl, read_wiki_file_for_skill as _read_wiki_file_impl, write_skill_file as _write_skill_file_impl, diff --git a/openkb/agent/skill_evaluator.py b/openkb/skill/evaluator.py similarity index 100% rename from openkb/agent/skill_evaluator.py rename to openkb/skill/evaluator.py diff --git a/openkb/generator.py b/openkb/skill/generator.py similarity index 92% rename from openkb/generator.py rename to openkb/skill/generator.py index 843a1b06..1517aa8b 100644 --- a/openkb/generator.py +++ b/openkb/skill/generator.py @@ -9,15 +9,15 @@ distribution mechanic as skills) Each target plugs in its own `run` coroutine. v0.1's only entry calls into -``openkb.agent.skill_creator.run_skill_create``. +``openkb.skill.creator.run_skill_create``. """ from __future__ import annotations from pathlib import Path from typing import Literal -from openkb.agent.skill_creator import run_skill_create -from openkb.marketplace import regenerate_marketplace +from openkb.skill.creator import run_skill_create +from openkb.skill.marketplace import regenerate_marketplace TargetType = Literal["skill"] # extend as new targets land diff --git a/openkb/marketplace.py b/openkb/skill/marketplace.py similarity index 100% rename from openkb/marketplace.py rename to openkb/skill/marketplace.py diff --git a/openkb/agent/skill_tools.py b/openkb/skill/tools.py similarity index 96% rename from openkb/agent/skill_tools.py rename to openkb/skill/tools.py index 3b7c736e..357b3a29 100644 --- a/openkb/agent/skill_tools.py +++ b/openkb/skill/tools.py @@ -2,7 +2,7 @@ The skill-create agent runs with three capabilities: * READ from /wiki/** (the substrate) - * QUERY the wiki via openkb query (semantic retrieval, see skill_creator.py) + * QUERY the wiki via openkb query (semantic retrieval, see creator.py) * WRITE under /output/skills//** (the only output destination) These helpers enforce those boundaries at the Python level — every tool diff --git a/openkb/skill_validator.py b/openkb/skill/validator.py similarity index 100% rename from openkb/skill_validator.py rename to openkb/skill/validator.py diff --git a/openkb/skill_workspace.py b/openkb/skill/workspace.py similarity index 100% rename from openkb/skill_workspace.py rename to openkb/skill/workspace.py diff --git a/tests/test_generator.py b/tests/test_generator.py index 1ef179f5..37d574f3 100644 --- a/tests/test_generator.py +++ b/tests/test_generator.py @@ -1,4 +1,4 @@ -"""Tests for openkb.generator.Generator — the v0.1 abstraction that will +"""Tests for openkb.skill.generator.Generator — the v0.1 abstraction that will be reused by future ppt / podcast generators. In v0.1, only target_type='skill' is supported. We test the dispatch shape @@ -8,7 +8,7 @@ import pytest from unittest.mock import AsyncMock, patch -from openkb.generator import Generator +from openkb.skill.generator import Generator def _make_kb(tmp_path): @@ -53,8 +53,8 @@ async def test_generator_run_delegates_to_skill_creator(tmp_path): kb_dir=kb, model="gpt-4o-mini", ) - with patch("openkb.generator.run_skill_create", new=AsyncMock()) as runner, \ - patch("openkb.generator.regenerate_marketplace") as regen: + with patch("openkb.skill.generator.run_skill_create", new=AsyncMock()) as runner, \ + patch("openkb.skill.generator.regenerate_marketplace") as regen: await g.run() runner.assert_awaited_once() regen.assert_called_once_with(kb) diff --git a/tests/test_marketplace.py b/tests/test_marketplace.py index a34c34ba..b122f6b7 100644 --- a/tests/test_marketplace.py +++ b/tests/test_marketplace.py @@ -1,11 +1,11 @@ -"""Tests for openkb.marketplace — regenerate /.claude-plugin/marketplace.json +"""Tests for openkb.skill.marketplace — regenerate /.claude-plugin/marketplace.json from /output/skills/*/SKILL.md.""" from __future__ import annotations import json import textwrap -from openkb.marketplace import regenerate_marketplace +from openkb.skill.marketplace import regenerate_marketplace def _make_kb(tmp_path): diff --git a/tests/test_skill_chat_slash.py b/tests/test_skill_chat_slash.py index 100135a2..d99dd152 100644 --- a/tests/test_skill_chat_slash.py +++ b/tests/test_skill_chat_slash.py @@ -36,7 +36,7 @@ async def fake_run(kb_dir, skill_name, intent, model): f"---\nname: {skill_name}\ndescription: t\n---\n\n# {skill_name}\n" ) - with patch("openkb.generator.run_skill_create", new=AsyncMock(side_effect=fake_run)): + with patch("openkb.skill.generator.run_skill_create", new=AsyncMock(side_effect=fake_run)): action = await _handle_slash( '/skill new demo "test intent"', kb, session, style ) diff --git a/tests/test_skill_cli.py b/tests/test_skill_cli.py index 086eacb9..b8575327 100644 --- a/tests/test_skill_cli.py +++ b/tests/test_skill_cli.py @@ -42,7 +42,7 @@ async def fake_run(kb_dir, skill_name, intent, model): _fake_compile(kb_dir, skill_name) with patch("openkb.cli._find_kb_dir", return_value=kb), \ - patch("openkb.generator.run_skill_create", new=AsyncMock(side_effect=fake_run)): + patch("openkb.skill.generator.run_skill_create", new=AsyncMock(side_effect=fake_run)): result = runner.invoke(cli, ["skill", "new", "demo", "test intent"]) assert result.exit_code == 0, result.output @@ -123,7 +123,7 @@ async def fake_run(kb_dir, skill_name, intent, model): _fake_compile(kb_dir, skill_name) with patch("openkb.cli._find_kb_dir", return_value=kb), \ - patch("openkb.generator.run_skill_create", new=AsyncMock(side_effect=fake_run)): + patch("openkb.skill.generator.run_skill_create", new=AsyncMock(side_effect=fake_run)): result = runner.invoke(cli, ["skill", "new", "demo", "x", "-y"]) assert result.exit_code == 0, result.output @@ -147,7 +147,7 @@ async def fake_run(kb_dir, skill_name, intent, model): _fake_compile(kb_dir, skill_name) with patch("openkb.cli._find_kb_dir", return_value=kb), \ - patch("openkb.generator.run_skill_create", new=AsyncMock(side_effect=fake_run)): + patch("openkb.skill.generator.run_skill_create", new=AsyncMock(side_effect=fake_run)): result = runner.invoke(cli, ["skill", "new", "demo", "x", "-y"]) assert result.exit_code == 0, result.output @@ -343,7 +343,7 @@ async def perfect_grader(description, question, *, model): runner = CliRunner() with patch("openkb.cli._find_kb_dir", return_value=kb), \ patch("openkb.cli._setup_llm_key", return_value=None), \ - patch("openkb.agent.skill_evaluator.grade_one", side_effect=perfect_grader): + patch("openkb.skill.evaluator.grade_one", side_effect=perfect_grader): result = runner.invoke(cli, [ "skill", "eval", "demo", "--eval-set", str(eval_path), ]) @@ -374,7 +374,7 @@ async def biased_grader(description, question, *, model): runner = CliRunner() with patch("openkb.cli._find_kb_dir", return_value=kb), \ patch("openkb.cli._setup_llm_key", return_value=None), \ - patch("openkb.agent.skill_evaluator.grade_one", side_effect=biased_grader): + patch("openkb.skill.evaluator.grade_one", side_effect=biased_grader): result = runner.invoke(cli, [ "skill", "eval", "demo", "--eval-set", str(eval_path), ]) diff --git a/tests/test_skill_creator.py b/tests/test_skill_creator.py index e166c418..5783d628 100644 --- a/tests/test_skill_creator.py +++ b/tests/test_skill_creator.py @@ -1,4 +1,4 @@ -"""Tests for openkb.agent.skill_creator. +"""Tests for openkb.skill.creator. The agent itself is mocked (we don't want to spend tokens in unit tests). What we DO test: @@ -15,7 +15,7 @@ import pytest from unittest.mock import AsyncMock, patch -from openkb.agent.skill_creator import ( +from openkb.skill.creator import ( build_skill_create_agent, run_skill_create, ) @@ -56,7 +56,7 @@ async def fake_runner(*args, **kwargs): from types import SimpleNamespace return SimpleNamespace(final_output="done") - with patch("openkb.agent.skill_creator.Runner.run", new=AsyncMock(side_effect=fake_runner)): + with patch("openkb.skill.creator.Runner.run", new=AsyncMock(side_effect=fake_runner)): await run_skill_create( kb_dir=kb, skill_name="demo", @@ -77,7 +77,7 @@ async def fake_runner(*args, **kwargs): from types import SimpleNamespace return SimpleNamespace(final_output="done") - with patch("openkb.agent.skill_creator.Runner.run", new=AsyncMock(side_effect=fake_runner)): + with patch("openkb.skill.creator.Runner.run", new=AsyncMock(side_effect=fake_runner)): with pytest.raises(RuntimeError, match="did not write SKILL.md"): await run_skill_create( kb_dir=kb, @@ -98,7 +98,7 @@ async def test_run_skill_create_translates_max_turns_to_runtime_error(tmp_path): async def fake_runner(*args, **kwargs): raise MaxTurnsExceeded("agent ran out of turns") - with patch("openkb.agent.skill_creator.Runner.run", + with patch("openkb.skill.creator.Runner.run", new=AsyncMock(side_effect=fake_runner)): with pytest.raises(RuntimeError, match="step cap"): await run_skill_create( diff --git a/tests/test_skill_evaluator.py b/tests/test_skill_evaluator.py index 01826f76..840bbbfa 100644 --- a/tests/test_skill_evaluator.py +++ b/tests/test_skill_evaluator.py @@ -1,4 +1,4 @@ -"""Tests for openkb.agent.skill_evaluator. +"""Tests for openkb.skill.evaluator. The Runner.run call is mocked everywhere — no real LLM tokens spent. What we DO verify: @@ -17,7 +17,7 @@ import pytest -from openkb.agent.skill_evaluator import ( +from openkb.skill.evaluator import ( EvalPrompt, EvalResult, _read_description, @@ -79,7 +79,7 @@ async def test_generate_eval_set_parses_plain_json(tmp_path): async def fake_runner(*args, **kwargs): return SimpleNamespace(final_output=_fake_generator_payload(10)) - with patch("openkb.agent.skill_evaluator.Runner.run", new=AsyncMock(side_effect=fake_runner)): + with patch("openkb.skill.evaluator.Runner.run", new=AsyncMock(side_effect=fake_runner)): prompts = await generate_eval_set(skill_dir, model="gpt-4o-mini", count=10) assert len(prompts) == 20 @@ -97,7 +97,7 @@ async def test_generate_eval_set_strips_code_fences(tmp_path): async def fake_runner(*args, **kwargs): return SimpleNamespace(final_output=fenced) - with patch("openkb.agent.skill_evaluator.Runner.run", new=AsyncMock(side_effect=fake_runner)): + with patch("openkb.skill.evaluator.Runner.run", new=AsyncMock(side_effect=fake_runner)): prompts = await generate_eval_set(skill_dir, model="gpt-4o-mini", count=3) assert len(prompts) == 6 @@ -111,7 +111,7 @@ async def test_grade_one_returns_trigger_for_trigger_response(): async def fake_runner(*args, **kwargs): return SimpleNamespace(final_output="TRIGGER") - with patch("openkb.agent.skill_evaluator.Runner.run", new=AsyncMock(side_effect=fake_runner)): + with patch("openkb.skill.evaluator.Runner.run", new=AsyncMock(side_effect=fake_runner)): out = await grade_one("desc", "question?", model="gpt-4o-mini") assert out == "trigger" @@ -121,7 +121,7 @@ async def test_grade_one_returns_no_trigger_for_negative_response(): async def fake_runner(*args, **kwargs): return SimpleNamespace(final_output="NO-TRIGGER") - with patch("openkb.agent.skill_evaluator.Runner.run", new=AsyncMock(side_effect=fake_runner)): + with patch("openkb.skill.evaluator.Runner.run", new=AsyncMock(side_effect=fake_runner)): out = await grade_one("desc", "question?", model="gpt-4o-mini") assert out == "no-trigger" @@ -131,7 +131,7 @@ async def test_grade_one_handles_mixed_case(): async def fake_runner(*args, **kwargs): return SimpleNamespace(final_output="trigger") - with patch("openkb.agent.skill_evaluator.Runner.run", new=AsyncMock(side_effect=fake_runner)): + with patch("openkb.skill.evaluator.Runner.run", new=AsyncMock(side_effect=fake_runner)): out = await grade_one("desc", "question?", model="gpt-4o-mini") assert out == "trigger" @@ -141,7 +141,7 @@ async def test_grade_one_handles_space_variant(): async def fake_runner(*args, **kwargs): return SimpleNamespace(final_output="No Trigger") - with patch("openkb.agent.skill_evaluator.Runner.run", new=AsyncMock(side_effect=fake_runner)): + with patch("openkb.skill.evaluator.Runner.run", new=AsyncMock(side_effect=fake_runner)): out = await grade_one("desc", "question?", model="gpt-4o-mini") assert out == "no-trigger" @@ -151,7 +151,7 @@ async def test_grade_one_defaults_to_no_trigger_on_ambiguous_output(): async def fake_runner(*args, **kwargs): return SimpleNamespace(final_output="hmm not sure") - with patch("openkb.agent.skill_evaluator.Runner.run", new=AsyncMock(side_effect=fake_runner)): + with patch("openkb.skill.evaluator.Runner.run", new=AsyncMock(side_effect=fake_runner)): out = await grade_one("desc", "question?", model="gpt-4o-mini") assert out == "no-trigger" @@ -178,7 +178,7 @@ async def fake_grade(description, question, *, model): match = next(p for p in eval_set if p.question == question) return match.expected - with patch("openkb.agent.skill_evaluator.grade_one", side_effect=fake_grade): + with patch("openkb.skill.evaluator.grade_one", side_effect=fake_grade): result = await run_eval(skill_dir, model="gpt-4o-mini", eval_set=eval_set) assert isinstance(result, EvalResult) @@ -197,7 +197,7 @@ async def test_run_eval_reports_misses(tmp_path): async def fake_grade(description, question, *, model): return "trigger" - with patch("openkb.agent.skill_evaluator.grade_one", side_effect=fake_grade): + with patch("openkb.skill.evaluator.grade_one", side_effect=fake_grade): result = await run_eval(skill_dir, model="gpt-4o-mini", eval_set=eval_set) assert result.total == 6 @@ -244,7 +244,7 @@ async def test_generate_eval_set_translates_max_turns_to_runtime_error(tmp_path) async def fake_runner(*args, **kwargs): raise MaxTurnsExceeded("ran out") - with patch("openkb.agent.skill_evaluator.Runner.run", + with patch("openkb.skill.evaluator.Runner.run", new=AsyncMock(side_effect=fake_runner)): with pytest.raises(RuntimeError, match="max-turn cap"): await generate_eval_set(skill_dir, model="gpt-4o-mini") @@ -258,7 +258,7 @@ async def test_generate_eval_set_translates_malformed_json_to_runtime_error(tmp_ async def fake_runner(*args, **kwargs): return SimpleNamespace(final_output="this is not json at all") - with patch("openkb.agent.skill_evaluator.Runner.run", + with patch("openkb.skill.evaluator.Runner.run", new=AsyncMock(side_effect=fake_runner)): with pytest.raises(RuntimeError, match="non-JSON output"): await generate_eval_set(skill_dir, model="gpt-4o-mini") diff --git a/tests/test_skill_tools.py b/tests/test_skill_tools.py index f5db686d..99f07ee4 100644 --- a/tests/test_skill_tools.py +++ b/tests/test_skill_tools.py @@ -1,7 +1,7 @@ -"""Tests for openkb.agent.skill_tools — path-scoped IO for the skill-create agent.""" +"""Tests for openkb.skill.tools — path-scoped IO for the skill-create agent.""" from __future__ import annotations -from openkb.agent.skill_tools import ( +from openkb.skill.tools import ( list_wiki_dir, read_wiki_file_for_skill, write_skill_file, diff --git a/tests/test_skill_validator.py b/tests/test_skill_validator.py index b59b6d31..39952fc9 100644 --- a/tests/test_skill_validator.py +++ b/tests/test_skill_validator.py @@ -1,10 +1,10 @@ -"""Unit tests for openkb.skill_validator — pure-Python structural checks +"""Unit tests for openkb.skill.validator — pure-Python structural checks on a compiled skill directory. No LLM, no network.""" from __future__ import annotations from pathlib import Path -from openkb.skill_validator import ( +from openkb.skill.validator import ( DESCRIPTION_MAX_CHARS, REFERENCE_MAX_BYTES, SKILL_MD_MAX_BYTES, diff --git a/tests/test_skill_workspace.py b/tests/test_skill_workspace.py index ac4b5af7..2fb9a157 100644 --- a/tests/test_skill_workspace.py +++ b/tests/test_skill_workspace.py @@ -1,11 +1,11 @@ -"""Tests for :mod:`openkb.skill_workspace` — iteration save/restore + diff.""" +"""Tests for :mod:`openkb.skill.workspace` — iteration save/restore + diff.""" from __future__ import annotations from pathlib import Path import pytest -from openkb.skill_workspace import ( +from openkb.skill.workspace import ( list_iterations, restore_iteration, save_iteration, From 1679c8cedac512583003e0d18b0ef19b07cd84db Mon Sep 17 00:00:00 2001 From: mountain Date: Mon, 18 May 2026 19:47:39 +0800 Subject: [PATCH 25/25] docs(readme): drop duplicate Features section MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Features list at the top duplicated everything already covered by Quick Start (step-by-step feature tour) + Usage (Wiki Foundation + Generators tables). It also duplicated the Skill Factory slogan that now lives canonically in the Skill Factory subsection. Replace 17 lines with a one-sentence pointer to Usage. 405 → 389 lines. Slogan now appears exactly once (in the Skill Factory subsection) instead of 5 places. Other duplicates (PageIndex mentions, cross-CLI install instructions, Karpathy comparison table) left for now — they target different audiences (scanners vs deep readers) and aren't worth touching in this pass. --- README.md | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/README.md b/README.md index 5497ec4a..f9c73619 100644 --- a/README.md +++ b/README.md @@ -22,23 +22,7 @@ The idea is based on a [concept](https://x.com/karpathy/status/20398056595256445 Traditional RAG rediscovers knowledge from scratch on every query. Nothing accumulates. OpenKB compiles knowledge once into a persistent wiki, then keeps it current. Cross-references already exist. Contradictions are flagged. Synthesis reflects everything consumed. -### Features - -OpenKB has two layers: a **wiki foundation** that compiles and maintains your knowledge, and **generators** that turn that wiki into useful artifacts. - -**Wiki foundation** — compile and maintain -- **Broad format support** — PDF, Word, Markdown, PowerPoint, HTML, Excel, text, and more via markitdown -- **Scale to long documents** — Handled via [PageIndex](https://github.com/VectifyAI/PageIndex) tree indexing, enabling accurate, vectorless long-context retrieval -- **Native multi-modality** — Retrieves and understands figures, tables, and images, not just text -- **Compiled Wiki** — LLM compiles documents into summaries, concept pages, and cross-links, all kept in sync -- **Lint** — Health checks find contradictions, gaps, orphans, and stale content -- **Watch mode** — Drop files into `raw/`, wiki updates automatically -- **Obsidian compatible** — Wiki is plain `.md` files with `[[wikilinks]]`. Open in Obsidian for graph view and browsing - -**Generators** — turn the wiki into something useful -- **Query** — Ask questions (one-off) against your wiki. The LLM navigates compiled knowledge to answer -- **Chat** — Multi-turn conversations with persisted sessions you can resume across runs -- **Skill Factory** — *Drop in a book. Out comes a digital expert.* Compile any subset of your wiki into a redistributable [Anthropic Skill](https://docs.claude.com/en/docs/build-with-claude/skills) installable in Claude Code / Codex / Gemini CLI / Cursor +OpenKB has two layers: a **wiki foundation** that compiles and maintains your knowledge, and **generators** (query / chat / Skill Factory) that turn it into useful output. See [Usage](#️-usage) for the full command list. # 🚀 Getting Started