From 06ed99bf79eb47d85fd679f200296e0a6284ceb9 Mon Sep 17 00:00:00 2001
From: mountain <kose2livs@gmail.com>
Date: Mon, 18 May 2026 12:55:02 +0800
Subject: [PATCH 01/25] feat(skill): scaffold openkb/prompts/ for static system
 prompts

---
 openkb/prompts/__init__.py      | 21 +++++++++++++++++++++
 openkb/prompts/skill_compile.md |  1 +
 2 files changed, 22 insertions(+)
 create mode 100644 openkb/prompts/__init__.py
 create mode 100644 openkb/prompts/skill_compile.md
diff --git a/openkb/prompts/__init__.py b/openkb/prompts/__init__.py
new file mode 100644
index 00000000..c28b2b1d
--- /dev/null
+++ b/openkb/prompts/__init__.py
@@ -0,0 +1,21 @@
+"""Static prompt templates loaded from disk.
+
+Keeping multi-paragraph LLM system prompts in `.md` files (rather than triple-
+quoted Python strings) makes them readable in editors with markdown previews
+and easier to diff/review.
+"""
+from __future__ import annotations
+
+from pathlib import Path
+
+_PROMPTS_DIR = Path(__file__).parent
+
+
+def load_prompt(name: str) -> str:
+    """Return the contents of ``openkb/prompts/<name>.md`` as a string.
+
+    Args:
+        name: Filename without the ``.md`` suffix (e.g. ``"skill_compile"``).
+    """
+    path = _PROMPTS_DIR / f"{name}.md"
+    return path.read_text(encoding="utf-8")
diff --git a/openkb/prompts/skill_compile.md b/openkb/prompts/skill_compile.md
new file mode 100644
index 00000000..cd88e865
--- /dev/null
+++ b/openkb/prompts/skill_compile.md
@@ -0,0 +1 @@
+PLACEHOLDER — populated in Task 4.

From 4f1db4709bbe30910252ffdb686fbb25ee8251f6 Mon Sep 17 00:00:00 2001
From: mountain <kose2livs@gmail.com>
Date: Mon, 18 May 2026 12:59:26 +0800
Subject: [PATCH 02/25] feat(skill): path-scoped IO tools for the skill compile
 agent

---
 openkb/agent/skill_tools.py | 68 ++++++++++++++++++++++++++++++++++++
 tests/test_skill_tools.py   | 69 +++++++++++++++++++++++++++++++++++++
 2 files changed, 137 insertions(+)
 create mode 100644 openkb/agent/skill_tools.py
 create mode 100644 tests/test_skill_tools.py

diff --git a/openkb/agent/skill_tools.py b/openkb/agent/skill_tools.py
new file mode 100644
index 00000000..ef24d94e
--- /dev/null
+++ b/openkb/agent/skill_tools.py
@@ -0,0 +1,68 @@
+"""Path-scoped IO tools for the skill-compile agent.
+
+The skill-compile agent runs with three capabilities:
+  * READ from <kb>/wiki/**            (the substrate)
+  * QUERY the wiki via openkb query   (semantic retrieval, see skill_compiler.py)
+  * WRITE under <kb>/output/skills/<name>/**   (the only output destination)
+
+These helpers enforce those boundaries at the Python level — every tool
+resolves its target path, then verifies it stays inside the allowed root.
+Path traversal (``..``) and absolute paths are rejected outright.
+"""
+from __future__ import annotations
+
+from pathlib import Path
+
+
+def list_wiki_dir(directory: str, wiki_root: str) -> str:
+    """List ``.md`` files in a wiki subdirectory.
+
+    Args:
+        directory: Path relative to *wiki_root* (e.g. ``"concepts"``).
+        wiki_root: Absolute path to ``<kb>/wiki``.
+    """
+    root = Path(wiki_root).resolve()
+    target = (root / directory).resolve()
+    if not target.is_relative_to(root):
+        return "Access denied: path escapes wiki root."
+    if not target.exists() or not target.is_dir():
+        return "No files found."
+    names = sorted(p.name for p in target.iterdir() if p.suffix == ".md")
+    return "\n".join(names) if names else "No files found."
+
+
+def read_wiki_file_for_skill(path: str, wiki_root: str) -> str:
+    """Read a Markdown file from the wiki.
+
+    Args:
+        path: File path relative to *wiki_root* (e.g. ``"concepts/attention.md"``).
+        wiki_root: Absolute path to ``<kb>/wiki``.
+    """
+    root = Path(wiki_root).resolve()
+    full = (root / path).resolve()
+    if not full.is_relative_to(root):
+        return "Access denied: path escapes wiki root."
+    if not full.exists():
+        return f"File not found: {path}"
+    return full.read_text(encoding="utf-8")
+
+
+def write_skill_file(path: str, content: str, skill_root: str) -> str:
+    """Write a file under the skill directory.
+
+    Args:
+        path: Path relative to *skill_root* (e.g. ``"SKILL.md"`` or
+            ``"references/methodology.md"``). Absolute paths and ``..``
+            traversal are rejected.
+        content: File contents.
+        skill_root: Absolute path to ``<kb>/output/skills/<name>``.
+    """
+    if path.startswith("/") or ".." in Path(path).parts:
+        return "Access denied: only relative paths within the skill directory are allowed."
+    root = Path(skill_root).resolve()
+    full = (root / path).resolve()
+    if not full.is_relative_to(root):
+        return "Access denied: path escapes skill root."
+    full.parent.mkdir(parents=True, exist_ok=True)
+    full.write_text(content, encoding="utf-8")
+    return f"Written: {path}"
diff --git a/tests/test_skill_tools.py b/tests/test_skill_tools.py
new file mode 100644
index 00000000..3c620c97
--- /dev/null
+++ b/tests/test_skill_tools.py
@@ -0,0 +1,69 @@
+"""Tests for openkb.agent.skill_tools — path-scoped IO for the skill compile agent."""
+from __future__ import annotations
+
+import pytest
+
+from openkb.agent.skill_tools import (
+    list_wiki_dir,
+    read_wiki_file_for_skill,
+    write_skill_file,
+)
+
+
+def _make_wiki(tmp_path):
+    wiki = tmp_path / "wiki"
+    (wiki / "concepts").mkdir(parents=True)
+    (wiki / "summaries").mkdir(parents=True)
+    (wiki / "index.md").write_text("# index\n")
+    (wiki / "concepts" / "attention.md").write_text("# attention\n")
+    return wiki
+
+
+def test_read_wiki_file_returns_content(tmp_path):
+    wiki = _make_wiki(tmp_path)
+    out = read_wiki_file_for_skill("concepts/attention.md", str(wiki))
+    assert "attention" in out
+
+
+def test_read_wiki_file_rejects_escape(tmp_path):
+    wiki = _make_wiki(tmp_path)
+    out = read_wiki_file_for_skill("../secret.txt", str(wiki))
+    assert "Access denied" in out
+
+
+def test_list_wiki_dir_returns_filenames(tmp_path):
+    wiki = _make_wiki(tmp_path)
+    out = list_wiki_dir("concepts", str(wiki))
+    assert "attention.md" in out
+
+
+def test_list_wiki_dir_handles_missing(tmp_path):
+    wiki = _make_wiki(tmp_path)
+    out = list_wiki_dir("nonexistent", str(wiki))
+    assert out == "No files found."
+
+
+def test_write_skill_file_creates_file_and_parents(tmp_path):
+    skill_root = tmp_path / "output" / "skills" / "demo"
+    out = write_skill_file(
+        "references/methodology.md",
+        "# methodology\n",
+        str(skill_root),
+    )
+    assert "Written" in out
+    assert (skill_root / "references" / "methodology.md").read_text() == "# methodology\n"
+
+
+def test_write_skill_file_rejects_path_traversal(tmp_path):
+    skill_root = tmp_path / "output" / "skills" / "demo"
+    skill_root.mkdir(parents=True)
+    out = write_skill_file("../escaped.md", "x", str(skill_root))
+    assert "Access denied" in out
+    assert not (skill_root.parent / "escaped.md").exists()
+
+
+def test_write_skill_file_rejects_absolute_path(tmp_path):
+    skill_root = tmp_path / "output" / "skills" / "demo"
+    skill_root.mkdir(parents=True)
+    out = write_skill_file("/etc/passwd", "x", str(skill_root))
+    assert "Access denied" in out

From fd26e264752152259fade4b88f094ca7cdafd4b3 Mon Sep 17 00:00:00 2001
From: mountain <kose2livs@gmail.com>
Date: Mon, 18 May 2026 13:02:57 +0800
Subject: [PATCH 03/25] feat(skill): kebab-case skill name validation

---
 openkb/cli.py                       | 22 ++++++++++++++++++
 tests/test_skill_name_validation.py | 35 +++++++++++++++++++++++++++++
 2 files changed, 57 insertions(+)
 create mode 100644 tests/test_skill_name_validation.py

diff --git a/openkb/cli.py b/openkb/cli.py
index 60d8551a..f5efc6e4 100644
--- a/openkb/cli.py
+++ b/openkb/cli.py
@@ -143,6 +143,28 @@ def _find_kb_dir(override: Path | None = None) -> Path | None:
     return None
 
 
+def _validate_skill_name(name: str) -> str | None:
+    """Validate a skill slug. Returns None if OK, an error message if not.
+
+    Rules: lowercase ``[a-z0-9-]``, no leading/trailing dash, no consecutive
+    dashes, 1-64 characters. This matches the directory name we'll create
+    under ``<kb>/output/skills/`` and the ``name:`` frontmatter field.
+    """
+    if not name:
+        return "Skill name must not be empty."
+    if len(name) > 64:
+        return "Skill name must be at most 64 characters."
+    if not all(c.islower() or c.isdigit() or c == "-" for c in name):
+        return "Skill name must contain only lowercase letters, digits, and dashes."
+    if name.startswith("-"):
+        return "Skill name must not have a leading dash."
+    if name.endswith("-"):
+        return "Skill name must not have a trailing dash."
+    if "--" in name:
+        return "Skill name must not contain consecutive dashes."
+    return None
+
+
 def add_single_file(file_path: Path, kb_dir: Path) -> Literal["added", "skipped", "failed"]:
     """Convert, index, and compile a single document into the knowledge base.
 
diff --git a/tests/test_skill_name_validation.py b/tests/test_skill_name_validation.py
new file mode 100644
index 00000000..47e548dd
--- /dev/null
+++ b/tests/test_skill_name_validation.py
@@ -0,0 +1,35 @@
+"""Tests for openkb.cli._validate_skill_name — kebab-case slug enforcement."""
+from __future__ import annotations
+
+import pytest
+
+from openkb.cli import _validate_skill_name
+
+
+@pytest.mark.parametrize("name", [
+    "karpathy-thinking",
+    "us-tax-2026",
+    "linalg-tutor",
+    "a",
+    "a-b-c-d-e-f-g",
+])
+def test_accepts_valid_kebab_case(name):
+    assert _validate_skill_name(name) is None  # None means OK
+
+
+@pytest.mark.parametrize("name,reason_fragment", [
+    ("", "empty"),
+    ("UPPER", "lowercase"),
+    ("has space", "lowercase"),
+    ("under_score", "lowercase"),
+    ("dots.bad", "lowercase"),
+    ("-leading", "leading"),
+    ("trailing-", "trailing"),
+    ("double--dash", "consecutive"),
+    ("../escape", "lowercase"),
+    ("a" * 65, "64 characters"),
+])
+def test_rejects_invalid_names(name, reason_fragment):
+    msg = _validate_skill_name(name)
+    assert msg is not None
+    assert reason_fragment in msg.lower()

From 11b52ca586b9b336c03031bb986549e80e875308 Mon Sep 17 00:00:00 2001
From: mountain <kose2livs@gmail.com>
Date: Mon, 18 May 2026 13:05:37 +0800
Subject: [PATCH 04/25] fix(skill): _validate_skill_name now enforces ASCII
 slug strictly
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The previous c.islower() check accepted Unicode lowercase letters like
'é' and 'ü', contradicting the [a-z0-9-] docstring promise. Names are
used as filesystem directories and YAML frontmatter, so the slug must
stay ASCII. Add explicit a-z / 0-9 range check + 2 regression tests.
---
 openkb/cli.py                       | 2 +-
 tests/test_skill_name_validation.py | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/openkb/cli.py b/openkb/cli.py
index f5efc6e4..2f525818 100644
--- a/openkb/cli.py
+++ b/openkb/cli.py
@@ -154,7 +154,7 @@ def _validate_skill_name(name: str) -> str | None:
         return "Skill name must not be empty."
     if len(name) > 64:
         return "Skill name must be at most 64 characters."
-    if not all(c.islower() or c.isdigit() or c == "-" for c in name):
+    if not all(("a" <= c <= "z") or ("0" <= c <= "9") or c == "-" for c in name):
         return "Skill name must contain only lowercase letters, digits, and dashes."
     if name.startswith("-"):
         return "Skill name must not have a leading dash."
diff --git a/tests/test_skill_name_validation.py b/tests/test_skill_name_validation.py
index 47e548dd..ab8d776a 100644
--- a/tests/test_skill_name_validation.py
+++ b/tests/test_skill_name_validation.py
@@ -28,6 +28,8 @@ def test_accepts_valid_kebab_case(name):
     ("double--dash", "consecutive"),
     ("../escape", "lowercase"),
     ("a" * 65, "64 characters"),
+    ("café", "lowercase"),
+    ("ünicöde", "lowercase"),
 ])
 def test_rejects_invalid_names(name, reason_fragment):
     msg = _validate_skill_name(name)

From f007a3d4873a7d53f13f4ccde386ed45e5ec4216 Mon Sep 17 00:00:00 2001
From: mountain <kose2livs@gmail.com>
Date: Mon, 18 May 2026 13:07:06 +0800
Subject: [PATCH 05/25] feat(skill): skill-compile system prompt template

---
 openkb/prompts/skill_compile.md | 98 ++++++++++++++++++++++++++++++++-
 1 file changed, 97 insertions(+), 1 deletion(-)

diff --git a/openkb/prompts/skill_compile.md b/openkb/prompts/skill_compile.md
index cd88e865..7023bf80 100644
--- a/openkb/prompts/skill_compile.md
+++ b/openkb/prompts/skill_compile.md
@@ -1 +1,97 @@
-PLACEHOLDER — populated in Task 4.
+You are the OpenKB skill-compile agent. Your job: read the knowledge base
+wiki at `<kb>/wiki/` and produce a redistributable Anthropic Skill at
+`<kb>/output/skills/{skill_name}/`. Other agents — Claude Code, Codex CLI,
+Gemini CLI, Cursor — will install this skill and load it on demand, so the
+output must follow the Anthropic Skills directory spec exactly.
+
+## User intent
+
+The user requested this skill with the following description. Treat it as
+authoritative; the whole skill exists to serve this intent.
+
+> {intent}
+
+## Wiki schema reference
+
+The wiki you can read from is structured as follows.
+
+{wiki_schema}
+
+## Your tools
+
+* `list_wiki_dir(directory)` — list files in a wiki subdirectory.
+* `read_wiki_file(path)` — read a markdown file under `<kb>/wiki/`.
+* `query_wiki(question)` — semantic search; returns a short answer with
+  citations. Use sparingly (it's an LLM call). Prefer direct file reads
+  when you already know which page you need.
+* `write_skill_file(path, content)` — write under
+  `<kb>/output/skills/{skill_name}/`. Path must be relative; cannot escape
+  the skill root.
+* `done(summary)` — signal completion. Call this exactly once when you are
+  finished writing files. After calling `done`, do not write any more.
+
+## Required output
+
+You MUST write `SKILL.md` at the skill root, with YAML frontmatter:
+
+```yaml
+---
+name: {skill_name}
+description: <one line, ≤ 1024 chars, written so another agent can decide
+  whether to load this skill — this is the single most important string you
+  produce>
+---
+
+# <human-friendly title>
+
+<body — instructions and methodology>
+
+## When to use this skill
+
+- <list of triggering questions or tasks>
+
+## Approach
+
+<the loading agent's high-level method>
+
+## References
+
+- [[references/<slug>]]   (only if you wrote references)
+```
+
+The `description:` field is what other agents see. It must be specific
+(not "a skill about X") and accurate. Spend extra care here.
+
+## Optional output
+
+* `references/<slug>.md` — depth material the loading agent reads on
+  demand. Use when content is too long for `SKILL.md` or only useful for
+  specific sub-questions.
+* `scripts/<name>.py` — executable helpers, ONLY if the user's intent
+  implies deterministic computation (e.g. a tax lookup, a formula
+  solver). Stick to the Python standard library; the loading environment
+  is unknown.
+
+## Working method
+
+1. Read `wiki/index.md` to see what's in the KB.
+2. Form a brief mental plan: which concepts and summaries best serve the
+   user's intent? Which references will you need?
+3. Read the relevant `wiki/concepts/*.md` and `wiki/summaries/*.md` files.
+4. Write `SKILL.md` first. Then references, then scripts (if any).
+5. Self-check: does every `[[references/...]]` link in `SKILL.md` resolve
+   to a file you actually wrote? Is the description specific? Is the
+   `name:` frontmatter field exactly `{skill_name}`?
+6. Call `done(summary)` with a one-paragraph summary of what you wrote.
+
+## Anti-hallucination rules
+
+* Cite sources for non-trivial claims using `[[concepts/<slug>]]` or
+  `[[summaries/<doc>]]` wikilinks back to the wiki.
+* If the wiki doesn't cover something the user's intent implies, write
+  what you can and note the gap in `SKILL.md`'s body — do not fabricate.
+* Do not copy large verbatim passages from `wiki/sources/`. Summarise and
+  cite. The skill must be redistributable; bulk-copying source content
+  could carry copyright risk that the user takes on at submission time.
+
+Begin.

From 6bbfb985fc5b34596172e70c23df3773a85fd332 Mon Sep 17 00:00:00 2001
From: mountain <kose2livs@gmail.com>
Date: Mon, 18 May 2026 13:10:08 +0800
Subject: [PATCH 06/25] feat(skill): build skill-compile agent + run loop

---
 openkb/agent/skill_compiler.py | 142 +++++++++++++++++++++++++++++++++
 tests/test_skill_compiler.py   |  84 +++++++++++++++++++
 2 files changed, 226 insertions(+)
 create mode 100644 openkb/agent/skill_compiler.py
 create mode 100644 tests/test_skill_compiler.py

diff --git a/openkb/agent/skill_compiler.py b/openkb/agent/skill_compiler.py
new file mode 100644
index 00000000..79b64751
--- /dev/null
+++ b/openkb/agent/skill_compiler.py
@@ -0,0 +1,142 @@
+"""The skill-compile agent.
+
+Builds on the same openai-agents-SDK + LiteLLM stack as the query agent
+in ``openkb.agent.query``. The differences:
+
+* System prompt comes from ``openkb/prompts/skill_compile.md`` and is
+  interpolated with the user's intent and the target skill name.
+* Tools are scoped: read the wiki, query the wiki, write only within
+  the target skill directory.
+* The runner verifies that ``SKILL.md`` was written before declaring
+  success — an agent that gets confused and never writes the required
+  output should fail loudly rather than silently produce an empty dir.
+"""
+from __future__ import annotations
+
+import asyncio
+from pathlib import Path
+
+from agents import Agent, Runner, function_tool
+from agents.model_settings import ModelSettings
+
+from openkb.agent.skill_tools import (
+    list_wiki_dir,
+    read_wiki_file_for_skill,
+    write_skill_file,
+)
+from openkb.prompts import load_prompt
+from openkb.schema import get_agents_md
+
+MAX_TURNS = 80  # higher than query (50) because compile can write multiple files
+
+
+def build_skill_compile_agent(
+    *,
+    wiki_root: str,
+    skill_root: str,
+    skill_name: str,
+    intent: str,
+    model: str,
+) -> Agent:
+    """Build the openai-agents Agent for compiling one skill.
+
+    Args:
+        wiki_root: ``<kb>/wiki`` absolute path.
+        skill_root: ``<kb>/output/skills/<name>`` absolute path. Will be
+            created if it does not exist.
+        skill_name: kebab-case slug, also the ``name:`` frontmatter value.
+        intent: the user's natural-language description of what this skill
+            should do.
+        model: LiteLLM-formatted model name from KB config.
+    """
+    Path(skill_root).mkdir(parents=True, exist_ok=True)
+
+    wiki_schema = get_agents_md(Path(wiki_root))
+    instructions = load_prompt("skill_compile").format(
+        intent=intent,
+        skill_name=skill_name,
+        wiki_schema=wiki_schema,
+    )
+
+    @function_tool
+    def list_wiki(directory: str) -> str:
+        """List .md files in a wiki subdirectory (e.g. 'concepts')."""
+        return list_wiki_dir(directory, wiki_root)
+
+    @function_tool
+    def read_wiki(path: str) -> str:
+        """Read a wiki markdown file by path relative to wiki/ (e.g. 'concepts/attention.md')."""
+        return read_wiki_file_for_skill(path, wiki_root)
+
+    @function_tool
+    def query_wiki(question: str) -> str:
+        """Run a semantic query over the wiki and return the answer.
+
+        Use sparingly — this is itself an LLM call. Prefer reading specific
+        files when you already know which one you want.
+        """
+        # Lazy import to avoid circular dependency at module import.
+        from openkb.agent.query import run_query
+        kb_dir = Path(wiki_root).parent
+        config_model = model
+        return asyncio.run(run_query(question, kb_dir, config_model, stream=False))
+
+    @function_tool
+    def write_skill(path: str, content: str) -> str:
+        """Write a file under the skill directory."""
+        return write_skill_file(path, content, skill_root)
+
+    @function_tool
+    def done(summary: str) -> str:
+        """Signal that the skill is complete. Call exactly once when finished."""
+        return f"Compilation marked done: {summary}"
+
+    return Agent(
+        name="skill-compiler",
+        instructions=instructions,
+        tools=[list_wiki, read_wiki, query_wiki, write_skill, done],
+        model=f"litellm/{model}",
+        model_settings=ModelSettings(parallel_tool_calls=False),
+    )
+
+
+async def run_skill_compile(
+    *,
+    kb_dir: Path,
+    skill_name: str,
+    intent: str,
+    model: str,
+) -> Path:
+    """Compile a single skill from the KB's wiki.
+
+    Returns the path to the produced skill directory. Raises
+    ``RuntimeError`` if the agent finishes without writing ``SKILL.md``.
+    """
+    wiki_root = str(kb_dir / "wiki")
+    skill_root = kb_dir / "output" / "skills" / skill_name
+
+    agent = build_skill_compile_agent(
+        wiki_root=wiki_root,
+        skill_root=str(skill_root),
+        skill_name=skill_name,
+        intent=intent,
+        model=model,
+    )
+
+    # Single user message kicks off the compile. The system prompt already
+    # contains the intent — this just nudges the agent to start.
+    seed = (
+        f"Compile the skill '{skill_name}'. Follow the system prompt's "
+        f"working method. Read the wiki, then write the skill files."
+    )
+
+    await Runner.run(agent, seed, max_turns=MAX_TURNS)
+
+    if not (skill_root / "SKILL.md").exists():
+        raise RuntimeError(
+            f"Skill compilation finished but agent did not write SKILL.md "
+            f"at {skill_root}. The skill is incomplete; check the wiki has "
+            f"content related to your intent."
+        )
+
+    return skill_root
diff --git a/tests/test_skill_compiler.py b/tests/test_skill_compiler.py
new file mode 100644
index 00000000..6fd06f22
--- /dev/null
+++ b/tests/test_skill_compiler.py
@@ -0,0 +1,84 @@
+"""Tests for openkb.agent.skill_compiler.
+
+The agent itself is mocked (we don't want to spend tokens in unit tests).
+What we DO test:
+  * Tools wire up correctly (write_skill_file is bound to the right path)
+  * System prompt gets the intent and skill_name interpolated
+  * The skill output dir is created before the agent starts
+  * If the agent finishes without writing SKILL.md, we surface an error
+"""
+from __future__ import annotations
+
+import pytest
+from unittest.mock import AsyncMock, patch
+
+from openkb.agent.skill_compiler import (
+    build_skill_compile_agent,
+    run_skill_compile,
+)
+
+
+def _make_kb(tmp_path):
+    (tmp_path / "wiki" / "concepts").mkdir(parents=True)
+    (tmp_path / "wiki" / "summaries").mkdir(parents=True)
+    (tmp_path / "wiki" / "index.md").write_text("# index\n\nNo concepts yet.\n")
+    (tmp_path / ".openkb").mkdir()
+    (tmp_path / ".openkb" / "config.yaml").write_text("model: gpt-4o-mini\n")
+    return tmp_path
+
+
+def test_build_agent_interpolates_intent_and_name(tmp_path):
+    kb = _make_kb(tmp_path)
+    agent = build_skill_compile_agent(
+        wiki_root=str(kb / "wiki"),
+        skill_root=str(kb / "output" / "skills" / "demo"),
+        skill_name="demo",
+        intent="distill a tax-lookup skill",
+        model="gpt-4o-mini",
+    )
+    assert "demo" in agent.instructions
+    assert "distill a tax-lookup skill" in agent.instructions
+
+
+@pytest.mark.asyncio
+async def test_run_skill_compile_creates_output_dir(tmp_path):
+    kb = _make_kb(tmp_path)
+    target = kb / "output" / "skills" / "demo"
+    # Fake the agent run: just write a minimal SKILL.md to simulate success.
+    async def fake_runner(*args, **kwargs):
+        target.mkdir(parents=True, exist_ok=True)
+        (target / "SKILL.md").write_text(
+            "---\nname: demo\ndescription: test\n---\n\n# demo\n"
+        )
+        from types import SimpleNamespace
+        return SimpleNamespace(final_output="done")
+
+    with patch("openkb.agent.skill_compiler.Runner.run", new=AsyncMock(side_effect=fake_runner)):
+        await run_skill_compile(
+            kb_dir=kb,
+            skill_name="demo",
+            intent="test intent",
+            model="gpt-4o-mini",
+        )
+
+    assert (target / "SKILL.md").exists()
+
+
+@pytest.mark.asyncio
+async def test_run_skill_compile_raises_when_no_skill_md_written(tmp_path):
+    kb = _make_kb(tmp_path)
+    target = kb / "output" / "skills" / "demo"
+    target.mkdir(parents=True, exist_ok=True)
+    # Agent runs but never writes SKILL.md.
+    async def fake_runner(*args, **kwargs):
+        from types import SimpleNamespace
+        return SimpleNamespace(final_output="done")
+
+    with patch("openkb.agent.skill_compiler.Runner.run", new=AsyncMock(side_effect=fake_runner)):
+        with pytest.raises(RuntimeError, match="did not write SKILL.md"):
+            await run_skill_compile(
+                kb_dir=kb,
+                skill_name="demo",
+                intent="test intent",
+                model="gpt-4o-mini",
+            )

From 9cbc5c6ca2fda06bc564bb4f3cdcd7636905d0bf Mon Sep 17 00:00:00 2001
From: mountain <kose2livs@gmail.com>
Date: Mon, 18 May 2026 13:13:57 +0800
Subject: [PATCH 07/25] feat(skill): regenerate per-KB marketplace.json from
 output/skills/

---
 openkb/marketplace.py     | 111 ++++++++++++++++++++++++++++++++++++++
 tests/test_marketplace.py | 106 ++++++++++++++++++++++++++++++++++++
 2 files changed, 217 insertions(+)
 create mode 100644 openkb/marketplace.py
 create mode 100644 tests/test_marketplace.py

diff --git a/openkb/marketplace.py b/openkb/marketplace.py
new file mode 100644
index 00000000..3e803c4d
--- /dev/null
+++ b/openkb/marketplace.py
@@ -0,0 +1,111 @@
+"""Regenerate the per-KB Claude Code plugin marketplace manifest.
+
+After every `openkb skill new` (and after any chat-side edit to a SKILL.md
+frontmatter), this module scans ``<kb>/output/skills/*/SKILL.md`` and
+rewrites ``<kb>/.claude-plugin/marketplace.json`` listing all currently
+compiled skills.
+
+The schema matches the OpenKB repo's own ``.claude-plugin/marketplace.json``:
+one plugin entry per KB, with a ``skills`` array of relative paths. Other
+agent CLIs (``npx skills add``, Claude Code ``/plugin marketplace add``)
+install from this manifest.
+
+This is a deterministic step — no LLM calls.
+"""
+from __future__ import annotations
+
+import json
+import re
+from pathlib import Path
+from typing import Any
+
+from openkb.config import load_config
+
+
+_FRONTMATTER_RE = re.compile(r"^---\s*\n(.*?)\n---", re.DOTALL)
+_DESCRIPTION_RE = re.compile(r"^description:\s*(.+?)\s*$", re.MULTILINE)
+
+
+def _read_skill_description(skill_md: Path) -> str:
+    """Pull the ``description:`` field from a SKILL.md frontmatter block.
+
+    Returns an empty string if missing or unparseable — the manifest still
+    lists the skill, just with a generic plugin-level description.
+    """
+    if not skill_md.exists():
+        return ""
+    text = skill_md.read_text(encoding="utf-8")
+    fm_match = _FRONTMATTER_RE.match(text)
+    if not fm_match:
+        return ""
+    desc_match = _DESCRIPTION_RE.search(fm_match.group(1))
+    if not desc_match:
+        return ""
+    return desc_match.group(1).strip()
+
+
+def _kb_name(kb_dir: Path) -> str:
+    """Use the KB directory name as the marketplace name (sluggable)."""
+    return kb_dir.name
+
+
+def _list_skill_dirs(kb_dir: Path) -> list[Path]:
+    """Return skill directories under <kb>/output/skills/ that contain a SKILL.md."""
+    skills_root = kb_dir / "output" / "skills"
+    if not skills_root.is_dir():
+        return []
+    return sorted(
+        d for d in skills_root.iterdir()
+        if d.is_dir() and (d / "SKILL.md").exists()
+    )
+
+
+def _build_manifest(kb_dir: Path) -> dict[str, Any]:
+    skills = _list_skill_dirs(kb_dir)
+    skill_paths = [f"./output/skills/{d.name}" for d in skills]
+
+    # Aggregate description for the manifest metadata
+    name = _kb_name(kb_dir)
+    metadata_desc = (
+        f"Skills compiled from the '{name}' knowledge base via OpenKB."
+    )
+    if skills:
+        first_desc = _read_skill_description(skills[0] / "SKILL.md")
+        if first_desc:
+            metadata_desc += f" Featured: {first_desc[:200]}"
+
+    # Pull KB config for version if available; default to 0.1.0
+    config = load_config(kb_dir / ".openkb" / "config.yaml")
+    version = str(config.get("version", "0.1.0"))
+
+    return {
+        "name": name,
+        "metadata": {
+            "description": metadata_desc,
+            "version": version,
+        },
+        "plugins": [
+            {
+                "name": name,
+                "description": metadata_desc,
+                "source": "./",
+                "version": version,
+                "skills": skill_paths,
+            }
+        ],
+    }
+
+
+def regenerate_marketplace(kb_dir: Path) -> Path:
+    """Rewrite ``<kb>/.claude-plugin/marketplace.json`` from current skills.
+
+    Returns the path to the manifest. Creates ``.claude-plugin/`` if needed.
+    Safe to call when zero skills exist (manifest lists an empty ``skills``
+    array).
+    """
+    manifest = _build_manifest(kb_dir)
+    out_dir = kb_dir / ".claude-plugin"
+    out_dir.mkdir(parents=True, exist_ok=True)
+    out_path = out_dir / "marketplace.json"
+    out_path.write_text(json.dumps(manifest, indent=2) + "\n", encoding="utf-8")
+    return out_path
diff --git a/tests/test_marketplace.py b/tests/test_marketplace.py
new file mode 100644
index 00000000..3a7df48d
--- /dev/null
+++ b/tests/test_marketplace.py
@@ -0,0 +1,106 @@
+"""Tests for openkb.marketplace — regenerate <kb>/.claude-plugin/marketplace.json
+from <kb>/output/skills/*/SKILL.md."""
+from __future__ import annotations
+
+import json
+import textwrap
+
+from openkb.marketplace import regenerate_marketplace
+
+
+def _make_kb(tmp_path):
+    (tmp_path / ".openkb").mkdir()
+    (tmp_path / ".openkb" / "config.yaml").write_text("model: gpt-4o-mini\n")
+    (tmp_path / "output" / "skills").mkdir(parents=True)
+    return tmp_path
+
+
+def _make_skill(kb, name, description):
+    d = kb / "output" / "skills" / name
+    d.mkdir(parents=True, exist_ok=True)
+    (d / "SKILL.md").write_text(textwrap.dedent(f"""\
+        ---
+        name: {name}
+        description: {description}
+        ---
+
+        # {name}
+        """))
+
+
+def test_regenerate_creates_manifest_with_one_skill(tmp_path):
+    kb = _make_kb(tmp_path)
+    _make_skill(kb, "karpathy-thinking", "Reason like Karpathy on transformers.")
+
+    regenerate_marketplace(kb)
+
+    manifest = json.loads((kb / ".claude-plugin" / "marketplace.json").read_text())
+    assert manifest["plugins"][0]["skills"] == ["./output/skills/karpathy-thinking"]
+    # The plugin's description carries the SKILL.md description for the latest entry,
+    # OR we use a fixed manifest-level description — implementation choice; we just
+    # check the SKILL.md description appears somewhere in the manifest JSON.
+    assert "Reason like Karpathy" in json.dumps(manifest)
+
+
+def test_regenerate_lists_multiple_skills_alphabetical(tmp_path):
+    kb = _make_kb(tmp_path)
+    _make_skill(kb, "zeta-skill", "z")
+    _make_skill(kb, "alpha-skill", "a")
+    _make_skill(kb, "middle-skill", "m")
+
+    regenerate_marketplace(kb)
+
+    manifest = json.loads((kb / ".claude-plugin" / "marketplace.json").read_text())
+    assert manifest["plugins"][0]["skills"] == [
+        "./output/skills/alpha-skill",
+        "./output/skills/middle-skill",
+        "./output/skills/zeta-skill",
+    ]
+
+
+def test_regenerate_replaces_existing_file(tmp_path):
+    kb = _make_kb(tmp_path)
+    (kb / ".claude-plugin").mkdir()
+    (kb / ".claude-plugin" / "marketplace.json").write_text('{"name": "stale"}')
+
+    _make_skill(kb, "demo", "d")
+    regenerate_marketplace(kb)
+
+    manifest = json.loads((kb / ".claude-plugin" / "marketplace.json").read_text())
+    assert manifest["name"] != "stale"
+    assert manifest["plugins"][0]["skills"] == ["./output/skills/demo"]
+
+
+def test_regenerate_handles_zero_skills(tmp_path):
+    kb = _make_kb(tmp_path)
+    regenerate_marketplace(kb)
+
+    manifest = json.loads((kb / ".claude-plugin" / "marketplace.json").read_text())
+    assert manifest["plugins"][0]["skills"] == []
+
+
+def test_regenerate_skips_skill_with_missing_skill_md(tmp_path):
+    kb = _make_kb(tmp_path)
+    # Folder exists but no SKILL.md — should not appear in manifest
+    (kb / "output" / "skills" / "broken").mkdir(parents=True)
+    _make_skill(kb, "good", "g")
+
+    regenerate_marketplace(kb)
+
+    manifest = json.loads((kb / ".claude-plugin" / "marketplace.json").read_text())
+    assert manifest["plugins"][0]["skills"] == ["./output/skills/good"]
+
+
+def test_regenerate_reads_description_from_frontmatter(tmp_path):
+    kb = _make_kb(tmp_path)
+    _make_skill(kb, "demo", "the specific description goes here")
+    regenerate_marketplace(kb)
+
+    manifest = json.loads((kb / ".claude-plugin" / "marketplace.json").read_text())
+    # Manifest's metadata.description should at minimum mention the KB context;
+    # the per-skill description lives in the SKILL.md itself (the manifest just
+    # points at the skill directories).
+    assert "marketplace.json" in str(kb / ".claude-plugin" / "marketplace.json")
+    # Reload SKILL.md to confirm description preserved
+    skill_md = (kb / "output" / "skills" / "demo" / "SKILL.md").read_text()
+    assert "the specific description goes here" in skill_md

From e12dfd2bfc2d5a1659fb8afcb3918b05688e3df3 Mon Sep 17 00:00:00 2001
From: mountain <kose2livs@gmail.com>
Date: Mon, 18 May 2026 13:17:43 +0800
Subject: [PATCH 08/25] fix(skill): include owner/author in marketplace.json
 from git config

The plan-verbatim implementation omitted the 'owner' field at the top
level. Some marketplace consumers (Claude Code's /plugin marketplace add)
expect 'owner' for listing/attribution. Derive from git config; fall back
to 'openkb-user' if git isn't configured. Mirror as plugin-level 'author'
to match the existing repo convention.
---
 openkb/marketplace.py     | 38 ++++++++++++++++++++++++++---
 tests/test_marketplace.py | 51 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 85 insertions(+), 4 deletions(-)

diff --git a/openkb/marketplace.py b/openkb/marketplace.py
index 3e803c4d..48887afe 100644
--- a/openkb/marketplace.py
+++ b/openkb/marketplace.py
@@ -5,10 +5,11 @@
 rewrites ``<kb>/.claude-plugin/marketplace.json`` listing all currently
 compiled skills.
 
-The schema matches the OpenKB repo's own ``.claude-plugin/marketplace.json``:
-one plugin entry per KB, with a ``skills`` array of relative paths. Other
-agent CLIs (``npx skills add``, Claude Code ``/plugin marketplace add``)
-install from this manifest.
+The schema is a subset compatible with the OpenKB repo's own
+``.claude-plugin/marketplace.json``: one plugin entry per KB, with a
+``skills`` array of relative paths. ``owner`` is derived from git config
+so Claude Code's ``/plugin marketplace add`` accepts the manifest. Other
+agent CLIs (``npx skills add``) install from the same file.
 
 This is a deterministic step — no LLM calls.
 """
@@ -26,6 +27,32 @@
 _DESCRIPTION_RE = re.compile(r"^description:\s*(.+?)\s*$", re.MULTILINE)
 
 
+def _git_owner() -> dict[str, str]:
+    """Read user.name and user.email from git config for the manifest owner.
+
+    Falls back to placeholders if git isn't configured — the manifest is
+    still valid, just less helpful for marketplace listings.
+    """
+    import subprocess
+
+    def _git(key: str) -> str:
+        try:
+            result = subprocess.run(
+                ["git", "config", "--get", key],
+                capture_output=True, text=True, timeout=2,
+            )
+            return result.stdout.strip()
+        except (subprocess.SubprocessError, FileNotFoundError):
+            return ""
+
+    name = _git("user.name") or "openkb-user"
+    email = _git("user.email") or ""
+    owner: dict[str, str] = {"name": name}
+    if email:
+        owner["email"] = email
+    return owner
+
+
 def _read_skill_description(skill_md: Path) -> str:
     """Pull the ``description:`` field from a SKILL.md frontmatter block.
 
@@ -78,8 +105,10 @@ def _build_manifest(kb_dir: Path) -> dict[str, Any]:
     config = load_config(kb_dir / ".openkb" / "config.yaml")
     version = str(config.get("version", "0.1.0"))
 
+    owner = _git_owner()
     return {
         "name": name,
+        "owner": owner,
         "metadata": {
             "description": metadata_desc,
             "version": version,
@@ -90,6 +119,7 @@ def _build_manifest(kb_dir: Path) -> dict[str, Any]:
                 "description": metadata_desc,
                 "source": "./",
                 "version": version,
+                "author": owner,
                 "skills": skill_paths,
             }
         ],
diff --git a/tests/test_marketplace.py b/tests/test_marketplace.py
index 3a7df48d..62c94f6c 100644
--- a/tests/test_marketplace.py
+++ b/tests/test_marketplace.py
@@ -104,3 +104,54 @@ def test_regenerate_reads_description_from_frontmatter(tmp_path):
     # Reload SKILL.md to confirm description preserved
     skill_md = (kb / "output" / "skills" / "demo" / "SKILL.md").read_text()
     assert "the specific description goes here" in skill_md
+
+
+def test_regenerate_includes_owner_from_git_config(tmp_path, monkeypatch):
+    """The manifest must include an ``owner`` field at the top level so
+    that Claude Code's /plugin marketplace add accepts it."""
+    kb = _make_kb(tmp_path)
+    _make_skill(kb, "demo", "d")
+
+    # Patch subprocess.run to return a controlled git output
+    import subprocess
+    real_run = subprocess.run
+
+    def fake_run(cmd, **kwargs):
+        if cmd[:3] == ["git", "config", "--get"]:
+            key = cmd[3]
+            if key == "user.name":
+                return subprocess.CompletedProcess(cmd, 0, stdout="Test User\n", stderr="")
+            if key == "user.email":
+                return subprocess.CompletedProcess(cmd, 0, stdout="test@example.com\n", stderr="")
+        return real_run(cmd, **kwargs)
+
+    monkeypatch.setattr(subprocess, "run", fake_run)
+
+    regenerate_marketplace(kb)
+
+    manifest = json.loads((kb / ".claude-plugin" / "marketplace.json").read_text())
+    assert manifest["owner"] == {"name": "Test User", "email": "test@example.com"}
+    assert manifest["plugins"][0]["author"] == {"name": "Test User", "email": "test@example.com"}
+
+
+def test_regenerate_falls_back_when_no_git_config(tmp_path, monkeypatch):
+    """If git config is empty, manifest still generates with a placeholder."""
+    kb = _make_kb(tmp_path)
+    _make_skill(kb, "demo", "d")
+
+    import subprocess
+    real_run = subprocess.run
+
+    def fake_run(cmd, **kwargs):
+        if cmd[:3] == ["git", "config", "--get"]:
+            return subprocess.CompletedProcess(cmd, 1, stdout="", stderr="")
+        return real_run(cmd, **kwargs)
+
+    monkeypatch.setattr(subprocess, "run", fake_run)
+
+    regenerate_marketplace(kb)
+
+    manifest = json.loads((kb / ".claude-plugin" / "marketplace.json").read_text())
+    assert manifest["owner"]["name"] == "openkb-user"
+    # email is optional when missing
+    assert "email" not in manifest["owner"] or manifest["owner"]["email"] == ""

From 0fdf0c4ae6d5753030bdbe7e4aac95d01891b8df Mon Sep 17 00:00:00 2001
From: mountain <kose2livs@gmail.com>
Date: Mon, 18 May 2026 13:19:03 +0800
Subject: [PATCH 09/25] feat(skill): Generator primitive for output/* artifacts

---
 openkb/generator.py     | 66 +++++++++++++++++++++++++++++++++++++++++
 tests/test_generator.py | 60 +++++++++++++++++++++++++++++++++++++
 2 files changed, 126 insertions(+)
 create mode 100644 openkb/generator.py
 create mode 100644 tests/test_generator.py

diff --git a/openkb/generator.py b/openkb/generator.py
new file mode 100644
index 00000000..f24b4b73
--- /dev/null
+++ b/openkb/generator.py
@@ -0,0 +1,66 @@
+"""Generator primitive — shared abstraction for all `<kb>/output/<type>/` artifacts.
+
+v0.1 supports ``target_type="skill"`` only. Future targets (``"ppt"``,
+``"podcast"``, ``"report"``, ``"video"``) will register here and reuse the
+same:
+
+* output-path convention: ``<kb>/output/<type>/<name>/``
+* post-run hook: marketplace.json regeneration (so artifacts ride the same
+  distribution mechanic as skills)
+
+Each target plugs in its own `run` coroutine. v0.1's only entry calls into
+``openkb.agent.skill_compiler.run_skill_compile``.
+"""
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Literal
+
+from openkb.agent.skill_compiler import run_skill_compile
+from openkb.marketplace import regenerate_marketplace
+
+
+TargetType = Literal["skill"]  # extend as new targets land
+
+
+class Generator:
+    """A v0.1 generator instance.
+
+    Args:
+        target_type: One of the supported targets. v0.1: ``"skill"``.
+        name: kebab-case slug; becomes the output directory name.
+        intent: natural-language description of the desired artifact.
+        kb_dir: KB root.
+        model: LiteLLM model name (from KB config).
+    """
+
+    def __init__(
+        self,
+        *,
+        target_type: TargetType,
+        name: str,
+        intent: str,
+        kb_dir: Path,
+        model: str,
+    ) -> None:
+        if target_type != "skill":
+            raise ValueError(
+                f"Unknown target_type {target_type!r}. v0.1 supports only 'skill'."
+            )
+        self.target_type = target_type
+        self.name = name
+        self.intent = intent
+        self.kb_dir = kb_dir
+        self.model = model
+        self.output_dir = kb_dir / "output" / f"{target_type}s" / name
+
+    async def run(self) -> Path:
+        """Execute the generator. Returns the path to the produced artifact."""
+        await run_skill_compile(
+            kb_dir=self.kb_dir,
+            skill_name=self.name,
+            intent=self.intent,
+            model=self.model,
+        )
+        regenerate_marketplace(self.kb_dir)
+        return self.output_dir
diff --git a/tests/test_generator.py b/tests/test_generator.py
new file mode 100644
index 00000000..b2e7d0de
--- /dev/null
+++ b/tests/test_generator.py
@@ -0,0 +1,60 @@
+"""Tests for openkb.generator.Generator — the v0.1 abstraction that will
+be reused by future ppt / podcast generators.
+
+In v0.1, only target_type='skill' is supported. We test the dispatch shape
+so future targets slot in cleanly."""
+from __future__ import annotations
+
+import pytest
+from unittest.mock import AsyncMock, patch
+
+from openkb.generator import Generator
+
+
+def _make_kb(tmp_path):
+    (tmp_path / ".openkb").mkdir()
+    (tmp_path / ".openkb" / "config.yaml").write_text("model: gpt-4o-mini\n")
+    (tmp_path / "wiki").mkdir()
+    (tmp_path / "wiki" / "index.md").write_text("# index\n")
+    return tmp_path
+
+
+def test_generator_rejects_unknown_target_type(tmp_path):
+    kb = _make_kb(tmp_path)
+    with pytest.raises(ValueError, match="target_type"):
+        Generator(
+            target_type="ppt",
+            name="demo",
+            intent="x",
+            kb_dir=kb,
+            model="gpt-4o-mini",
+        )
+
+
+def test_generator_skill_target_constructs_ok(tmp_path):
+    kb = _make_kb(tmp_path)
+    g = Generator(
+        target_type="skill",
+        name="demo",
+        intent="x",
+        kb_dir=kb,
+        model="gpt-4o-mini",
+    )
+    assert g.output_dir == kb / "output" / "skills" / "demo"
+
+
+@pytest.mark.asyncio
+async def test_generator_run_delegates_to_skill_compiler(tmp_path):
+    kb = _make_kb(tmp_path)
+    g = Generator(
+        target_type="skill",
+        name="demo",
+        intent="x",
+        kb_dir=kb,
+        model="gpt-4o-mini",
+    )
+    with patch("openkb.generator.run_skill_compile", new=AsyncMock()) as runner, \
+         patch("openkb.generator.regenerate_marketplace") as regen:
+        await g.run()
+    runner.assert_awaited_once()
+    regen.assert_called_once_with(kb)

From 3c55703551ccacb627982e6f8d4a13237d101ab6 Mon Sep 17 00:00:00 2001
From: mountain <kose2livs@gmail.com>
Date: Mon, 18 May 2026 13:22:55 +0800
Subject: [PATCH 10/25] feat(skill): openkb skill new CLI command

---
 openkb/cli.py           | 111 ++++++++++++++++++++++++++++++++++++++++
 tests/test_skill_cli.py | 111 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 222 insertions(+)
 create mode 100644 tests/test_skill_cli.py

diff --git a/openkb/cli.py b/openkb/cli.py
index 2f525818..9d7d950c 100644
--- a/openkb/cli.py
+++ b/openkb/cli.py
@@ -1362,3 +1362,114 @@ def feedback(ctx, message, feedback_type):
             "  (no browser available — copy the URL above to file the issue)",
             err=True,
         )
+
+
+# ---------------------------------------------------------------------------
+# `openkb skill ...` — skill factory (v0.1)
+# ---------------------------------------------------------------------------
+
+@cli.group()
+def skill():
+    """Compile knowledge into a redistributable Anthropic Skill."""
+
+
+@skill.command("new")
+@click.argument("name")
+@click.argument("intent")
+@click.option(
+    "-y", "--yes", "yes_flag",
+    is_flag=True, default=False,
+    help="Overwrite existing output/skills/<name>/ without prompting.",
+)
+@click.pass_context
+def skill_new(ctx, name, intent, yes_flag):
+    """Compile a new skill from this KB's wiki.
+
+    NAME is a kebab-case slug used for the output directory and skill name.
+    INTENT is a natural-language description of what this skill should do.
+
+    Example:
+
+      openkb skill new karpathy-thinking "Reason about transformers like Karpathy"
+    """
+    import asyncio
+    import shutil
+    import sys
+
+    kb_dir = _find_kb_dir(ctx.obj.get("kb_dir_override"))
+    if kb_dir is None:
+        click.echo("No knowledge base found. Run `openkb init` first.", err=True)
+        ctx.exit(1)
+
+    # Validate name
+    err = _validate_skill_name(name)
+    if err:
+        click.echo(f"[ERROR] {err}", err=True)
+        ctx.exit(2)
+
+    # Validate wiki exists and has content
+    wiki = kb_dir / "wiki"
+    if not wiki.is_dir():
+        click.echo(
+            "[ERROR] No wiki found in this KB. Run `openkb add <source>` "
+            "to ingest documents first.",
+            err=True,
+        )
+        ctx.exit(1)
+    has_content = any(wiki.iterdir())
+    if not has_content:
+        click.echo(
+            "[ERROR] Wiki has no compiled content yet. Ingest at least one "
+            "document with `openkb add` first.",
+            err=True,
+        )
+        ctx.exit(1)
+
+    # Overwrite handling
+    target = kb_dir / "output" / "skills" / name
+    if target.exists():
+        if yes_flag:
+            shutil.rmtree(target)
+        elif sys.stdin.isatty():
+            if not click.confirm(
+                f"output/skills/{name}/ already exists. Overwrite?",
+                default=False,
+            ):
+                click.echo("Aborted.")
+                ctx.exit(1)
+            shutil.rmtree(target)
+        else:
+            click.echo(
+                f"[ERROR] output/skills/{name}/ exists. Pass -y to overwrite "
+                f"in non-interactive contexts.",
+                err=True,
+            )
+            ctx.exit(1)
+
+    # Load config + key
+    _setup_llm_key(kb_dir)
+    config = load_config(kb_dir / ".openkb" / "config.yaml")
+    model = config.get("model", DEFAULT_CONFIG["model"])
+
+    # Run the generator
+    from openkb.generator import Generator
+    click.echo(f"Compiling skill '{name}'...")
+    try:
+        gen = Generator(
+            target_type="skill",
+            name=name,
+            intent=intent,
+            kb_dir=kb_dir,
+            model=model,
+        )
+        asyncio.run(gen.run())
+    except RuntimeError as exc:
+        click.echo(f"[ERROR] {exc}", err=True)
+        ctx.exit(1)
+
+    click.echo(f"\nSaved: output/skills/{name}/")
+    click.echo(f"Manifest: .claude-plugin/marketplace.json updated")
+    click.echo(f"\nInstall locally:")
+    click.echo(f"  cp -r output/skills/{name} ~/.claude/skills/")
+    click.echo(f"\nShare (push KB to GitHub, then):")
+    click.echo(f"  npx skills@latest add <owner>/<repo>")
diff --git a/tests/test_skill_cli.py b/tests/test_skill_cli.py
new file mode 100644
index 00000000..c1ef6a30
--- /dev/null
+++ b/tests/test_skill_cli.py
@@ -0,0 +1,111 @@
+"""End-to-end tests for `openkb skill new` via click.testing.CliRunner.
+
+The agent runner is patched so these tests don't burn LLM tokens. They
+verify the CLI wiring: KB detection, name validation, overwrite logic,
+marketplace.json regeneration, exit codes."""
+from __future__ import annotations
+
+import json
+from unittest.mock import AsyncMock, patch
+
+import pytest
+from click.testing import CliRunner
+
+from openkb.cli import cli
+
+
+def _make_kb(tmp_path):
+    (tmp_path / ".openkb").mkdir()
+    (tmp_path / ".openkb" / "config.yaml").write_text("model: gpt-4o-mini\n")
+    (tmp_path / "wiki" / "concepts").mkdir(parents=True)
+    (tmp_path / "wiki" / "summaries").mkdir(parents=True)
+    (tmp_path / "wiki" / "index.md").write_text("# index\n")
+    return tmp_path
+
+
+def _fake_compile(kb_dir, skill_name, **_kw):
+    """Side-effect for the patched run_skill_compile: write a minimal SKILL.md."""
+    target = kb_dir / "output" / "skills" / skill_name
+    target.mkdir(parents=True, exist_ok=True)
+    (target / "SKILL.md").write_text(
+        f"---\nname: {skill_name}\ndescription: test description\n---\n\n# {skill_name}\n"
+    )
+
+
+def test_skill_new_succeeds_and_writes_files(tmp_path):
+    kb = _make_kb(tmp_path)
+    runner = CliRunner()
+
+    async def fake_run(kb_dir, skill_name, intent, model):
+        _fake_compile(kb_dir, skill_name)
+
+    with patch("openkb.cli._find_kb_dir", return_value=kb), \
+         patch("openkb.generator.run_skill_compile", new=AsyncMock(side_effect=fake_run)):
+        result = runner.invoke(cli, ["skill", "new", "demo", "test intent"])
+
+    assert result.exit_code == 0, result.output
+    assert (kb / "output" / "skills" / "demo" / "SKILL.md").exists()
+    assert (kb / ".claude-plugin" / "marketplace.json").exists()
+    manifest = json.loads((kb / ".claude-plugin" / "marketplace.json").read_text())
+    assert manifest["plugins"][0]["skills"] == ["./output/skills/demo"]
+
+
+def test_skill_new_rejects_invalid_name(tmp_path):
+    kb = _make_kb(tmp_path)
+    runner = CliRunner()
+    with patch("openkb.cli._find_kb_dir", return_value=kb):
+        result = runner.invoke(cli, ["skill", "new", "BadName", "x"])
+    assert result.exit_code != 0
+    assert "lowercase" in result.output.lower()
+
+
+def test_skill_new_errors_without_kb(tmp_path):
+    runner = CliRunner()
+    with patch("openkb.cli._find_kb_dir", return_value=None):
+        result = runner.invoke(cli, ["skill", "new", "demo", "x"])
+    assert result.exit_code != 0
+    assert "No knowledge base" in result.output
+
+
+def test_skill_new_errors_with_empty_wiki(tmp_path):
+    kb = tmp_path
+    (kb / ".openkb").mkdir()
+    (kb / ".openkb" / "config.yaml").write_text("model: gpt-4o-mini\n")
+    # No wiki/ directory
+    runner = CliRunner()
+    with patch("openkb.cli._find_kb_dir", return_value=kb):
+        result = runner.invoke(cli, ["skill", "new", "demo", "x"])
+    assert result.exit_code != 0
+    assert "wiki" in result.output.lower()
+
+
+def test_skill_new_aborts_when_target_exists_without_yes(tmp_path):
+    kb = _make_kb(tmp_path)
+    (kb / "output" / "skills" / "demo").mkdir(parents=True)
+    runner = CliRunner()
+    with patch("openkb.cli._find_kb_dir", return_value=kb):
+        # Simulate non-interactive abort (CliRunner doesn't supply a TTY,
+        # which our error path treats as "must pass -y").
+        result = runner.invoke(cli, ["skill", "new", "demo", "x"], input="n\n")
+    assert result.exit_code != 0
+    # Either it asked and we said no, or it detected non-TTY and errored out.
+    out = result.output.lower()
+    assert "exists" in out or "overwrite" in out or "aborted" in out
+
+
+def test_skill_new_overwrites_with_yes_flag(tmp_path):
+    kb = _make_kb(tmp_path)
+    (kb / "output" / "skills" / "demo").mkdir(parents=True)
+    (kb / "output" / "skills" / "demo" / "stale.txt").write_text("old")
+    runner = CliRunner()
+
+    async def fake_run(kb_dir, skill_name, intent, model):
+        _fake_compile(kb_dir, skill_name)
+
+    with patch("openkb.cli._find_kb_dir", return_value=kb), \
+         patch("openkb.generator.run_skill_compile", new=AsyncMock(side_effect=fake_run)):
+        result = runner.invoke(cli, ["skill", "new", "demo", "x", "-y"])
+
+    assert result.exit_code == 0, result.output
+    assert not (kb / "output" / "skills" / "demo" / "stale.txt").exists()
+    assert (kb / "output" / "skills" / "demo" / "SKILL.md").exists()

From e54fe8dd04d8352211a115d4eec9b0d9b057857d Mon Sep 17 00:00:00 2001
From: mountain <kose2livs@gmail.com>
Date: Mon, 18 May 2026 13:26:22 +0800
Subject: [PATCH 11/25] fix(skill): restore strict wiki-content check in skill
 new

The plan called for verifying that wiki/concepts/ or wiki/summaries/
has actual files before allowing skill compilation. Earlier impl loosened
this to 'any file in wiki/', which silently accepted freshly-init'd KBs
(openkb init pre-creates empty concepts/ + summaries/ dirs). Restore
the strict check + populate test fixture + add regression test for
the freshly-init'd case.
---
 openkb/cli.py           |  5 ++++-
 tests/test_skill_cli.py | 21 +++++++++++++++++++++
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/openkb/cli.py b/openkb/cli.py
index 9d7d950c..b96894ba 100644
--- a/openkb/cli.py
+++ b/openkb/cli.py
@@ -1416,7 +1416,10 @@ def skill_new(ctx, name, intent, yes_flag):
             err=True,
         )
         ctx.exit(1)
-    has_content = any(wiki.iterdir())
+    has_content = any(
+        (wiki / sub).is_dir() and any((wiki / sub).iterdir())
+        for sub in ("concepts", "summaries")
+    )
     if not has_content:
         click.echo(
             "[ERROR] Wiki has no compiled content yet. Ingest at least one "
diff --git a/tests/test_skill_cli.py b/tests/test_skill_cli.py
index c1ef6a30..297f9e1d 100644
--- a/tests/test_skill_cli.py
+++ b/tests/test_skill_cli.py
@@ -20,6 +20,9 @@ def _make_kb(tmp_path):
     (tmp_path / "wiki" / "concepts").mkdir(parents=True)
     (tmp_path / "wiki" / "summaries").mkdir(parents=True)
     (tmp_path / "wiki" / "index.md").write_text("# index\n")
+    # Populate the wiki with compiled content so the wiki-content gate accepts it.
+    (tmp_path / "wiki" / "concepts" / "demo.md").write_text("# demo\n")
+    (tmp_path / "wiki" / "summaries" / "demo.md").write_text("# demo\n")
     return tmp_path
 
 
@@ -79,6 +82,24 @@ def test_skill_new_errors_with_empty_wiki(tmp_path):
     assert "wiki" in result.output.lower()
 
 
+def test_skill_new_errors_with_freshly_init_wiki(tmp_path):
+    """A freshly init'd KB has wiki/ + empty concepts/ + summaries/ + index.md.
+    No documents have been ingested. The skill factory must refuse to compile
+    rather than spend tokens on an empty wiki."""
+    kb = tmp_path
+    (kb / ".openkb").mkdir()
+    (kb / ".openkb" / "config.yaml").write_text("model: gpt-4o-mini\n")
+    # Mirror openkb init's layout: empty concepts + summaries, just index.md
+    (kb / "wiki" / "concepts").mkdir(parents=True)
+    (kb / "wiki" / "summaries").mkdir(parents=True)
+    (kb / "wiki" / "index.md").write_text("# index\n")
+    runner = CliRunner()
+    with patch("openkb.cli._find_kb_dir", return_value=kb):
+        result = runner.invoke(cli, ["skill", "new", "demo", "x"])
+    assert result.exit_code != 0
+    assert "compiled content" in result.output.lower() or "ingest" in result.output.lower()
+
+
 def test_skill_new_aborts_when_target_exists_without_yes(tmp_path):
     kb = _make_kb(tmp_path)
     (kb / "output" / "skills" / "demo").mkdir(parents=True)

From 44459a1c6785e13f34c84de5722392951f63a17e Mon Sep 17 00:00:00 2001
From: mountain <kose2livs@gmail.com>
Date: Mon, 18 May 2026 13:30:21 +0800
Subject: [PATCH 12/25] feat(skill): /skill new slash command + chat write tool
 extension

---
 openkb/agent/chat.py           | 66 +++++++++++++++++++++++++++++++---
 openkb/agent/query.py          | 44 ++++++++++++++++++++++-
 openkb/agent/tools.py          | 40 +++++++++++++++++++++
 tests/test_skill_chat_slash.py | 64 +++++++++++++++++++++++++++++++++
 4 files changed, 209 insertions(+), 5 deletions(-)
 create mode 100644 tests/test_skill_chat_slash.py

diff --git a/openkb/agent/chat.py b/openkb/agent/chat.py
index 57802ce4..04ffe4f7 100644
--- a/openkb/agent/chat.py
+++ b/openkb/agent/chat.py
@@ -23,7 +23,7 @@
 from prompt_toolkit.styles import Style
 
 from openkb.agent.chat_session import ChatSession
-from openkb.agent.query import MAX_TURNS, build_query_agent
+from openkb.agent.query import MAX_TURNS, build_chat_agent, build_query_agent
 from openkb.log import append_log
 
 
@@ -59,6 +59,7 @@
     "  /list          List all documents in the knowledge base\n"
     "  /lint          Lint the knowledge base\n"
     "  /add <path>    Add a document or directory to the knowledge base\n"
+    '  /skill new <name> "<intent>"   Compile a skill from the wiki\n'
     "  /help          Show this"
 )
 
@@ -214,6 +215,7 @@ def _bottom_toolbar(session: ChatSession) -> FormattedText:
     ("/list",   "List all documents"),
     ("/lint",   "Lint the knowledge base"),
     ("/add",    "Add a document or directory"),
+    ("/skill",  "compile a skill (try `/skill new <name> \"intent\"`)"),
 ]
 
 
@@ -494,6 +496,59 @@ async def _run_add(arg: str, kb_dir: Path, style: Style) -> None:
         await asyncio.to_thread(add_single_file, target, kb_dir)
 
 
+async def _handle_slash_skill(arg: str, kb_dir: Path, style: Style) -> None:
+    """Dispatch ``/skill new <name> "<intent>"`` and any future skill subcommands."""
+    import shlex
+
+    parts = shlex.split(arg) if arg else []
+    if not parts:
+        _fmt(style, ("class:error", "Usage: /skill new <name> \"<intent>\"\n"))
+        return
+
+    sub = parts[0].lower()
+    if sub != "new":
+        _fmt(style, ("class:error", f"Unknown skill subcommand: {sub}. Try /skill new.\n"))
+        return
+
+    if len(parts) < 3:
+        _fmt(style, ("class:error", "Usage: /skill new <name> \"<intent>\"\n"))
+        return
+
+    name = parts[1]
+    intent = " ".join(parts[2:])
+
+    from openkb.cli import _validate_skill_name
+    err = _validate_skill_name(name)
+    if err:
+        _fmt(style, ("class:error", f"[ERROR] {err}\n"))
+        return
+
+    # Load model from KB config
+    from openkb.config import load_config, DEFAULT_CONFIG
+    config = load_config(kb_dir / ".openkb" / "config.yaml")
+    model = config.get("model", DEFAULT_CONFIG["model"])
+
+    from openkb.generator import Generator
+    _fmt(style, ("class:slash.help", f"Compiling skill '{name}'...\n"))
+    try:
+        gen = Generator(
+            target_type="skill",
+            name=name,
+            intent=intent,
+            kb_dir=kb_dir,
+            model=model,
+        )
+        await gen.run()
+    except RuntimeError as exc:
+        _fmt(style, ("class:error", f"[ERROR] {exc}\n"))
+        return
+
+    _fmt(style, ("class:slash.ok", f"Saved: output/skills/{name}/\n"))
+    _fmt(style, ("class:slash.help",
+        f"Iterate: ask follow-up questions in this chat and the agent can "
+        f"edit files under output/skills/{name}/ directly.\n"))
+
+
 async def _handle_slash(
     cmd: str,
     kb_dir: Path,
@@ -557,6 +612,10 @@ async def _handle_slash(
         await _run_add(arg, kb_dir, style)
         return None
 
+    if head == "/skill":
+        await _handle_slash_skill(arg, kb_dir, style)
+        return None
+
     _fmt(
         style,
         ("class:error", f"Unknown command: {head}. Try /help.\n"),
@@ -579,8 +638,7 @@ async def run_chat(
 
     config = load_config(kb_dir / ".openkb" / "config.yaml")
     language = session.language or config.get("language", "en")
-    wiki_root = str(kb_dir / "wiki")
-    agent = build_query_agent(wiki_root, session.model, language=language)
+    agent = build_chat_agent(kb_dir, session.model, language=language)
 
     _print_header(session, kb_dir, style)
     if session.turn_count > 0:
@@ -620,7 +678,7 @@ async def run_chat(
                 return
             if action == "new_session":
                 session = ChatSession.new(kb_dir, session.model, session.language)
-                agent = build_query_agent(wiki_root, session.model, language=language)
+                agent = build_chat_agent(kb_dir, session.model, language=language)
                 prompt_session = _make_prompt_session(session, style, use_color, kb_dir)
             continue
 
diff --git a/openkb/agent/query.py b/openkb/agent/query.py
index af3abfe4..790a186c 100644
--- a/openkb/agent/query.py
+++ b/openkb/agent/query.py
@@ -6,7 +6,12 @@
 from agents import Agent, Runner, function_tool
 
 from agents import ToolOutputImage, ToolOutputText
-from openkb.agent.tools import get_wiki_page_content, read_wiki_file, read_wiki_image
+from openkb.agent.tools import (
+    get_wiki_page_content,
+    read_wiki_file,
+    read_wiki_image,
+    write_kb_file,
+)
 
 MAX_TURNS = 50
 from openkb.schema import get_agents_md
@@ -91,6 +96,43 @@ def get_image(image_path: str) -> ToolOutputImage | ToolOutputText:
     )
 
 
+def build_chat_agent(
+    kb_dir: Path,
+    model: str,
+    language: str = "en",
+) -> Agent:
+    """Build the chat agent: query agent + a write tool restricted to
+    ``<kb>/wiki/explorations/**`` and ``<kb>/output/**``.
+
+    This is the variant used by the interactive ``openkb chat`` REPL so users
+    can iterate on generated artifacts (e.g. ``output/skills/<name>/``) via
+    natural-language follow-ups without giving the agent unrestricted write
+    access to the wiki.
+    """
+    wiki_root = str(kb_dir / "wiki")
+    kb_root = str(kb_dir)
+    base = build_query_agent(wiki_root, model, language=language)
+
+    @function_tool
+    def write_file(path: str, content: str) -> str:
+        """Write a text file under the KB.
+
+        Allowed paths (relative to KB root):
+          * ``wiki/explorations/**`` — chat-derived notes.
+          * ``output/**``            — generator artifacts (skills, etc.).
+
+        Any other path is rejected. Parent directories are created.
+
+        Args:
+            path: File path relative to KB root
+                (e.g. ``"output/skills/demo/SKILL.md"``).
+            content: Full text content to write (overwrites if file exists).
+        """
+        return write_kb_file(path, content, kb_root)
+
+    return base.clone(tools=[*base.tools, write_file])
+
+
 async def run_query(
     question: str,
     kb_dir: Path,
diff --git a/openkb/agent/tools.py b/openkb/agent/tools.py
index 8071e821..57f105df 100644
--- a/openkb/agent/tools.py
+++ b/openkb/agent/tools.py
@@ -167,6 +167,46 @@ def read_wiki_image(path: str, wiki_root: str) -> dict:
     return {"type": "image", "image_url": f"data:{mime};base64,{b64}"}
 
 
+def write_kb_file(path: str, content: str, kb_root: str) -> str:
+    """Write a text file under the KB, restricted to safe write zones.
+
+    Allowed prefixes (relative to *kb_root*):
+      * ``wiki/explorations/**`` — user-saved chat transcripts and notes.
+      * ``output/**``            — generator artifacts (skills, etc.) the
+        user iterates on via natural-language chat follow-ups.
+
+    Parent directories are created automatically. Any path outside the
+    allow-list is rejected.
+
+    Args:
+        path: File path relative to *kb_root*.
+        content: Text content to write.
+        kb_root: Absolute path to the KB root directory.
+
+    Returns:
+        ``"Written: {path}"`` on success, or an access-denied message.
+    """
+    root = Path(kb_root).resolve()
+    full_path = (root / path).resolve()
+    if not full_path.is_relative_to(root):
+        return "Access denied: path escapes KB root."
+    rel = full_path.relative_to(root)
+    parts = rel.parts
+    allowed = (
+        len(parts) >= 2 and parts[0] == "wiki" and parts[1] == "explorations"
+    ) or (
+        len(parts) >= 1 and parts[0] == "output"
+    )
+    if not allowed:
+        return (
+            "Access denied: writes are only allowed under "
+            "wiki/explorations/** or output/**."
+        )
+    full_path.parent.mkdir(parents=True, exist_ok=True)
+    full_path.write_text(content, encoding="utf-8")
+    return f"Written: {path}"
+
+
 def write_wiki_file(path: str, content: str, wiki_root: str) -> str:
     """Write or overwrite a Markdown file in the wiki.
 
diff --git a/tests/test_skill_chat_slash.py b/tests/test_skill_chat_slash.py
new file mode 100644
index 00000000..f1eb6dc4
--- /dev/null
+++ b/tests/test_skill_chat_slash.py
@@ -0,0 +1,64 @@
+"""Tests for the /skill new slash command inside openkb chat."""
+from __future__ import annotations
+
+import pytest
+from unittest.mock import AsyncMock, patch
+
+from prompt_toolkit.styles import Style
+
+from openkb.agent.chat import _handle_slash
+from openkb.agent.chat_session import ChatSession
+
+
+def _make_kb(tmp_path):
+    (tmp_path / ".openkb").mkdir()
+    (tmp_path / ".openkb" / "config.yaml").write_text("model: gpt-4o-mini\n")
+    (tmp_path / ".openkb" / "chats").mkdir()
+    (tmp_path / "wiki" / "concepts").mkdir(parents=True)
+    (tmp_path / "wiki" / "summaries").mkdir(parents=True)
+    (tmp_path / "wiki" / "index.md").write_text("# index\n")
+    return tmp_path
+
+
+@pytest.mark.asyncio
+async def test_slash_skill_new_calls_generator(tmp_path):
+    kb = _make_kb(tmp_path)
+    session = ChatSession.new(kb, "gpt-4o-mini", "en")
+    style = Style.from_dict({})
+
+    async def fake_run(kb_dir, skill_name, intent, model):
+        target = kb_dir / "output" / "skills" / skill_name
+        target.mkdir(parents=True, exist_ok=True)
+        (target / "SKILL.md").write_text(
+            f"---\nname: {skill_name}\ndescription: t\n---\n\n# {skill_name}\n"
+        )
+
+    with patch("openkb.generator.run_skill_compile", new=AsyncMock(side_effect=fake_run)):
+        action = await _handle_slash(
+            '/skill new demo "test intent"', kb, session, style
+        )
+
+    assert action is None  # continues chat session
+    assert (kb / "output" / "skills" / "demo" / "SKILL.md").exists()
+    assert (kb / ".claude-plugin" / "marketplace.json").exists()
+
+
+@pytest.mark.asyncio
+async def test_slash_skill_new_reports_usage_when_args_missing(tmp_path):
+    kb = _make_kb(tmp_path)
+    session = ChatSession.new(kb, "gpt-4o-mini", "en")
+    style = Style.from_dict({})
+
+    action = await _handle_slash('/skill new', kb, session, style)
+    assert action is None
+    # No skill written
+    assert not (kb / "output").exists()
+
+
+@pytest.mark.asyncio
+async def test_slash_skill_unknown_subcommand(tmp_path):
+    kb = _make_kb(tmp_path)
+    session = ChatSession.new(kb, "gpt-4o-mini", "en")
+    style = Style.from_dict({})
+    action = await _handle_slash('/skill list', kb, session, style)
+    assert action is None

From 4e99089bd8346cdec8656767924cc7f6b1a61b97 Mon Sep 17 00:00:00 2001
From: mountain <kose2livs@gmail.com>
Date: Mon, 18 May 2026 13:35:52 +0800
Subject: [PATCH 13/25] fix(skill): harden /skill new slash + write_kb_file
 edge cases

- /skill new: catch shlex.split ValueError on unclosed quotes so a typo
  doesn't crash the chat REPL
- write_kb_file: reject bare directory paths (e.g. 'output') that would
  otherwise raise IsADirectoryError on write_text
- chat.py: drop stale build_query_agent import (chat now uses build_chat_agent)
- test_chat_slash_commands.py: update patch target from build_query_agent
  -> build_chat_agent so the test exercises the right symbol
- Add tests/test_write_kb_file.py covering allow-list, traversal,
  bare-directory rejection
---
 openkb/agent/chat.py              |  8 +++-
 openkb/agent/tools.py             | 13 +++--
 tests/test_chat_slash_commands.py |  2 +-
 tests/test_write_kb_file.py       | 79 +++++++++++++++++++++++++++++++
 4 files changed, 95 insertions(+), 7 deletions(-)
 create mode 100644 tests/test_write_kb_file.py

diff --git a/openkb/agent/chat.py b/openkb/agent/chat.py
index 04ffe4f7..4b4bb7e9 100644
--- a/openkb/agent/chat.py
+++ b/openkb/agent/chat.py
@@ -23,7 +23,7 @@
 from prompt_toolkit.styles import Style
 
 from openkb.agent.chat_session import ChatSession
-from openkb.agent.query import MAX_TURNS, build_chat_agent, build_query_agent
+from openkb.agent.query import MAX_TURNS, build_chat_agent
 from openkb.log import append_log
 
 
@@ -500,7 +500,11 @@ async def _handle_slash_skill(arg: str, kb_dir: Path, style: Style) -> None:
     """Dispatch ``/skill new <name> "<intent>"`` and any future skill subcommands."""
     import shlex
 
-    parts = shlex.split(arg) if arg else []
+    try:
+        parts = shlex.split(arg) if arg else []
+    except ValueError as exc:
+        _fmt(style, ("class:error", f"[ERROR] Could not parse: {exc}\n"))
+        return
     if not parts:
         _fmt(style, ("class:error", "Usage: /skill new <name> \"<intent>\"\n"))
         return
diff --git a/openkb/agent/tools.py b/openkb/agent/tools.py
index 57f105df..76520785 100644
--- a/openkb/agent/tools.py
+++ b/openkb/agent/tools.py
@@ -186,21 +186,26 @@ def write_kb_file(path: str, content: str, kb_root: str) -> str:
     Returns:
         ``"Written: {path}"`` on success, or an access-denied message.
     """
+    if not path:
+        return "Access denied: path must be a file under wiki/explorations/ or output/."
     root = Path(kb_root).resolve()
     full_path = (root / path).resolve()
     if not full_path.is_relative_to(root):
         return "Access denied: path escapes KB root."
     rel = full_path.relative_to(root)
     parts = rel.parts
+    # Require a file path with at least one component beyond the allow-list
+    # prefix, so a bare directory name (e.g. "output") does not slip through
+    # and crash on write_text with IsADirectoryError.
     allowed = (
-        len(parts) >= 2 and parts[0] == "wiki" and parts[1] == "explorations"
+        len(parts) >= 3 and parts[0] == "wiki" and parts[1] == "explorations"
     ) or (
-        len(parts) >= 1 and parts[0] == "output"
+        len(parts) >= 2 and parts[0] == "output"
     )
     if not allowed:
         return (
-            "Access denied: writes are only allowed under "
-            "wiki/explorations/** or output/**."
+            "Access denied: path must be a file under "
+            "wiki/explorations/ or output/."
         )
     full_path.parent.mkdir(parents=True, exist_ok=True)
     full_path.write_text(content, encoding="utf-8")
diff --git a/tests/test_chat_slash_commands.py b/tests/test_chat_slash_commands.py
index 2a781da5..a77d3bda 100644
--- a/tests/test_chat_slash_commands.py
+++ b/tests/test_chat_slash_commands.py
@@ -176,7 +176,7 @@ async def prompt_async(self) -> str:
     p, collected = _collect_fmt()
     with (
         p,
-        patch("openkb.agent.chat.build_query_agent", return_value=object()),
+        patch("openkb.agent.chat.build_chat_agent", return_value=object()),
         patch("openkb.agent.chat._print_header"),
         patch("openkb.agent.chat._make_prompt_session", return_value=prompt),
         patch("openkb.agent.chat._handle_slash", new_callable=AsyncMock, side_effect=KeyboardInterrupt),
diff --git a/tests/test_write_kb_file.py b/tests/test_write_kb_file.py
new file mode 100644
index 00000000..7688b1b6
--- /dev/null
+++ b/tests/test_write_kb_file.py
@@ -0,0 +1,79 @@
+"""Regression tests for ``openkb.agent.tools.write_kb_file``.
+
+Covers the allow-list (``wiki/explorations/**`` and ``output/**``), path
+traversal rejection, the bare-directory guard (e.g. ``"output"`` alone),
+and automatic parent-directory creation.
+"""
+from __future__ import annotations
+
+from pathlib import Path
+
+import pytest
+
+from openkb.agent.tools import write_kb_file
+
+
+@pytest.fixture
+def kb_root(tmp_path: Path) -> str:
+    return str(tmp_path)
+
+
+def test_rejects_empty_path(kb_root: str) -> None:
+    result = write_kb_file("", "hi", kb_root)
+    assert result.startswith("Access denied")
+
+
+def test_rejects_bare_output_directory(kb_root: str) -> None:
+    # "output" alone resolves to the directory itself and would crash
+    # write_text with IsADirectoryError. The guard must reject it.
+    result = write_kb_file("output", "hi", kb_root)
+    assert result.startswith("Access denied")
+    # And, critically, no file/dir should have been written at that name.
+    assert not (Path(kb_root) / "output").exists() or (Path(kb_root) / "output").is_dir()
+
+
+def test_rejects_bare_wiki_explorations_directory(kb_root: str) -> None:
+    result = write_kb_file("wiki/explorations", "hi", kb_root)
+    assert result.startswith("Access denied")
+
+
+def test_rejects_outside_allow_list(kb_root: str) -> None:
+    result = write_kb_file("wiki/summaries/x.md", "hi", kb_root)
+    assert result.startswith("Access denied")
+    assert not (Path(kb_root) / "wiki" / "summaries" / "x.md").exists()
+
+
+def test_accepts_output_skill_path(kb_root: str) -> None:
+    result = write_kb_file("output/skills/demo/SKILL.md", "# demo\n", kb_root)
+    assert result == "Written: output/skills/demo/SKILL.md"
+    written = Path(kb_root) / "output" / "skills" / "demo" / "SKILL.md"
+    assert written.read_text(encoding="utf-8") == "# demo\n"
+
+
+def test_accepts_wiki_exploration(kb_root: str) -> None:
+    result = write_kb_file("wiki/explorations/transcript.md", "notes", kb_root)
+    assert result == "Written: wiki/explorations/transcript.md"
+    written = Path(kb_root) / "wiki" / "explorations" / "transcript.md"
+    assert written.read_text(encoding="utf-8") == "notes"
+
+
+def test_rejects_path_traversal(kb_root: str) -> None:
+    result = write_kb_file("../escape.md", "evil", kb_root)
+    assert result.startswith("Access denied")
+    # Must not have written outside the KB root.
+    assert not (Path(kb_root).parent / "escape.md").exists()
+
+
+def test_rejects_traversal_via_allowed_prefix(kb_root: str) -> None:
+    # Even when starting with an allowed-looking prefix, traversal must escape
+    # the KB root (Path.resolve normalizes ``..``) and be rejected.
+    result = write_kb_file("output/../../escape.md", "evil", kb_root)
+    assert result.startswith("Access denied")
+
+
+def test_creates_parent_directories(kb_root: str) -> None:
+    # Deeply nested target with no pre-existing parents.
+    target = "output/skills/new/nested/deep/SKILL.md"
+    result = write_kb_file(target, "deep", kb_root)
+    assert result == f"Written: {target}"
+    assert (Path(kb_root) / target).read_text(encoding="utf-8") == "deep"

From c961e9a2861ad4c71bd5bcff5090e8ff222928a6 Mon Sep 17 00:00:00 2001
From: mountain <kose2livs@gmail.com>
Date: Mon, 18 May 2026 13:37:36 +0800
Subject: [PATCH 14/25] docs(skill): README + CONTRIBUTING + PR template for
 skill submissions

---
 .../PULL_REQUEST_TEMPLATE/skill_submission.md | 22 ++++++
 CONTRIBUTING.md                               | 71 +++++++++++++++++++
 README.md                                     | 41 +++++++++++
 3 files changed, 134 insertions(+)
 create mode 100644 .github/PULL_REQUEST_TEMPLATE/skill_submission.md
 create mode 100644 CONTRIBUTING.md

diff --git a/.github/PULL_REQUEST_TEMPLATE/skill_submission.md b/.github/PULL_REQUEST_TEMPLATE/skill_submission.md
new file mode 100644
index 00000000..a413744a
--- /dev/null
+++ b/.github/PULL_REQUEST_TEMPLATE/skill_submission.md
@@ -0,0 +1,22 @@
+## Skill submission
+
+- **Name:** `<skill-name>`
+- **What it does (one sentence):**
+- **Source material:**
+
+### Checklist
+
+- [ ] I have the right to redistribute the source material this skill is
+      derived from (public, my own work, properly licensed, or fair-use
+      methodology only — not bulk-copied copyrighted text).
+- [ ] `SKILL.md` has valid YAML frontmatter with `name:` and `description:`
+      (description ≤ 1024 chars).
+- [ ] All `[[references/...]]` links resolve.
+- [ ] I added an entry under `plugins[0].skills` in
+      `.claude-plugin/marketplace.json`.
+- [ ] I installed and tested this skill locally in at least one agent CLI
+      (Claude Code / Codex CLI / Gemini CLI / Cursor).
+
+### Notes for reviewers
+
+<anything else maintainers should know>
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 00000000..dfbb7eb2
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,71 @@
+# Contributing to OpenKB
+
+Thanks for your interest in OpenKB! There are two ways to contribute: code (bug
+fixes, new features) and skills (compiled knowledge artifacts).
+
+## Code contributions
+
+Standard GitHub workflow:
+
+1. Fork the repo and create a feature branch off `main`.
+2. Add tests for any new behaviour. Run `uv run pytest` to verify they pass.
+3. Open a PR with a clear description and link to any related issues.
+4. Address review feedback. A maintainer will merge once everything looks good.
+
+## Submitting a skill
+
+OpenKB doubles as a registry for compiled-knowledge skills. The `skills/`
+directory in this repo hosts the official `openkb` skill plus
+community-contributed skills installable via:
+
+```bash
+npx skills@latest add VectifyAI/OpenKB
+```
+
+To submit your skill:
+
+### 1. Compile and test locally
+
+```bash
+cd <your-kb>
+openkb skill new <skill-name> "<one-sentence intent>"
+cp -r output/skills/<skill-name> ~/.claude/skills/
+# Verify the skill activates in a Claude Code session on a relevant question
+```
+
+### 2. Open a PR against this repo
+
+1. Fork `VectifyAI/OpenKB`.
+2. Copy your skill into the fork at `skills/<your-skill-name>/`.
+3. Add a new entry under `plugins[0].skills` in `.claude-plugin/marketplace.json`:
+   ```json
+   "./skills/<your-skill-name>"
+   ```
+4. Open a PR with the "Skill submission" template (auto-applied when you create
+   the PR).
+
+### 3. Review
+
+A maintainer will review your submission against this checklist:
+
+- `SKILL.md` has a valid YAML frontmatter with `name:` and `description:`
+  (description ≤ 1024 characters).
+- The `description` is specific (not "a skill about X").
+- All `[[references/...]]` links resolve to files you actually included.
+- Skill name doesn't clash with an existing entry.
+- No obvious copyright-infringing content (large verbatim copies of recent
+  copyrighted books or papers without authorisation).
+
+You're responsible for confirming you have rights to redistribute the source
+material your skill is derived from. The PR template includes a checkbox for
+this.
+
+### What makes a good community skill
+
+- **Methodology-focused**, not bulk-copied content. "How to reason about X"
+  beats "the full text of X".
+- **Specific scope** so the `description:` field can be precise enough that
+  loading agents pick it up at the right time.
+- **Tested in at least one agent CLI** before submission.
+
+Thanks for sharing what you've compiled!
diff --git a/README.md b/README.md
index 840a051d..41278d29 100644
--- a/README.md
+++ b/README.md
@@ -150,6 +150,7 @@ A single source might touch 10-15 wiki pages. Knowledge accumulates: each docume
 |---|---|
 | `openkb init` | Initialize a new knowledge base (interactive) |
 | <code>openkb&nbsp;add&nbsp;&lt;file_or_dir_or_URL&gt;</code> | Add documents and compile to wiki. URL ingest auto-detects PDF (saved as `.pdf` → PageIndex / markitdown) vs HTML (trafilatura main-content extract → `.md`) |
+| <code>openkb&nbsp;skill&nbsp;new&nbsp;&lt;name&gt;&nbsp;"&lt;intent&gt;"</code> | Compile a skill from this KB's wiki into `output/skills/<name>/` and update the marketplace manifest |
 | <code>openkb&nbsp;remove&nbsp;&lt;doc&gt;</code> | Remove a document and clean up its wiki pages, images, registry, and PageIndex state (use `--dry-run` to preview, `--keep-raw` / `--keep-empty-concepts` to retain artifacts) |
 | <code>openkb&nbsp;query&nbsp;"question"</code> | Ask a question over the knowledge base (use `--save` to save the answer to `wiki/explorations/`) |
 | `openkb chat` | Start an interactive multi-turn chat (use `--resume`, `--list`, `--delete` to manage sessions) |
@@ -161,6 +162,46 @@ A single source might touch 10-15 wiki pages. Knowledge accumulates: each docume
 
 <!-- | `openkb lint --fix` | Auto-fix what it can | -->
 
+### Skills — compile your wiki into a redistributable skill
+
+Once you have a populated wiki, you can compile a subset of it into an
+**Anthropic Skill** — a portable folder that Claude Code, Codex CLI,
+Gemini CLI, and Cursor all know how to load.
+
+```bash
+openkb skill new karpathy-thinking \
+  "Reason about transformers and attention in Karpathy's style"
+```
+
+This produces `output/skills/karpathy-thinking/` with `SKILL.md`,
+optional `references/`, and an auto-updated
+`.claude-plugin/marketplace.json` for distribution.
+
+**Install locally:**
+
+```bash
+cp -r output/skills/karpathy-thinking ~/.claude/skills/
+```
+
+**Share with others:**
+
+Push your KB directory to GitHub, then anyone can install all your skills with one command:
+
+```bash
+npx skills@latest add <your-org>/<your-repo>
+```
+
+You can also iterate inside chat:
+
+```
+/skill new karpathy-thinking "Reason about transformers like Karpathy"
+[generation streams]
+> description is too generic, make it about transformer implementations specifically
+[agent edits SKILL.md frontmatter in place]
+```
+
+See [CONTRIBUTING.md](CONTRIBUTING.md) for how to submit your compiled skill back to the OpenKB community registry.
+
 ### Interactive Chat
 
 `openkb chat` opens an interactive chat session over your wiki knowledge base. Unlike the one-shot `openkb query`, each turn carries the conversation history, so you can dig into a topic without re-typing context.

From 439d017b083b40c2232236134a21b3ac579579ea Mon Sep 17 00:00:00 2001
From: mountain <kose2livs@gmail.com>
Date: Mon, 18 May 2026 13:51:01 +0800
Subject: [PATCH 15/25] fix(skill): align tool names + add safety gates to chat
 /skill new
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

C1: /skill new in chat had only name validation — no wiki-exists check,
no wiki-content check, no overwrite guard. The CLI had all three. Extract
the gates into _preflight_skill_new and call from both. Add explicit
'remove existing skill first' message in chat (no -y equivalent there).

C2: System prompt advertised tool names (list_wiki_dir, read_wiki_file,
write_skill_file) that didn't match what was registered with
@function_tool (list_wiki, read_wiki, write_skill). LLM saw the registered
names; prompt references would confuse it. Rename the wrappers to match.

I1: query_wiki was a sync @function_tool calling asyncio.run() on
run_query — works only because openai-agents SDK runs sync tools on
worker threads. Convert to async @function_tool so the runner awaits it
in the same loop, eliminating the nested-asyncio fragility.

Add 2 regression tests for the chat safety gates.
---
 openkb/agent/chat.py           | 14 +++++-
 openkb/agent/skill_compiler.py | 28 ++++++------
 openkb/cli.py                  | 79 ++++++++++++++++++++++------------
 tests/test_skill_chat_slash.py | 41 ++++++++++++++++++
 4 files changed, 118 insertions(+), 44 deletions(-)

diff --git a/openkb/agent/chat.py b/openkb/agent/chat.py
index 4b4bb7e9..2f523dc3 100644
--- a/openkb/agent/chat.py
+++ b/openkb/agent/chat.py
@@ -521,12 +521,22 @@ async def _handle_slash_skill(arg: str, kb_dir: Path, style: Style) -> None:
     name = parts[1]
     intent = " ".join(parts[2:])
 
-    from openkb.cli import _validate_skill_name
-    err = _validate_skill_name(name)
+    # Use the same safety gates as the CLI (name validation, wiki dir,
+    # wiki content). Chat doesn't have a -y flag, so existing skills
+    # block with a clear instruction to delete first.
+    from openkb.cli import _preflight_skill_new
+    err = _preflight_skill_new(kb_dir, name, yes_flag=False)
     if err:
         _fmt(style, ("class:error", f"[ERROR] {err}\n"))
         return
 
+    target = kb_dir / "output" / "skills" / name
+    if target.exists():
+        _fmt(style, ("class:error",
+            f"[ERROR] output/skills/{name}/ already exists. Remove it first "
+            f"with `rm -rf output/skills/{name}` and re-run.\n"))
+        return
+
     # Load model from KB config
     from openkb.config import load_config, DEFAULT_CONFIG
     config = load_config(kb_dir / ".openkb" / "config.yaml")
diff --git a/openkb/agent/skill_compiler.py b/openkb/agent/skill_compiler.py
index 79b64751..0733673a 100644
--- a/openkb/agent/skill_compiler.py
+++ b/openkb/agent/skill_compiler.py
@@ -13,16 +13,15 @@
 """
 from __future__ import annotations
 
-import asyncio
 from pathlib import Path
 
 from agents import Agent, Runner, function_tool
 from agents.model_settings import ModelSettings
 
 from openkb.agent.skill_tools import (
-    list_wiki_dir,
-    read_wiki_file_for_skill,
-    write_skill_file,
+    list_wiki_dir as _list_wiki_dir_impl,
+    read_wiki_file_for_skill as _read_wiki_file_impl,
+    write_skill_file as _write_skill_file_impl,
 )
 from openkb.prompts import load_prompt
 from openkb.schema import get_agents_md
@@ -59,32 +58,31 @@ def build_skill_compile_agent(
     )
 
     @function_tool
-    def list_wiki(directory: str) -> str:
+    def list_wiki_dir(directory: str) -> str:
         """List .md files in a wiki subdirectory (e.g. 'concepts')."""
-        return list_wiki_dir(directory, wiki_root)
+        return _list_wiki_dir_impl(directory, wiki_root)
 
     @function_tool
-    def read_wiki(path: str) -> str:
+    def read_wiki_file(path: str) -> str:
         """Read a wiki markdown file by path relative to wiki/ (e.g. 'concepts/attention.md')."""
-        return read_wiki_file_for_skill(path, wiki_root)
+        return _read_wiki_file_impl(path, wiki_root)
 
     @function_tool
-    def query_wiki(question: str) -> str:
+    async def query_wiki(question: str) -> str:
         """Run a semantic query over the wiki and return the answer.
 
         Use sparingly — this is itself an LLM call. Prefer reading specific
         files when you already know which one you want.
         """
-        # Lazy import to avoid circular dependency at module import.
+        # Lazy import to avoid a circular dependency at module load time.
         from openkb.agent.query import run_query
         kb_dir = Path(wiki_root).parent
-        config_model = model
-        return asyncio.run(run_query(question, kb_dir, config_model, stream=False))
+        return await run_query(question, kb_dir, model, stream=False)
 
     @function_tool
-    def write_skill(path: str, content: str) -> str:
+    def write_skill_file(path: str, content: str) -> str:
         """Write a file under the skill directory."""
-        return write_skill_file(path, content, skill_root)
+        return _write_skill_file_impl(path, content, skill_root)
 
     @function_tool
     def done(summary: str) -> str:
@@ -94,7 +92,7 @@ def done(summary: str) -> str:
     return Agent(
         name="skill-compiler",
         instructions=instructions,
-        tools=[list_wiki, read_wiki, query_wiki, write_skill, done],
+        tools=[list_wiki_dir, read_wiki_file, query_wiki, write_skill_file, done],
         model=f"litellm/{model}",
         model_settings=ModelSettings(parallel_tool_calls=False),
     )
diff --git a/openkb/cli.py b/openkb/cli.py
index b96894ba..95e8d8d1 100644
--- a/openkb/cli.py
+++ b/openkb/cli.py
@@ -165,6 +165,54 @@ def _validate_skill_name(name: str) -> str | None:
     return None
 
 
+def _preflight_skill_new(
+    kb_dir: Path, name: str, yes_flag: bool,
+) -> str | None:
+    """Run all the safety gates for ``openkb skill new`` / ``/skill new``.
+
+    Returns ``None`` if it's safe to proceed (caller will then either
+    proceed or, for the overwrite case, call ``_clear_existing_skill_dir``
+    after confirming with the user). Returns an error message string if
+    one of the gates trips.
+
+    NOTE: This intentionally does NOT handle the overwrite confirmation
+    itself — that's caller-specific (TTY-based ``click.confirm`` in CLI;
+    explicit ``--force``-style flag in chat). It only returns an error if
+    the target dir exists AND ``yes_flag`` is False, and the caller is
+    expected to detect that error message and prompt as appropriate.
+    """
+    err = _validate_skill_name(name)
+    if err:
+        return err
+
+    wiki = kb_dir / "wiki"
+    if not wiki.is_dir():
+        return (
+            "No wiki found in this KB. Run `openkb add <source>` to "
+            "ingest documents first."
+        )
+
+    has_content = any(
+        (wiki / sub).is_dir() and any((wiki / sub).iterdir())
+        for sub in ("concepts", "summaries")
+    )
+    if not has_content:
+        return (
+            "Wiki has no compiled content yet. Ingest at least one "
+            "document with `openkb add` first."
+        )
+
+    return None
+
+
+def _clear_existing_skill_dir(kb_dir: Path, name: str) -> None:
+    """Delete an existing ``<kb>/output/skills/<name>/`` directory."""
+    import shutil
+    target = kb_dir / "output" / "skills" / name
+    if target.exists():
+        shutil.rmtree(target)
+
+
 def add_single_file(file_path: Path, kb_dir: Path) -> Literal["added", "skipped", "failed"]:
     """Convert, index, and compile a single document into the knowledge base.
 
@@ -1393,7 +1441,6 @@ def skill_new(ctx, name, intent, yes_flag):
       openkb skill new karpathy-thinking "Reason about transformers like Karpathy"
     """
     import asyncio
-    import shutil
     import sys
 
     kb_dir = _find_kb_dir(ctx.obj.get("kb_dir_override"))
@@ -1401,38 +1448,16 @@ def skill_new(ctx, name, intent, yes_flag):
         click.echo("No knowledge base found. Run `openkb init` first.", err=True)
         ctx.exit(1)
 
-    # Validate name
-    err = _validate_skill_name(name)
+    err = _preflight_skill_new(kb_dir, name, yes_flag)
     if err:
         click.echo(f"[ERROR] {err}", err=True)
-        ctx.exit(2)
-
-    # Validate wiki exists and has content
-    wiki = kb_dir / "wiki"
-    if not wiki.is_dir():
-        click.echo(
-            "[ERROR] No wiki found in this KB. Run `openkb add <source>` "
-            "to ingest documents first.",
-            err=True,
-        )
-        ctx.exit(1)
-    has_content = any(
-        (wiki / sub).is_dir() and any((wiki / sub).iterdir())
-        for sub in ("concepts", "summaries")
-    )
-    if not has_content:
-        click.echo(
-            "[ERROR] Wiki has no compiled content yet. Ingest at least one "
-            "document with `openkb add` first.",
-            err=True,
-        )
         ctx.exit(1)
 
-    # Overwrite handling
+    # Overwrite handling (CLI-specific)
     target = kb_dir / "output" / "skills" / name
     if target.exists():
         if yes_flag:
-            shutil.rmtree(target)
+            _clear_existing_skill_dir(kb_dir, name)
         elif sys.stdin.isatty():
             if not click.confirm(
                 f"output/skills/{name}/ already exists. Overwrite?",
@@ -1440,7 +1465,7 @@ def skill_new(ctx, name, intent, yes_flag):
             ):
                 click.echo("Aborted.")
                 ctx.exit(1)
-            shutil.rmtree(target)
+            _clear_existing_skill_dir(kb_dir, name)
         else:
             click.echo(
                 f"[ERROR] output/skills/{name}/ exists. Pass -y to overwrite "
diff --git a/tests/test_skill_chat_slash.py b/tests/test_skill_chat_slash.py
index f1eb6dc4..377f1a91 100644
--- a/tests/test_skill_chat_slash.py
+++ b/tests/test_skill_chat_slash.py
@@ -17,6 +17,9 @@ def _make_kb(tmp_path):
     (tmp_path / "wiki" / "concepts").mkdir(parents=True)
     (tmp_path / "wiki" / "summaries").mkdir(parents=True)
     (tmp_path / "wiki" / "index.md").write_text("# index\n")
+    # Populate so wiki-content gate accepts
+    (tmp_path / "wiki" / "concepts" / "demo.md").write_text("# demo\n")
+    (tmp_path / "wiki" / "summaries" / "demo.md").write_text("# demo\n")
     return tmp_path
 
 
@@ -62,3 +65,41 @@ async def test_slash_skill_unknown_subcommand(tmp_path):
     style = Style.from_dict({})
     action = await _handle_slash('/skill list', kb, session, style)
     assert action is None
+
+
+@pytest.mark.asyncio
+async def test_slash_skill_new_rejects_empty_wiki(tmp_path):
+    """Chat / slash command must catch freshly-init'd KBs (no compiled content)."""
+    kb = tmp_path
+    (kb / ".openkb").mkdir()
+    (kb / ".openkb" / "config.yaml").write_text("model: gpt-4o-mini\n")
+    (kb / ".openkb" / "chats").mkdir()
+    # Empty wiki/ — exactly what `openkb init` creates
+    (kb / "wiki" / "concepts").mkdir(parents=True)
+    (kb / "wiki" / "summaries").mkdir(parents=True)
+    (kb / "wiki" / "index.md").write_text("# index\n")
+
+    session = ChatSession.new(kb, "gpt-4o-mini", "en")
+    style = Style.from_dict({})
+
+    action = await _handle_slash('/skill new demo "intent"', kb, session, style)
+    assert action is None
+    assert not (kb / "output").exists()
+
+
+@pytest.mark.asyncio
+async def test_slash_skill_new_rejects_when_target_exists(tmp_path):
+    """Chat / slash command must not silently overwrite an existing skill."""
+    kb = _make_kb(tmp_path)
+    (kb / "wiki" / "concepts" / "x.md").write_text("x")
+    (kb / "wiki" / "summaries" / "x.md").write_text("x")
+    (kb / "output" / "skills" / "demo").mkdir(parents=True)
+    (kb / "output" / "skills" / "demo" / "stale.txt").write_text("old")
+
+    session = ChatSession.new(kb, "gpt-4o-mini", "en")
+    style = Style.from_dict({})
+
+    action = await _handle_slash('/skill new demo "intent"', kb, session, style)
+    assert action is None
+    # stale.txt must still be there (we didn't overwrite)
+    assert (kb / "output" / "skills" / "demo" / "stale.txt").read_text() == "old"

From 746720098ccce057498840ae9f928e7f8350ef0e Mon Sep 17 00:00:00 2001
From: mountain <kose2livs@gmail.com>
Date: Mon, 18 May 2026 15:04:53 +0800
Subject: [PATCH 16/25] fix(skill): lock marketplace naming to openkb@vectify
 convention

The previous impl used the KB directory name as both the marketplace
'name' and the plugin 'name', and stitched together a metadata
description by truncating the first skill's SKILL.md description at
200 chars (often mid-word). Lock the convention to match
skills/openkb/.claude-plugin/marketplace.json from the official skill:

- marketplace name: 'vectify' (always)
- plugin name: 'openkb' (always)
- description: fixed string, no SKILL.md content injection, no truncation

Different KBs are distinguished by <owner>/<repo> URL, not manifest
name. Users get one canonical install command (/plugin install
openkb@vectify) regardless of which KB they're consuming.

Also fix _git_owner to pass cwd=kb_dir so 'openkb --kb-dir ... skill
new' run from anywhere reads the KB's git config, not the process CWD.
---
 openkb/marketplace.py     | 36 ++++++++++++++++--------------------
 tests/test_marketplace.py | 38 +++++++++++++++++++++++++++++++++-----
 2 files changed, 49 insertions(+), 25 deletions(-)

diff --git a/openkb/marketplace.py b/openkb/marketplace.py
index 48887afe..e78df148 100644
--- a/openkb/marketplace.py
+++ b/openkb/marketplace.py
@@ -27,11 +27,12 @@
 _DESCRIPTION_RE = re.compile(r"^description:\s*(.+?)\s*$", re.MULTILINE)
 
 
-def _git_owner() -> dict[str, str]:
-    """Read user.name and user.email from git config for the manifest owner.
+def _git_owner(kb_dir: Path) -> dict[str, str]:
+    """Read user.name and user.email from git config (run in kb_dir context).
 
-    Falls back to placeholders if git isn't configured — the manifest is
-    still valid, just less helpful for marketplace listings.
+    Falls back to placeholders if git isn't configured. ``cwd=kb_dir`` so
+    that ``git config`` resolves the KB's local-or-walked-up settings,
+    not the process's working directory at the time of CLI invocation.
     """
     import subprocess
 
@@ -40,6 +41,7 @@ def _git(key: str) -> str:
             result = subprocess.run(
                 ["git", "config", "--get", key],
                 capture_output=True, text=True, timeout=2,
+                cwd=str(kb_dir),
             )
             return result.stdout.strip()
         except (subprocess.SubprocessError, FileNotFoundError):
@@ -71,11 +73,6 @@ def _read_skill_description(skill_md: Path) -> str:
     return desc_match.group(1).strip()
 
 
-def _kb_name(kb_dir: Path) -> str:
-    """Use the KB directory name as the marketplace name (sluggable)."""
-    return kb_dir.name
-
-
 def _list_skill_dirs(kb_dir: Path) -> list[Path]:
     """Return skill directories under <kb>/output/skills/ that contain a SKILL.md."""
     skills_root = kb_dir / "output" / "skills"
@@ -91,23 +88,22 @@ def _build_manifest(kb_dir: Path) -> dict[str, Any]:
     skills = _list_skill_dirs(kb_dir)
     skill_paths = [f"./output/skills/{d.name}" for d in skills]
 
-    # Aggregate description for the manifest metadata
-    name = _kb_name(kb_dir)
+    # Fixed clean descriptions — no truncation, no SKILL.md interpolation.
+    # Naming convention is locked to `openkb@vectify` so users get one
+    # canonical install command regardless of which KB they're consuming;
+    # different KBs are distinguished by <owner>/<repo> URL.
     metadata_desc = (
-        f"Skills compiled from the '{name}' knowledge base via OpenKB."
+        f"Skills compiled from the {kb_dir.name} knowledge base via OpenKB."
     )
-    if skills:
-        first_desc = _read_skill_description(skills[0] / "SKILL.md")
-        if first_desc:
-            metadata_desc += f" Featured: {first_desc[:200]}"
+    plugin_desc = "Knowledge skills compiled from this OpenKB-managed knowledge base."
 
     # Pull KB config for version if available; default to 0.1.0
     config = load_config(kb_dir / ".openkb" / "config.yaml")
     version = str(config.get("version", "0.1.0"))
 
-    owner = _git_owner()
+    owner = _git_owner(kb_dir)
     return {
-        "name": name,
+        "name": "vectify",
         "owner": owner,
         "metadata": {
             "description": metadata_desc,
@@ -115,8 +111,8 @@ def _build_manifest(kb_dir: Path) -> dict[str, Any]:
         },
         "plugins": [
             {
-                "name": name,
-                "description": metadata_desc,
+                "name": "openkb",
+                "description": plugin_desc,
                 "source": "./",
                 "version": version,
                 "author": owner,
diff --git a/tests/test_marketplace.py b/tests/test_marketplace.py
index 62c94f6c..dee118d5 100644
--- a/tests/test_marketplace.py
+++ b/tests/test_marketplace.py
@@ -36,10 +36,8 @@ def test_regenerate_creates_manifest_with_one_skill(tmp_path):
 
     manifest = json.loads((kb / ".claude-plugin" / "marketplace.json").read_text())
     assert manifest["plugins"][0]["skills"] == ["./output/skills/karpathy-thinking"]
-    # The plugin's description carries the SKILL.md description for the latest entry,
-    # OR we use a fixed manifest-level description — implementation choice; we just
-    # check the SKILL.md description appears somewhere in the manifest JSON.
-    assert "Reason like Karpathy" in json.dumps(manifest)
+    # Naming convention is locked: top-level marketplace name is always "vectify".
+    assert manifest["name"] == "vectify"
 
 
 def test_regenerate_lists_multiple_skills_alphabetical(tmp_path):
@@ -67,7 +65,7 @@ def test_regenerate_replaces_existing_file(tmp_path):
     regenerate_marketplace(kb)
 
     manifest = json.loads((kb / ".claude-plugin" / "marketplace.json").read_text())
-    assert manifest["name"] != "stale"
+    assert manifest["name"] == "vectify"
     assert manifest["plugins"][0]["skills"] == ["./output/skills/demo"]
 
 
@@ -155,3 +153,33 @@ def fake_run(cmd, **kwargs):
     assert manifest["owner"]["name"] == "openkb-user"
     # email is optional when missing
     assert "email" not in manifest["owner"] or manifest["owner"]["email"] == ""
+
+
+def test_regenerate_uses_openkb_at_vectify_convention(tmp_path):
+    """All OpenKB-generated marketplaces must self-identify as 'vectify'
+    (top level) with a plugin named 'openkb', so users install via the
+    canonical `openkb@vectify` regardless of which KB they're consuming.
+    Different KBs are distinguished by <owner>/<repo> URL, not manifest name."""
+    kb = _make_kb(tmp_path)
+    _make_skill(kb, "demo", "d")
+    regenerate_marketplace(kb)
+
+    manifest = json.loads((kb / ".claude-plugin" / "marketplace.json").read_text())
+    assert manifest["name"] == "vectify"
+    assert manifest["plugins"][0]["name"] == "openkb"
+
+
+def test_regenerate_description_is_not_truncated(tmp_path):
+    """Manifest description must be a clean fixed string — no truncation
+    of SKILL.md content, no '...' mid-word."""
+    kb = _make_kb(tmp_path)
+    _make_skill(kb, "demo", "the specific description goes here")
+    regenerate_marketplace(kb)
+
+    manifest = json.loads((kb / ".claude-plugin" / "marketplace.json").read_text())
+    # Must not contain the per-skill description (we don't inject it anymore)
+    assert "the specific description goes here" not in manifest["metadata"]["description"]
+    # Must not end with a truncated word (no trailing space-letter-letter etc.)
+    desc = manifest["metadata"]["description"]
+    assert not desc.endswith(" ")
+    assert desc.endswith(".") or desc.endswith("OpenKB.")

From 5d76d58f1a18fa25280075da4a165399afce26a6 Mon Sep 17 00:00:00 2001
From: mountain <kose2livs@gmail.com>
Date: Mon, 18 May 2026 15:33:34 +0800
Subject: [PATCH 17/25] =?UTF-8?q?refactor(skill):=20address=20PR=20#57=20r?=
 =?UTF-8?q?eview=20feedback=20+=20rename=20compiler=E2=86=92creator?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Rename openkb/agent/skill_compiler.py → skill_creator.py (and
  associated symbols + prompt file). The existing
  openkb/agent/compiler.py owns 'compile' for raw → wiki; this module
  generates a skill from compiled wiki content, which is closer to
  'create' / 'distill'. Disambiguates the verb in one package.

- Translate MaxTurnsExceeded into a friendly RuntimeError inside
  run_skill_create. Both the CLI and chat call sites only catch
  RuntimeError; the SDK exception leaked a Python traceback before.

- Defer the rmtree-on-overwrite until after _setup_llm_key and
  load_config succeed. Previously an unset API key would wipe the
  existing skill output with nothing to replace it.

- Fix marketplace.py module docstring: don't claim chat-side SKILL.md
  edits regenerate the manifest (they don't).

- Drop unused yes_flag from _preflight_skill_new + rewrite its
  docstring to match what the function actually does.

- Clean up github-code-quality bot findings: unused pytest imports
  in 2 test files, unused 'manifest' local in test_marketplace.py
  (replaced with the assertion the test intended), redundant
  in-function stdlib imports in openkb/cli.py.

Add 2 regression tests:
- test_run_skill_create_translates_max_turns_to_runtime_error
- test_skill_new_keeps_existing_skill_when_key_setup_fails
---
 openkb/agent/chat.py                          |  2 +-
 .../{skill_compiler.py => skill_creator.py}   | 31 +++++++----
 openkb/agent/skill_tools.py                   |  6 +--
 openkb/cli.py                                 | 54 ++++++++++---------
 openkb/generator.py                           |  6 +--
 openkb/marketplace.py                         | 21 +++++---
 openkb/prompts/__init__.py                    |  2 +-
 .../{skill_compile.md => skill_create.md}     |  2 +-
 tests/test_generator.py                       |  4 +-
 tests/test_marketplace.py                     | 12 +++--
 tests/test_skill_chat_slash.py                |  2 +-
 tests/test_skill_cli.py                       | 26 +++++++--
 ...kill_compiler.py => test_skill_creator.py} | 47 ++++++++++++----
 tests/test_skill_tools.py                     |  4 +-
 14 files changed, 141 insertions(+), 78 deletions(-)
 rename openkb/agent/{skill_compiler.py => skill_creator.py} (81%)
 rename openkb/prompts/{skill_compile.md => skill_create.md} (97%)
 rename tests/{test_skill_compiler.py => test_skill_creator.py} (59%)

diff --git a/openkb/agent/chat.py b/openkb/agent/chat.py
index 2f523dc3..afabc745 100644
--- a/openkb/agent/chat.py
+++ b/openkb/agent/chat.py
@@ -525,7 +525,7 @@ async def _handle_slash_skill(arg: str, kb_dir: Path, style: Style) -> None:
     # wiki content). Chat doesn't have a -y flag, so existing skills
     # block with a clear instruction to delete first.
     from openkb.cli import _preflight_skill_new
-    err = _preflight_skill_new(kb_dir, name, yes_flag=False)
+    err = _preflight_skill_new(kb_dir, name)
     if err:
         _fmt(style, ("class:error", f"[ERROR] {err}\n"))
         return
diff --git a/openkb/agent/skill_compiler.py b/openkb/agent/skill_creator.py
similarity index 81%
rename from openkb/agent/skill_compiler.py
rename to openkb/agent/skill_creator.py
index 0733673a..2e023c4e 100644
--- a/openkb/agent/skill_compiler.py
+++ b/openkb/agent/skill_creator.py
@@ -1,9 +1,9 @@
-"""The skill-compile agent.
+"""The skill-create agent.
 
 Builds on the same openai-agents-SDK + LiteLLM stack as the query agent
 in ``openkb.agent.query``. The differences:
 
-* System prompt comes from ``openkb/prompts/skill_compile.md`` and is
+* System prompt comes from ``openkb/prompts/skill_create.md`` and is
   interpolated with the user's intent and the target skill name.
 * Tools are scoped: read the wiki, query the wiki, write only within
   the target skill directory.
@@ -29,7 +29,7 @@
 MAX_TURNS = 80  # higher than query (50) because compile can write multiple files
 
 
-def build_skill_compile_agent(
+def build_skill_create_agent(
     *,
     wiki_root: str,
     skill_root: str,
@@ -51,7 +51,7 @@ def build_skill_compile_agent(
     Path(skill_root).mkdir(parents=True, exist_ok=True)
 
     wiki_schema = get_agents_md(Path(wiki_root))
-    instructions = load_prompt("skill_compile").format(
+    instructions = load_prompt("skill_create").format(
         intent=intent,
         skill_name=skill_name,
         wiki_schema=wiki_schema,
@@ -90,7 +90,7 @@ def done(summary: str) -> str:
         return f"Compilation marked done: {summary}"
 
     return Agent(
-        name="skill-compiler",
+        name="skill-creator",
         instructions=instructions,
         tools=[list_wiki_dir, read_wiki_file, query_wiki, write_skill_file, done],
         model=f"litellm/{model}",
@@ -98,7 +98,7 @@ def done(summary: str) -> str:
     )
 
 
-async def run_skill_compile(
+async def run_skill_create(
     *,
     kb_dir: Path,
     skill_name: str,
@@ -108,12 +108,13 @@ async def run_skill_compile(
     """Compile a single skill from the KB's wiki.
 
     Returns the path to the produced skill directory. Raises
-    ``RuntimeError`` if the agent finishes without writing ``SKILL.md``.
+    ``RuntimeError`` if the agent finishes without writing ``SKILL.md``,
+    or if the SDK hits its turn cap before the agent declares done.
     """
     wiki_root = str(kb_dir / "wiki")
     skill_root = kb_dir / "output" / "skills" / skill_name
 
-    agent = build_skill_compile_agent(
+    agent = build_skill_create_agent(
         wiki_root=wiki_root,
         skill_root=str(skill_root),
         skill_name=skill_name,
@@ -128,7 +129,19 @@ async def run_skill_compile(
         f"working method. Read the wiki, then write the skill files."
     )
 
-    await Runner.run(agent, seed, max_turns=MAX_TURNS)
+    # Lazy import: keeps the top of this module independent of the SDK's
+    # internal exception layout in case its export path moves.
+    from agents.exceptions import MaxTurnsExceeded
+
+    try:
+        await Runner.run(agent, seed, max_turns=MAX_TURNS)
+    except MaxTurnsExceeded as exc:
+        raise RuntimeError(
+            f"Skill compilation hit the {MAX_TURNS}-step cap before finishing. "
+            f"The wiki may be too large for a single skill, or the intent may "
+            f"be too broad. Try splitting into multiple skills with narrower "
+            f"intents, or pass a smaller wiki subset."
+        ) from exc
 
     if not (skill_root / "SKILL.md").exists():
         raise RuntimeError(
diff --git a/openkb/agent/skill_tools.py b/openkb/agent/skill_tools.py
index ef24d94e..3b7c736e 100644
--- a/openkb/agent/skill_tools.py
+++ b/openkb/agent/skill_tools.py
@@ -1,8 +1,8 @@
-"""Path-scoped IO tools for the skill-compile agent.
+"""Path-scoped IO tools for the skill-create agent.
 
-The skill-compile agent runs with three capabilities:
+The skill-create agent runs with three capabilities:
   * READ from <kb>/wiki/**            (the substrate)
-  * QUERY the wiki via openkb query   (semantic retrieval, see skill_compiler.py)
+  * QUERY the wiki via openkb query   (semantic retrieval, see skill_creator.py)
   * WRITE under <kb>/output/skills/<name>/**   (the only output destination)
 
 These helpers enforce those boundaries at the Python level — every tool
diff --git a/openkb/cli.py b/openkb/cli.py
index 95e8d8d1..5e1a26be 100644
--- a/openkb/cli.py
+++ b/openkb/cli.py
@@ -165,21 +165,20 @@ def _validate_skill_name(name: str) -> str | None:
     return None
 
 
-def _preflight_skill_new(
-    kb_dir: Path, name: str, yes_flag: bool,
-) -> str | None:
-    """Run all the safety gates for ``openkb skill new`` / ``/skill new``.
-
-    Returns ``None`` if it's safe to proceed (caller will then either
-    proceed or, for the overwrite case, call ``_clear_existing_skill_dir``
-    after confirming with the user). Returns an error message string if
-    one of the gates trips.
-
-    NOTE: This intentionally does NOT handle the overwrite confirmation
-    itself — that's caller-specific (TTY-based ``click.confirm`` in CLI;
-    explicit ``--force``-style flag in chat). It only returns an error if
-    the target dir exists AND ``yes_flag`` is False, and the caller is
-    expected to detect that error message and prompt as appropriate.
+def _preflight_skill_new(kb_dir: Path, name: str) -> str | None:
+    """Run shared safety gates for ``openkb skill new`` / ``/skill new``.
+
+    Checks (in order):
+      * skill name is a valid kebab-case slug
+      * ``<kb>/wiki`` exists
+      * ``<kb>/wiki/concepts`` or ``<kb>/wiki/summaries`` has at least
+        one file (i.e. some document has been ingested + compiled)
+
+    Returns ``None`` if all gates pass, else a single-line error message
+    suitable to print to the user.
+
+    Overwrite handling is NOT done here — the CLI handles it with
+    ``-y`` + ``click.confirm``; chat refuses overwrite outright.
     """
     err = _validate_skill_name(name)
     if err:
@@ -207,7 +206,6 @@ def _preflight_skill_new(
 
 def _clear_existing_skill_dir(kb_dir: Path, name: str) -> None:
     """Delete an existing ``<kb>/output/skills/<name>/`` directory."""
-    import shutil
     target = kb_dir / "output" / "skills" / name
     if target.exists():
         shutil.rmtree(target)
@@ -1440,20 +1438,29 @@ def skill_new(ctx, name, intent, yes_flag):
 
       openkb skill new karpathy-thinking "Reason about transformers like Karpathy"
     """
-    import asyncio
-    import sys
-
     kb_dir = _find_kb_dir(ctx.obj.get("kb_dir_override"))
     if kb_dir is None:
         click.echo("No knowledge base found. Run `openkb init` first.", err=True)
         ctx.exit(1)
 
-    err = _preflight_skill_new(kb_dir, name, yes_flag)
+    err = _preflight_skill_new(kb_dir, name)
     if err:
         click.echo(f"[ERROR] {err}", err=True)
         ctx.exit(1)
 
-    # Overwrite handling (CLI-specific)
+    # Verify LLM key + load config BEFORE touching existing output. Any
+    # failure here (missing API key, malformed config) must leave the old
+    # skill directory intact — we can't replace it if we can't proceed.
+    try:
+        _setup_llm_key(kb_dir)
+    except RuntimeError as exc:
+        click.echo(f"[ERROR] {exc}", err=True)
+        ctx.exit(1)
+    config = load_config(kb_dir / ".openkb" / "config.yaml")
+    model = config.get("model", DEFAULT_CONFIG["model"])
+
+    # Overwrite handling (CLI-specific). Done AFTER key/config so a
+    # missing key doesn't wipe the user's existing skill output.
     target = kb_dir / "output" / "skills" / name
     if target.exists():
         if yes_flag:
@@ -1474,11 +1481,6 @@ def skill_new(ctx, name, intent, yes_flag):
             )
             ctx.exit(1)
 
-    # Load config + key
-    _setup_llm_key(kb_dir)
-    config = load_config(kb_dir / ".openkb" / "config.yaml")
-    model = config.get("model", DEFAULT_CONFIG["model"])
-
     # Run the generator
     from openkb.generator import Generator
     click.echo(f"Compiling skill '{name}'...")
diff --git a/openkb/generator.py b/openkb/generator.py
index f24b4b73..843a1b06 100644
--- a/openkb/generator.py
+++ b/openkb/generator.py
@@ -9,14 +9,14 @@
   distribution mechanic as skills)
 
 Each target plugs in its own `run` coroutine. v0.1's only entry calls into
-``openkb.agent.skill_compiler.run_skill_compile``.
+``openkb.agent.skill_creator.run_skill_create``.
 """
 from __future__ import annotations
 
 from pathlib import Path
 from typing import Literal
 
-from openkb.agent.skill_compiler import run_skill_compile
+from openkb.agent.skill_creator import run_skill_create
 from openkb.marketplace import regenerate_marketplace
 
 
@@ -56,7 +56,7 @@ def __init__(
 
     async def run(self) -> Path:
         """Execute the generator. Returns the path to the produced artifact."""
-        await run_skill_compile(
+        await run_skill_create(
             kb_dir=self.kb_dir,
             skill_name=self.name,
             intent=self.intent,
diff --git a/openkb/marketplace.py b/openkb/marketplace.py
index e78df148..394952e7 100644
--- a/openkb/marketplace.py
+++ b/openkb/marketplace.py
@@ -1,17 +1,22 @@
 """Regenerate the per-KB Claude Code plugin marketplace manifest.
 
-After every `openkb skill new` (and after any chat-side edit to a SKILL.md
-frontmatter), this module scans ``<kb>/output/skills/*/SKILL.md`` and
-rewrites ``<kb>/.claude-plugin/marketplace.json`` listing all currently
+After every successful skill generation (CLI ``openkb skill new`` or
+chat ``/skill new``, both via ``Generator.run``), this module scans
+``<kb>/output/skills/*/SKILL.md`` and rewrites
+``<kb>/.claude-plugin/marketplace.json`` listing all currently
 compiled skills.
 
 The schema is a subset compatible with the OpenKB repo's own
 ``.claude-plugin/marketplace.json``: one plugin entry per KB, with a
-``skills`` array of relative paths. ``owner`` is derived from git config
-so Claude Code's ``/plugin marketplace add`` accepts the manifest. Other
-agent CLIs (``npx skills add``) install from the same file.
-
-This is a deterministic step — no LLM calls.
+``skills`` array of relative paths. ``owner`` is derived from git
+config (run with cwd=kb_dir) so Claude Code's ``/plugin marketplace
+add`` accepts the manifest. Other agent CLIs (``npx skills add``)
+install from the same file.
+
+This is a deterministic step — no LLM calls. If a chat-session edit
+to a SKILL.md changes the description after compile, the manifest is
+NOT auto-regenerated; re-run ``openkb skill new`` or
+``/skill new`` to refresh it.
 """
 from __future__ import annotations
 
diff --git a/openkb/prompts/__init__.py b/openkb/prompts/__init__.py
index c28b2b1d..b5beda88 100644
--- a/openkb/prompts/__init__.py
+++ b/openkb/prompts/__init__.py
@@ -15,7 +15,7 @@ def load_prompt(name: str) -> str:
     """Return the contents of ``openkb/prompts/<name>.md`` as a string.
 
     Args:
-        name: Filename without the ``.md`` suffix (e.g. ``"skill_compile"``).
+        name: Filename without the ``.md`` suffix (e.g. ``"skill_create"``).
     """
     path = _PROMPTS_DIR / f"{name}.md"
     return path.read_text(encoding="utf-8")
diff --git a/openkb/prompts/skill_compile.md b/openkb/prompts/skill_create.md
similarity index 97%
rename from openkb/prompts/skill_compile.md
rename to openkb/prompts/skill_create.md
index 7023bf80..bc222ef5 100644
--- a/openkb/prompts/skill_compile.md
+++ b/openkb/prompts/skill_create.md
@@ -1,4 +1,4 @@
-You are the OpenKB skill-compile agent. Your job: read the knowledge base
+You are the OpenKB skill-create agent. Your job: read the knowledge base
 wiki at `<kb>/wiki/` and produce a redistributable Anthropic Skill at
 `<kb>/output/skills/{skill_name}/`. Other agents — Claude Code, Codex CLI,
 Gemini CLI, Cursor — will install this skill and load it on demand, so the
diff --git a/tests/test_generator.py b/tests/test_generator.py
index b2e7d0de..1ef179f5 100644
--- a/tests/test_generator.py
+++ b/tests/test_generator.py
@@ -44,7 +44,7 @@ def test_generator_skill_target_constructs_ok(tmp_path):
 
 
 @pytest.mark.asyncio
-async def test_generator_run_delegates_to_skill_compiler(tmp_path):
+async def test_generator_run_delegates_to_skill_creator(tmp_path):
     kb = _make_kb(tmp_path)
     g = Generator(
         target_type="skill",
@@ -53,7 +53,7 @@ async def test_generator_run_delegates_to_skill_compiler(tmp_path):
         kb_dir=kb,
         model="gpt-4o-mini",
     )
-    with patch("openkb.generator.run_skill_compile", new=AsyncMock()) as runner, \
+    with patch("openkb.generator.run_skill_create", new=AsyncMock()) as runner, \
          patch("openkb.generator.regenerate_marketplace") as regen:
         await g.run()
     runner.assert_awaited_once()
diff --git a/tests/test_marketplace.py b/tests/test_marketplace.py
index dee118d5..a34c34ba 100644
--- a/tests/test_marketplace.py
+++ b/tests/test_marketplace.py
@@ -95,11 +95,13 @@ def test_regenerate_reads_description_from_frontmatter(tmp_path):
     regenerate_marketplace(kb)
 
     manifest = json.loads((kb / ".claude-plugin" / "marketplace.json").read_text())
-    # Manifest's metadata.description should at minimum mention the KB context;
-    # the per-skill description lives in the SKILL.md itself (the manifest just
-    # points at the skill directories).
-    assert "marketplace.json" in str(kb / ".claude-plugin" / "marketplace.json")
-    # Reload SKILL.md to confirm description preserved
+    # Per-skill descriptions live in the SKILL.md frontmatter, NOT in the
+    # marketplace manifest. The manifest just points at skill directories;
+    # the loading agent (Claude Code, npx skills) reads each SKILL.md to
+    # discover the per-skill description. Locking this in: the description
+    # must NOT leak into the top-level manifest metadata.
+    assert "the specific description goes here" not in manifest["metadata"]["description"]
+    # And the description IS preserved on disk in SKILL.md
     skill_md = (kb / "output" / "skills" / "demo" / "SKILL.md").read_text()
     assert "the specific description goes here" in skill_md
 
diff --git a/tests/test_skill_chat_slash.py b/tests/test_skill_chat_slash.py
index 377f1a91..100135a2 100644
--- a/tests/test_skill_chat_slash.py
+++ b/tests/test_skill_chat_slash.py
@@ -36,7 +36,7 @@ async def fake_run(kb_dir, skill_name, intent, model):
             f"---\nname: {skill_name}\ndescription: t\n---\n\n# {skill_name}\n"
         )
 
-    with patch("openkb.generator.run_skill_compile", new=AsyncMock(side_effect=fake_run)):
+    with patch("openkb.generator.run_skill_create", new=AsyncMock(side_effect=fake_run)):
         action = await _handle_slash(
             '/skill new demo "test intent"', kb, session, style
         )
diff --git a/tests/test_skill_cli.py b/tests/test_skill_cli.py
index 297f9e1d..4004450d 100644
--- a/tests/test_skill_cli.py
+++ b/tests/test_skill_cli.py
@@ -8,7 +8,6 @@
 import json
 from unittest.mock import AsyncMock, patch
 
-import pytest
 from click.testing import CliRunner
 
 from openkb.cli import cli
@@ -27,7 +26,7 @@ def _make_kb(tmp_path):
 
 
 def _fake_compile(kb_dir, skill_name, **_kw):
-    """Side-effect for the patched run_skill_compile: write a minimal SKILL.md."""
+    """Side-effect for the patched run_skill_create: write a minimal SKILL.md."""
     target = kb_dir / "output" / "skills" / skill_name
     target.mkdir(parents=True, exist_ok=True)
     (target / "SKILL.md").write_text(
@@ -43,7 +42,7 @@ async def fake_run(kb_dir, skill_name, intent, model):
         _fake_compile(kb_dir, skill_name)
 
     with patch("openkb.cli._find_kb_dir", return_value=kb), \
-         patch("openkb.generator.run_skill_compile", new=AsyncMock(side_effect=fake_run)):
+         patch("openkb.generator.run_skill_create", new=AsyncMock(side_effect=fake_run)):
         result = runner.invoke(cli, ["skill", "new", "demo", "test intent"])
 
     assert result.exit_code == 0, result.output
@@ -124,9 +123,28 @@ async def fake_run(kb_dir, skill_name, intent, model):
         _fake_compile(kb_dir, skill_name)
 
     with patch("openkb.cli._find_kb_dir", return_value=kb), \
-         patch("openkb.generator.run_skill_compile", new=AsyncMock(side_effect=fake_run)):
+         patch("openkb.generator.run_skill_create", new=AsyncMock(side_effect=fake_run)):
         result = runner.invoke(cli, ["skill", "new", "demo", "x", "-y"])
 
     assert result.exit_code == 0, result.output
     assert not (kb / "output" / "skills" / "demo" / "stale.txt").exists()
     assert (kb / "output" / "skills" / "demo" / "SKILL.md").exists()
+
+
+def test_skill_new_keeps_existing_skill_when_key_setup_fails(tmp_path):
+    """If LLM key setup raises (e.g. no API key), the old skill output
+    must be preserved — don't rmtree before key setup is verified."""
+    kb = _make_kb(tmp_path)
+    target = kb / "output" / "skills" / "demo"
+    target.mkdir(parents=True)
+    (target / "stale.txt").write_text("priceless")
+
+    runner = CliRunner()
+    with patch("openkb.cli._find_kb_dir", return_value=kb), \
+         patch("openkb.cli._setup_llm_key",
+               side_effect=RuntimeError("no API key configured")):
+        result = runner.invoke(cli, ["skill", "new", "demo", "x", "-y"])
+
+    assert result.exit_code != 0
+    # Old skill must still be there
+    assert (target / "stale.txt").read_text() == "priceless"
diff --git a/tests/test_skill_compiler.py b/tests/test_skill_creator.py
similarity index 59%
rename from tests/test_skill_compiler.py
rename to tests/test_skill_creator.py
index 6fd06f22..e166c418 100644
--- a/tests/test_skill_compiler.py
+++ b/tests/test_skill_creator.py
@@ -1,4 +1,4 @@
-"""Tests for openkb.agent.skill_compiler.
+"""Tests for openkb.agent.skill_creator.
 
 The agent itself is mocked (we don't want to spend tokens in unit tests).
 What we DO test:
@@ -6,15 +6,18 @@
   * System prompt gets the intent and skill_name interpolated
   * The skill output dir is created before the agent starts
   * If the agent finishes without writing SKILL.md, we surface an error
+  * If the SDK hits its max-turns cap, we translate to RuntimeError so
+    the CLI/chat call sites (which only catch RuntimeError) print a
+    friendly message instead of leaking a traceback.
 """
 from __future__ import annotations
 
 import pytest
 from unittest.mock import AsyncMock, patch
 
-from openkb.agent.skill_compiler import (
-    build_skill_compile_agent,
-    run_skill_compile,
+from openkb.agent.skill_creator import (
+    build_skill_create_agent,
+    run_skill_create,
 )
 
 
@@ -29,7 +32,7 @@ def _make_kb(tmp_path):
 
 def test_build_agent_interpolates_intent_and_name(tmp_path):
     kb = _make_kb(tmp_path)
-    agent = build_skill_compile_agent(
+    agent = build_skill_create_agent(
         wiki_root=str(kb / "wiki"),
         skill_root=str(kb / "output" / "skills" / "demo"),
         skill_name="demo",
@@ -41,7 +44,7 @@ def test_build_agent_interpolates_intent_and_name(tmp_path):
 
 
 @pytest.mark.asyncio
-async def test_run_skill_compile_creates_output_dir(tmp_path):
+async def test_run_skill_create_creates_output_dir(tmp_path):
     kb = _make_kb(tmp_path)
     target = kb / "output" / "skills" / "demo"
     # Fake the agent run: just write a minimal SKILL.md to simulate success.
@@ -53,8 +56,8 @@ async def fake_runner(*args, **kwargs):
         from types import SimpleNamespace
         return SimpleNamespace(final_output="done")
 
-    with patch("openkb.agent.skill_compiler.Runner.run", new=AsyncMock(side_effect=fake_runner)):
-        await run_skill_compile(
+    with patch("openkb.agent.skill_creator.Runner.run", new=AsyncMock(side_effect=fake_runner)):
+        await run_skill_create(
             kb_dir=kb,
             skill_name="demo",
             intent="test intent",
@@ -65,7 +68,7 @@ async def fake_runner(*args, **kwargs):
 
 
 @pytest.mark.asyncio
-async def test_run_skill_compile_raises_when_no_skill_md_written(tmp_path):
+async def test_run_skill_create_raises_when_no_skill_md_written(tmp_path):
     kb = _make_kb(tmp_path)
     target = kb / "output" / "skills" / "demo"
     target.mkdir(parents=True, exist_ok=True)
@@ -74,11 +77,33 @@ async def fake_runner(*args, **kwargs):
         from types import SimpleNamespace
         return SimpleNamespace(final_output="done")
 
-    with patch("openkb.agent.skill_compiler.Runner.run", new=AsyncMock(side_effect=fake_runner)):
+    with patch("openkb.agent.skill_creator.Runner.run", new=AsyncMock(side_effect=fake_runner)):
         with pytest.raises(RuntimeError, match="did not write SKILL.md"):
-            await run_skill_compile(
+            await run_skill_create(
                 kb_dir=kb,
                 skill_name="demo",
                 intent="test intent",
                 model="gpt-4o-mini",
             )
+
+
+@pytest.mark.asyncio
+async def test_run_skill_create_translates_max_turns_to_runtime_error(tmp_path):
+    """MaxTurnsExceeded from the SDK should be re-raised as a RuntimeError
+    with a user-friendly message — otherwise both CLI and chat call sites
+    (which only catch RuntimeError) leak a Python traceback."""
+    from agents.exceptions import MaxTurnsExceeded
+    kb = _make_kb(tmp_path)
+
+    async def fake_runner(*args, **kwargs):
+        raise MaxTurnsExceeded("agent ran out of turns")
+
+    with patch("openkb.agent.skill_creator.Runner.run",
+               new=AsyncMock(side_effect=fake_runner)):
+        with pytest.raises(RuntimeError, match="step cap"):
+            await run_skill_create(
+                kb_dir=kb,
+                skill_name="demo",
+                intent="x",
+                model="gpt-4o-mini",
+            )
diff --git a/tests/test_skill_tools.py b/tests/test_skill_tools.py
index 3c620c97..f5db686d 100644
--- a/tests/test_skill_tools.py
+++ b/tests/test_skill_tools.py
@@ -1,8 +1,6 @@
-"""Tests for openkb.agent.skill_tools — path-scoped IO for the skill compile agent."""
+"""Tests for openkb.agent.skill_tools — path-scoped IO for the skill-create agent."""
 from __future__ import annotations
 
-import pytest
-
 from openkb.agent.skill_tools import (
     list_wiki_dir,
     read_wiki_file_for_skill,

From dbd78e69799b4ae34442f6b86660912b8198f44f Mon Sep 17 00:00:00 2001
From: mountain <kose2livs@gmail.com>
Date: Mon, 18 May 2026 15:39:34 +0800
Subject: [PATCH 18/25] docs(readme): restructure into Wiki Foundation +
 Generators layers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reframe the project mental model: the wiki is the substrate, and
several primitives (query, chat, skill new) generate output from it.
Add 'Drop in a book. Out comes a digital expert.' slogan to the
Skill Factory subsection.

- Features list split into 'Wiki foundation' (compile + maintain) and
  'Generators' (query / chat / Skill Factory)
- Quick Start adds step 6 — distill a skill
- Architecture diagram extended to show the wiki branching into
  query/chat + Skill Factory + future generators (ppt/podcast/…)
- Usage section regrouped under 🧱 Wiki Foundation and ✨ Generators
- Skill Factory subsection promoted with the slogan as its heading
  and a concrete example folder tree
- Chat slash command list updated to include /skill new
---
 README.md | 121 ++++++++++++++++++++++++++++++++++--------------------
 1 file changed, 77 insertions(+), 44 deletions(-)

diff --git a/README.md b/README.md
index 41278d29..a8010e02 100644
--- a/README.md
+++ b/README.md
@@ -24,16 +24,22 @@ Traditional RAG rediscovers knowledge from scratch on every query. Nothing accum
 
 ### Features
 
+OpenKB has two layers: a **wiki foundation** that compiles and maintains your knowledge, and **generators** that turn that wiki into useful artifacts.
+
+**Wiki foundation** — compile and maintain
 - **Broad format support** — PDF, Word, Markdown, PowerPoint, HTML, Excel, text, and more via markitdown
-- **Scale to long documents** — Long and complex documents are handled via [PageIndex](https://github.com/VectifyAI/PageIndex) tree indexing, enabling accurate, vectorless long-context retrieval
+- **Scale to long documents** — Handled via [PageIndex](https://github.com/VectifyAI/PageIndex) tree indexing, enabling accurate, vectorless long-context retrieval
 - **Native multi-modality** — Retrieves and understands figures, tables, and images, not just text
-- **Compiled Wiki** — LLM manages and compiles your documents into summaries, concept pages, and cross-links, all kept in sync
-- **Query** — Ask questions (one-off) against your wiki. The LLM navigates your compiled knowledge to answer
-- **Interactive Chat** — Multi-turn conversations with persisted sessions you can resume across runs
+- **Compiled Wiki** — LLM compiles documents into summaries, concept pages, and cross-links, all kept in sync
 - **Lint** — Health checks find contradictions, gaps, orphans, and stale content
 - **Watch mode** — Drop files into `raw/`, wiki updates automatically
 - **Obsidian compatible** — Wiki is plain `.md` files with `[[wikilinks]]`. Open in Obsidian for graph view and browsing
 
+**Generators** — turn the wiki into something useful
+- **Query** — Ask questions (one-off) against your wiki. The LLM navigates compiled knowledge to answer
+- **Chat** — Multi-turn conversations with persisted sessions you can resume across runs
+- **Skill Factory** — *Drop in a book. Out comes a digital expert.* Compile any subset of your wiki into a redistributable [Anthropic Skill](https://docs.claude.com/en/docs/build-with-claude/skills) installable in Claude Code / Codex / Gemini CLI / Cursor
+
 # 🚀 Getting Started
 
 ### Install
@@ -80,6 +86,9 @@ openkb query "What are the main findings?"
 
 # 5. Or chat interactively
 openkb chat
+
+# 6. Or distill your wiki into a redistributable skill
+openkb skill new my-expert "Reason like an expert on <topic-from-your-docs>"
 ```
 
 ### Set up your LLM
@@ -109,7 +118,7 @@ raw/                              You drop files here
  │                         Wiki Compilation (using LLM)
  │                                     │
  ▼                                     ▼
-wiki/
+wiki/                                  │            ← the foundation
  ├── index.md            Knowledge base overview
  ├── log.md              Operations timeline
  ├── AGENTS.md           Wiki schema (LLM instructions)
@@ -118,6 +127,13 @@ wiki/
  ├── concepts/           Cross-document synthesis ← the good stuff
  ├── explorations/       Saved query results
  └── reports/            Lint reports
+                                       │
+                ┌──────────────────────┼──────────────────────┐
+                ▼                      ▼                      ▼
+            query / chat         Skill Factory          (future)
+          (LLM answers from     openkb skill new       ppt / podcast /
+            the wiki)           → output/skills/        report / …
+                                + marketplace.json
 ```
 
 ### Short vs. Long Document Handling
@@ -144,16 +160,15 @@ A single source might touch 10-15 wiki pages. Knowledge accumulates: each docume
 
 # ⚙️ Usage
 
-### Commands
+OpenKB commands fall into two layers: the **wiki foundation** (compile + manage your knowledge) and **generators** (turn that wiki into useful output).
+
+## 🧱 Wiki Foundation — compile and maintain
 
 | Command | Description |
 |---|---|
 | `openkb init` | Initialize a new knowledge base (interactive) |
 | <code>openkb&nbsp;add&nbsp;&lt;file_or_dir_or_URL&gt;</code> | Add documents and compile to wiki. URL ingest auto-detects PDF (saved as `.pdf` → PageIndex / markitdown) vs HTML (trafilatura main-content extract → `.md`) |
-| <code>openkb&nbsp;skill&nbsp;new&nbsp;&lt;name&gt;&nbsp;"&lt;intent&gt;"</code> | Compile a skill from this KB's wiki into `output/skills/<name>/` and update the marketplace manifest |
 | <code>openkb&nbsp;remove&nbsp;&lt;doc&gt;</code> | Remove a document and clean up its wiki pages, images, registry, and PageIndex state (use `--dry-run` to preview, `--keep-raw` / `--keep-empty-concepts` to retain artifacts) |
-| <code>openkb&nbsp;query&nbsp;"question"</code> | Ask a question over the knowledge base (use `--save` to save the answer to `wiki/explorations/`) |
-| `openkb chat` | Start an interactive multi-turn chat (use `--resume`, `--list`, `--delete` to manage sessions) |
 | `openkb watch` | Watch `raw/` and auto-compile new files |
 | `openkb lint` | Run structural + knowledge health checks |
 | `openkb list` | List indexed documents and concepts |
@@ -162,20 +177,63 @@ A single source might touch 10-15 wiki pages. Knowledge accumulates: each docume
 
 <!-- | `openkb lint --fix` | Auto-fix what it can | -->
 
-### Skills — compile your wiki into a redistributable skill
+## ✨ Generators — turn the wiki into output
+
+A "generator" reads from the compiled wiki and produces something usable: an answer, a conversation, a skill folder. The wiki is the substrate; generators are the surfaces.
+
+| Command | Output |
+|---|---|
+| <code>openkb&nbsp;query&nbsp;"question"</code> | A grounded answer with citations (use `--save` to persist to `wiki/explorations/`) |
+| `openkb chat` | Interactive multi-turn session over the wiki (use `--resume`, `--list`, `--delete` to manage sessions) |
+| <code>openkb&nbsp;skill&nbsp;new&nbsp;&lt;name&gt;&nbsp;"&lt;intent&gt;"</code> | A redistributable Anthropic Skill at `<kb>/output/skills/<name>/` + auto-updated `marketplace.json` |
+
+### Query & Chat — ask the wiki
+
+`openkb query "..."` answers a single question. `openkb chat` is interactive — each turn carries history, so you can dig into a topic without re-typing context. Both use the same underlying wiki and the same retrieval primitives (PageIndex for long docs, direct concept reads for short).
+
+```bash
+openkb query "What does the literature say about attention scaling?"
+
+openkb chat                       # start a new session
+openkb chat --resume              # resume the most recent session
+openkb chat --resume 20260411     # resume by id (unique prefix works)
+openkb chat --list                # list all sessions
+openkb chat --delete <id>         # delete a session
+```
+
+Inside a chat, type `/` to access slash commands (Tab to complete):
+
+- `/help` — list available commands
+- `/status` — show knowledge base status
+- `/list` — list all documents
+- `/add <path>` — add a document or directory without leaving the chat
+- `/skill new <name> "<intent>"` — compile a skill from this chat (see below)
+- `/save [name]` — export the transcript to `wiki/explorations/`
+- `/clear` — start a fresh session (the current one stays on disk)
+- `/lint` — run knowledge base lint
+- `/exit` — exit (Ctrl-D also works)
+
+### 🛠 Skill Factory — *Drop in a book. Out comes a digital expert.*
 
-Once you have a populated wiki, you can compile a subset of it into an
-**Anthropic Skill** — a portable folder that Claude Code, Codex CLI,
-Gemini CLI, and Cursor all know how to load.
+The newest generator. `openkb skill new` distills any subset of your wiki into an [Anthropic Skill](https://docs.claude.com/en/docs/build-with-claude/skills) — a portable folder that **Claude Code, Codex CLI, Gemini CLI, and Cursor** all install and load natively. Drop in a book's worth of papers; out comes a specialist that other agents can call on.
 
 ```bash
 openkb skill new karpathy-thinking \
   "Reason about transformers and attention in Karpathy's style"
 ```
 
-This produces `output/skills/karpathy-thinking/` with `SKILL.md`,
-optional `references/`, and an auto-updated
-`.claude-plugin/marketplace.json` for distribution.
+This produces:
+
+```
+<kb>/output/skills/karpathy-thinking/
+├── SKILL.md                   # YAML frontmatter + when-to-use + approach
+├── references/                # depth material the agent loads on demand
+│   ├── methodology.md
+│   └── key-quotes.md
+└── (scripts/)                 # optional, only if intent implies computation
+```
+
+…plus an auto-updated `<kb>/.claude-plugin/marketplace.json` so the whole KB is one-line installable.
 
 **Install locally:**
 
@@ -183,15 +241,13 @@ optional `references/`, and an auto-updated
 cp -r output/skills/karpathy-thinking ~/.claude/skills/
 ```
 
-**Share with others:**
-
-Push your KB directory to GitHub, then anyone can install all your skills with one command:
+**Share with others** — push your KB to GitHub, then anyone runs:
 
 ```bash
 npx skills@latest add <your-org>/<your-repo>
 ```
 
-You can also iterate inside chat:
+**Iterate from chat** — compilation is one-shot, but follow-up edits aren't. Inside `openkb chat`, you can refine without re-running the whole pipeline:
 
 ```
 /skill new karpathy-thinking "Reason about transformers like Karpathy"
@@ -200,30 +256,7 @@ You can also iterate inside chat:
 [agent edits SKILL.md frontmatter in place]
 ```
 
-See [CONTRIBUTING.md](CONTRIBUTING.md) for how to submit your compiled skill back to the OpenKB community registry.
-
-### Interactive Chat
-
-`openkb chat` opens an interactive chat session over your wiki knowledge base. Unlike the one-shot `openkb query`, each turn carries the conversation history, so you can dig into a topic without re-typing context.
-
-```bash
-openkb chat                       # start a new session
-openkb chat --resume              # resume the most recent session
-openkb chat --resume 20260411     # resume by id (unique prefix works)
-openkb chat --list                # list all sessions
-openkb chat --delete <id>         # delete a session
-```
-
-Inside a chat, type `/` to access slash commands (Tab to complete):
-
-- `/help` — list available commands
-- `/status` — show knowledge base status
-- `/list` — list all documents
-- `/add <path>` — add a document or directory without leaving the chat
-- `/save [name]` — export the transcript to `wiki/explorations/`
-- `/clear` — start a fresh session (the current one stays on disk)
-- `/lint` — run knowledge base lint
-- `/exit` — exit (Ctrl-D also works)
+See [CONTRIBUTING.md](CONTRIBUTING.md) for how to submit your compiled skill back to the community registry at [VectifyAI/OpenKB](https://github.com/VectifyAI/OpenKB).
 
 ### Configuration
 

From 10c19d89c855c59d89a545134d0402287233e239 Mon Sep 17 00:00:00 2001
From: mountain <kose2livs@gmail.com>
Date: Mon, 18 May 2026 16:09:15 +0800
Subject: [PATCH 19/25] =?UTF-8?q?feat(skill):=20iteration=20workspace=20?=
 =?UTF-8?q?=E2=80=94=20preserve=20history,=20rollback=20support?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Borrows from Anthropic's skill-creator: each time 'openkb skill new -y'
would overwrite an existing skill, the old version is copied to
<kb>/output/skills/<name>-workspace/iteration-N/ instead of being
destroyed. Iteration numbers monotonically increase. Each iteration
carries a diff.md showing description + reference-file delta vs the
previous version.

New commands:
  openkb skill history <name>         list past iterations
  openkb skill rollback <name>        restore latest iteration
  openkb skill rollback --to N        restore specific iteration

Tests cover: iteration numbering, restore-from-N, restore-from-latest,
diff content (description / added refs / removed refs).
---
 openkb/cli.py                 | 151 +++++++++++++++++++++++
 openkb/skill_workspace.py     | 219 ++++++++++++++++++++++++++++++++++
 tests/test_skill_cli.py       | 121 +++++++++++++++++++
 tests/test_skill_workspace.py | 153 ++++++++++++++++++++++++
 4 files changed, 644 insertions(+)
 create mode 100644 openkb/skill_workspace.py
 create mode 100644 tests/test_skill_workspace.py

diff --git a/openkb/cli.py b/openkb/cli.py
index 5e1a26be..66496a3d 100644
--- a/openkb/cli.py
+++ b/openkb/cli.py
@@ -1461,9 +1461,18 @@ def skill_new(ctx, name, intent, yes_flag):
 
     # Overwrite handling (CLI-specific). Done AFTER key/config so a
     # missing key doesn't wipe the user's existing skill output.
+    #
+    # When overwriting, we don't destroy the old skill — we copy it
+    # into <kb>/output/skills/<name>-workspace/iteration-N/ first, so
+    # the user can roll back via `openkb skill rollback`. See
+    # ``openkb/skill_workspace.py``.
+    from openkb.skill_workspace import save_iteration, write_diff
+
     target = kb_dir / "output" / "skills" / name
+    saved_iteration: Path | None = None
     if target.exists():
         if yes_flag:
+            saved_iteration = save_iteration(kb_dir, name)
             _clear_existing_skill_dir(kb_dir, name)
         elif sys.stdin.isatty():
             if not click.confirm(
@@ -1472,6 +1481,7 @@ def skill_new(ctx, name, intent, yes_flag):
             ):
                 click.echo("Aborted.")
                 ctx.exit(1)
+            saved_iteration = save_iteration(kb_dir, name)
             _clear_existing_skill_dir(kb_dir, name)
         else:
             click.echo(
@@ -1497,9 +1507,150 @@ def skill_new(ctx, name, intent, yes_flag):
         click.echo(f"[ERROR] {exc}", err=True)
         ctx.exit(1)
 
+    # Drop a structural diff inside the saved iteration so the user
+    # can see what changed since the previous compile.
+    if saved_iteration is not None:
+        try:
+            write_diff(saved_iteration, target, saved_iteration / "diff.md")
+        except Exception as exc:  # diff is best-effort; never block success
+            logging.getLogger(__name__).debug(
+                "diff generation failed: %s", exc, exc_info=True
+            )
+
     click.echo(f"\nSaved: output/skills/{name}/")
+    if saved_iteration is not None:
+        rel = saved_iteration.relative_to(kb_dir)
+        click.echo(f"Previous version: {rel}/  (run `openkb skill rollback {name}` to restore)")
     click.echo(f"Manifest: .claude-plugin/marketplace.json updated")
     click.echo(f"\nInstall locally:")
     click.echo(f"  cp -r output/skills/{name} ~/.claude/skills/")
     click.echo(f"\nShare (push KB to GitHub, then):")
     click.echo(f"  npx skills@latest add <owner>/<repo>")
+
+
+@skill.command("history")
+@click.argument("name")
+@click.pass_context
+def skill_history(ctx, name):
+    """List previous iterations of a skill."""
+    import datetime as _dt
+
+    from openkb.skill_workspace import list_iterations
+
+    kb_dir = _find_kb_dir(ctx.obj.get("kb_dir_override"))
+    if kb_dir is None:
+        click.echo("No knowledge base found. Run `openkb init` first.", err=True)
+        ctx.exit(1)
+
+    err = _validate_skill_name(name)
+    if err:
+        click.echo(f"[ERROR] {err}", err=True)
+        ctx.exit(1)
+
+    iters = list_iterations(kb_dir, name)
+    if not iters:
+        click.echo(f"No previous iterations for '{name}'.")
+        return
+
+    click.echo(f"Iterations of '{name}' ({len(iters)} total):\n")
+    click.echo("  N  Path                                                  Created")
+    click.echo("  -  --------------------------------------------------    -------")
+    for path in iters:
+        n = int(path.name.split("-", 1)[1])
+        rel = path.relative_to(kb_dir)
+        try:
+            mtime = _dt.datetime.fromtimestamp(path.stat().st_mtime)
+            stamp = mtime.strftime("%Y-%m-%d %H:%M")
+        except OSError:
+            stamp = "-"
+        click.echo(f"  {n}  {rel}  {stamp}")
+
+    current = kb_dir / "output" / "skills" / name
+    if current.is_dir():
+        rel_curr = current.relative_to(kb_dir)
+        click.echo(f"\n  Current: {rel_curr}/")
+
+    latest_n = int(iters[-1].name.split("-", 1)[1])
+    click.echo("\nRestore an iteration:")
+    click.echo(
+        f"  openkb skill rollback {name}          # restore latest (iteration-{latest_n})"
+    )
+    click.echo(
+        f"  openkb skill rollback {name} --to 1   # restore iteration-1"
+    )
+
+
+@skill.command("rollback")
+@click.argument("name")
+@click.option(
+    "--to", "to_n",
+    default=None, type=int,
+    help="Iteration number to restore. Defaults to latest.",
+)
+@click.option(
+    "-y", "--yes", "yes_flag",
+    is_flag=True, default=False,
+    help="Skip confirmation.",
+)
+@click.pass_context
+def skill_rollback(ctx, name, to_n, yes_flag):
+    """Restore a previous iteration as the current skill."""
+    from openkb.marketplace import regenerate_marketplace
+    from openkb.skill_workspace import list_iterations, restore_iteration
+
+    kb_dir = _find_kb_dir(ctx.obj.get("kb_dir_override"))
+    if kb_dir is None:
+        click.echo("No knowledge base found. Run `openkb init` first.", err=True)
+        ctx.exit(1)
+
+    err = _validate_skill_name(name)
+    if err:
+        click.echo(f"[ERROR] {err}", err=True)
+        ctx.exit(1)
+
+    iters = list_iterations(kb_dir, name)
+    if not iters:
+        click.echo(
+            f"[ERROR] No iterations exist for '{name}'. Nothing to roll back.",
+            err=True,
+        )
+        ctx.exit(1)
+
+    target_n = to_n if to_n is not None else int(iters[-1].name.split("-", 1)[1])
+    target_label = f"iteration-{target_n}"
+    if not any(p.name == target_label for p in iters):
+        click.echo(
+            f"[ERROR] Iteration {target_n} not found for '{name}'. "
+            f"Run `openkb skill history {name}` to see available iterations.",
+            err=True,
+        )
+        ctx.exit(1)
+
+    current = kb_dir / "output" / "skills" / name
+    if current.exists():
+        prompt = (
+            f"This will overwrite output/skills/{name}/ with {target_label}. Continue?"
+        )
+        if yes_flag:
+            pass
+        elif sys.stdin.isatty():
+            if not click.confirm(prompt, default=False):
+                click.echo("Aborted.")
+                ctx.exit(1)
+        else:
+            click.echo(
+                f"[ERROR] output/skills/{name}/ exists. Pass -y to overwrite "
+                f"in non-interactive contexts.",
+                err=True,
+            )
+            ctx.exit(1)
+
+    try:
+        restore_iteration(kb_dir, name, n=to_n)
+    except FileNotFoundError as exc:
+        click.echo(f"[ERROR] {exc}", err=True)
+        ctx.exit(1)
+
+    regenerate_marketplace(kb_dir)
+    click.echo(f"Restored output/skills/{name}/ from {target_label}.")
+    click.echo("Manifest: .claude-plugin/marketplace.json updated")
diff --git a/openkb/skill_workspace.py b/openkb/skill_workspace.py
new file mode 100644
index 00000000..f82a42c7
--- /dev/null
+++ b/openkb/skill_workspace.py
@@ -0,0 +1,219 @@
+"""Skill iteration workspace — preserve history, enable rollback.
+
+When ``openkb skill new <name> -y`` would overwrite an existing skill,
+the CLI calls :func:`save_iteration` first to copy the current skill
+directory into a parallel ``<kb>/output/skills/<name>-workspace/`` tree
+under ``iteration-N/``. Iteration numbers monotonically increase across
+overwrites, so no work is destroyed by a new compile.
+
+After the new skill is generated, :func:`write_diff` drops a
+``diff.md`` inside the saved iteration capturing the structural delta
+(description change, ref/script add/remove, SKILL.md line-count delta).
+
+Borrowed from Anthropic's `skill-creator` iteration pattern:
+https://github.com/anthropics/skills/blob/main/skills/skill-creator/SKILL.md
+"""
+from __future__ import annotations
+
+import re
+import shutil
+from pathlib import Path
+
+__all__ = [
+    "save_iteration",
+    "list_iterations",
+    "restore_iteration",
+    "write_diff",
+]
+
+
+def _skill_dir(kb_dir: Path, skill_name: str) -> Path:
+    return kb_dir / "output" / "skills" / skill_name
+
+
+def _workspace_dir(kb_dir: Path, skill_name: str) -> Path:
+    return kb_dir / "output" / "skills" / f"{skill_name}-workspace"
+
+
+_ITER_RE = re.compile(r"^iteration-(\d+)$")
+
+
+def _iter_number(path: Path) -> int | None:
+    m = _ITER_RE.match(path.name)
+    return int(m.group(1)) if m else None
+
+
+def list_iterations(kb_dir: Path, skill_name: str) -> list[Path]:
+    """List existing iteration directories for a skill, sorted by N ascending.
+
+    Returns an empty list if the workspace doesn't exist.
+    """
+    ws = _workspace_dir(kb_dir, skill_name)
+    if not ws.is_dir():
+        return []
+    iters: list[tuple[int, Path]] = []
+    for child in ws.iterdir():
+        if not child.is_dir():
+            continue
+        n = _iter_number(child)
+        if n is None:
+            continue
+        iters.append((n, child))
+    iters.sort(key=lambda t: t[0])
+    return [p for _, p in iters]
+
+
+def save_iteration(kb_dir: Path, skill_name: str) -> Path | None:
+    """Copy current ``<kb>/output/skills/<name>/`` to the next iteration slot.
+
+    Returns the saved iteration path, or ``None`` if there's no current
+    skill to save (first compile of a new name).
+    """
+    src = _skill_dir(kb_dir, skill_name)
+    if not src.is_dir():
+        return None
+
+    existing = list_iterations(kb_dir, skill_name)
+    next_n = (max((_iter_number(p) for p in existing), default=0) or 0) + 1
+
+    ws = _workspace_dir(kb_dir, skill_name)
+    ws.mkdir(parents=True, exist_ok=True)
+    dest = ws / f"iteration-{next_n}"
+    shutil.copytree(src, dest)
+    return dest
+
+
+def restore_iteration(
+    kb_dir: Path, skill_name: str, n: int | None = None
+) -> Path:
+    """Restore an iteration as the current skill.
+
+    If ``n`` is ``None``, restore the highest-numbered iteration. Raises
+    ``FileNotFoundError`` if the requested iteration doesn't exist.
+
+    Returns the path to the restored skill directory.
+    """
+    iters = list_iterations(kb_dir, skill_name)
+    if not iters:
+        raise FileNotFoundError(
+            f"No iterations exist for skill {skill_name!r}."
+        )
+
+    if n is None:
+        src = iters[-1]
+    else:
+        match = next(
+            (p for p in iters if _iter_number(p) == n),
+            None,
+        )
+        if match is None:
+            raise FileNotFoundError(
+                f"Iteration {n} not found for skill {skill_name!r}."
+            )
+        src = match
+
+    dest = _skill_dir(kb_dir, skill_name)
+    if dest.exists():
+        shutil.rmtree(dest)
+    shutil.copytree(src, dest)
+    return dest
+
+
+# ---------------------------------------------------------------------------
+# Diff
+# ---------------------------------------------------------------------------
+
+
+_DESC_RE = re.compile(
+    r"^description:\s*(.*?)\s*$",
+    re.MULTILINE,
+)
+
+
+def _extract_description(skill_md: Path) -> str:
+    """Return the ``description:`` line from a SKILL.md frontmatter,
+    or empty string if absent."""
+    if not skill_md.is_file():
+        return ""
+    text = skill_md.read_text(encoding="utf-8", errors="replace")
+    # Only scan the frontmatter block to avoid matching body text.
+    if text.startswith("---"):
+        end = text.find("\n---", 3)
+        head = text[: end if end != -1 else len(text)]
+    else:
+        head = text[:2000]
+    m = _DESC_RE.search(head)
+    return m.group(1) if m else ""
+
+
+def _list_files(root: Path, subdir: str) -> set[str]:
+    base = root / subdir
+    if not base.is_dir():
+        return set()
+    return {
+        str(p.relative_to(root)).replace("\\", "/")
+        for p in base.rglob("*")
+        if p.is_file()
+    }
+
+
+def _line_count(path: Path) -> int:
+    if not path.is_file():
+        return 0
+    return len(path.read_text(encoding="utf-8", errors="replace").splitlines())
+
+
+def write_diff(prev: Path, curr: Path, diff_path: Path) -> None:
+    """Write a human-readable structural diff from ``prev`` to ``curr``.
+
+    Covers:
+      * description: line changes (from SKILL.md frontmatter)
+      * files added/removed under ``references/`` and ``scripts/``
+      * line-count delta on SKILL.md
+    """
+    prev_desc = _extract_description(prev / "SKILL.md")
+    curr_desc = _extract_description(curr / "SKILL.md")
+
+    prev_refs = _list_files(prev, "references")
+    curr_refs = _list_files(curr, "references")
+    prev_scripts = _list_files(prev, "scripts")
+    curr_scripts = _list_files(curr, "scripts")
+
+    added = sorted((curr_refs - prev_refs) | (curr_scripts - prev_scripts))
+    removed = sorted((prev_refs - curr_refs) | (prev_scripts - curr_scripts))
+
+    prev_lc = _line_count(prev / "SKILL.md")
+    curr_lc = _line_count(curr / "SKILL.md")
+    delta = curr_lc - prev_lc
+
+    lines: list[str] = []
+    lines.append(f"# Skill diff: {prev.name} -> current\n")
+
+    lines.append("## description\n")
+    if prev_desc == curr_desc:
+        lines.append("_unchanged_\n")
+    else:
+        lines.append(f"- before: {prev_desc or '(none)'}")
+        lines.append(f"- after:  {curr_desc or '(none)'}\n")
+
+    lines.append("## files\n")
+    if not added and not removed:
+        lines.append("_no files added or removed_\n")
+    else:
+        for p in added:
+            lines.append(f"- added: {p}")
+        for p in removed:
+            lines.append(f"- removed: {p}")
+        lines.append("")
+
+    lines.append("## SKILL.md line count\n")
+    sign = "+" if delta >= 0 else ""
+    lines.append(
+        f"- before: {prev_lc} lines"
+    )
+    lines.append(
+        f"- after:  {curr_lc} lines ({sign}{delta})\n"
+    )
+
+    diff_path.parent.mkdir(parents=True, exist_ok=True)
+    diff_path.write_text("\n".join(lines), encoding="utf-8")
diff --git a/tests/test_skill_cli.py b/tests/test_skill_cli.py
index 4004450d..e570d000 100644
--- a/tests/test_skill_cli.py
+++ b/tests/test_skill_cli.py
@@ -131,6 +131,127 @@ async def fake_run(kb_dir, skill_name, intent, model):
     assert (kb / "output" / "skills" / "demo" / "SKILL.md").exists()
 
 
+def test_skill_new_saves_iteration_when_overwriting(tmp_path):
+    """Overwriting with -y must copy the old skill into the workspace
+    under iteration-1/ before the generator runs."""
+    kb = _make_kb(tmp_path)
+    target = kb / "output" / "skills" / "demo"
+    target.mkdir(parents=True)
+    (target / "SKILL.md").write_text(
+        "---\nname: demo\ndescription: original description\n---\n\n# demo\n"
+    )
+
+    runner = CliRunner()
+
+    async def fake_run(kb_dir, skill_name, intent, model):
+        _fake_compile(kb_dir, skill_name)
+
+    with patch("openkb.cli._find_kb_dir", return_value=kb), \
+         patch("openkb.generator.run_skill_create", new=AsyncMock(side_effect=fake_run)):
+        result = runner.invoke(cli, ["skill", "new", "demo", "x", "-y"])
+
+    assert result.exit_code == 0, result.output
+    iter1 = kb / "output" / "skills" / "demo-workspace" / "iteration-1"
+    assert iter1.is_dir()
+    assert (iter1 / "SKILL.md").exists()
+    assert "original description" in (iter1 / "SKILL.md").read_text()
+    # And a diff.md is dropped against the previous version
+    assert (iter1 / "diff.md").exists()
+
+
+def test_skill_history_command_lists_iterations(tmp_path):
+    """`openkb skill history <name>` lists existing iteration directories."""
+    kb = _make_kb(tmp_path)
+    ws = kb / "output" / "skills" / "demo-workspace"
+    (ws / "iteration-1").mkdir(parents=True)
+    (ws / "iteration-1" / "SKILL.md").write_text(
+        "---\nname: demo\ndescription: v1\n---\n"
+    )
+    (ws / "iteration-2").mkdir(parents=True)
+    (ws / "iteration-2" / "SKILL.md").write_text(
+        "---\nname: demo\ndescription: v2\n---\n"
+    )
+
+    runner = CliRunner()
+    with patch("openkb.cli._find_kb_dir", return_value=kb):
+        result = runner.invoke(cli, ["skill", "history", "demo"])
+
+    assert result.exit_code == 0, result.output
+    assert "iteration-1" in result.output
+    assert "iteration-2" in result.output
+
+
+def test_skill_history_command_when_no_iterations(tmp_path):
+    kb = _make_kb(tmp_path)
+    runner = CliRunner()
+    with patch("openkb.cli._find_kb_dir", return_value=kb):
+        result = runner.invoke(cli, ["skill", "history", "demo"])
+    assert result.exit_code == 0, result.output
+    assert "No previous iterations" in result.output
+
+
+def test_skill_rollback_restores_from_workspace(tmp_path):
+    """`openkb skill rollback <name>` copies the latest iteration back
+    into output/skills/<name>/."""
+    kb = _make_kb(tmp_path)
+    ws = kb / "output" / "skills" / "demo-workspace"
+    (ws / "iteration-1").mkdir(parents=True)
+    (ws / "iteration-1" / "SKILL.md").write_text(
+        "---\nname: demo\ndescription: restored\n---\n\n# demo\n"
+    )
+    # Current skill is "broken"
+    current = kb / "output" / "skills" / "demo"
+    current.mkdir(parents=True)
+    (current / "SKILL.md").write_text(
+        "---\nname: demo\ndescription: broken\n---\n"
+    )
+
+    runner = CliRunner()
+    with patch("openkb.cli._find_kb_dir", return_value=kb):
+        result = runner.invoke(cli, ["skill", "rollback", "demo", "-y"])
+
+    assert result.exit_code == 0, result.output
+    text = (current / "SKILL.md").read_text()
+    assert "restored" in text
+    assert "broken" not in text
+    # Marketplace manifest regenerated
+    assert (kb / ".claude-plugin" / "marketplace.json").exists()
+
+
+def test_skill_rollback_to_specific_iteration(tmp_path):
+    kb = _make_kb(tmp_path)
+    ws = kb / "output" / "skills" / "demo-workspace"
+    (ws / "iteration-1").mkdir(parents=True)
+    (ws / "iteration-1" / "SKILL.md").write_text(
+        "---\nname: demo\ndescription: v1\n---\n"
+    )
+    (ws / "iteration-2").mkdir(parents=True)
+    (ws / "iteration-2" / "SKILL.md").write_text(
+        "---\nname: demo\ndescription: v2\n---\n"
+    )
+    current = kb / "output" / "skills" / "demo"
+    current.mkdir(parents=True)
+    (current / "SKILL.md").write_text("placeholder")
+
+    runner = CliRunner()
+    with patch("openkb.cli._find_kb_dir", return_value=kb):
+        result = runner.invoke(
+            cli, ["skill", "rollback", "demo", "--to", "1", "-y"]
+        )
+
+    assert result.exit_code == 0, result.output
+    assert "v1" in (current / "SKILL.md").read_text()
+
+
+def test_skill_rollback_errors_when_no_iterations(tmp_path):
+    kb = _make_kb(tmp_path)
+    runner = CliRunner()
+    with patch("openkb.cli._find_kb_dir", return_value=kb):
+        result = runner.invoke(cli, ["skill", "rollback", "demo", "-y"])
+    assert result.exit_code != 0
+    assert "No iterations" in result.output
+
+
 def test_skill_new_keeps_existing_skill_when_key_setup_fails(tmp_path):
     """If LLM key setup raises (e.g. no API key), the old skill output
     must be preserved — don't rmtree before key setup is verified."""
diff --git a/tests/test_skill_workspace.py b/tests/test_skill_workspace.py
new file mode 100644
index 00000000..ac4b5af7
--- /dev/null
+++ b/tests/test_skill_workspace.py
@@ -0,0 +1,153 @@
+"""Tests for :mod:`openkb.skill_workspace` — iteration save/restore + diff."""
+from __future__ import annotations
+
+from pathlib import Path
+
+import pytest
+
+from openkb.skill_workspace import (
+    list_iterations,
+    restore_iteration,
+    save_iteration,
+    write_diff,
+)
+
+
+def _make_skill(kb_dir: Path, name: str, *, description: str = "demo desc",
+                refs: list[str] | None = None,
+                skill_md_lines: int = 5) -> Path:
+    target = kb_dir / "output" / "skills" / name
+    target.mkdir(parents=True, exist_ok=True)
+    body = "\n".join(f"line {i}" for i in range(1, max(1, skill_md_lines) + 1))
+    (target / "SKILL.md").write_text(
+        f"---\nname: {name}\ndescription: {description}\n---\n\n{body}\n",
+        encoding="utf-8",
+    )
+    if refs:
+        refs_dir = target / "references"
+        refs_dir.mkdir(parents=True, exist_ok=True)
+        for r in refs:
+            (refs_dir / r).write_text("# ref\n", encoding="utf-8")
+    return target
+
+
+# ---------------------------------------------------------------------------
+# save_iteration
+# ---------------------------------------------------------------------------
+
+
+def test_save_iteration_returns_none_when_no_current_skill(tmp_path):
+    assert save_iteration(tmp_path, "missing") is None
+
+
+def test_save_iteration_writes_iteration_1_then_2(tmp_path):
+    _make_skill(tmp_path, "demo", description="v1")
+    first = save_iteration(tmp_path, "demo")
+    assert first is not None
+    assert first.name == "iteration-1"
+    assert (first / "SKILL.md").exists()
+    assert "v1" in (first / "SKILL.md").read_text()
+
+    # Mutate, then save again
+    _make_skill(tmp_path, "demo", description="v2")
+    second = save_iteration(tmp_path, "demo")
+    assert second is not None
+    assert second.name == "iteration-2"
+    assert "v2" in (second / "SKILL.md").read_text()
+    # First survives untouched
+    assert "v1" in (first / "SKILL.md").read_text()
+
+
+# ---------------------------------------------------------------------------
+# list_iterations
+# ---------------------------------------------------------------------------
+
+
+def test_list_iterations_returns_empty_when_no_workspace(tmp_path):
+    assert list_iterations(tmp_path, "nothing") == []
+
+
+def test_list_iterations_returns_sorted(tmp_path):
+    _make_skill(tmp_path, "demo")
+    save_iteration(tmp_path, "demo")
+    _make_skill(tmp_path, "demo", description="v2")
+    save_iteration(tmp_path, "demo")
+    iters = list_iterations(tmp_path, "demo")
+    assert [p.name for p in iters] == ["iteration-1", "iteration-2"]
+
+
+# ---------------------------------------------------------------------------
+# restore_iteration
+# ---------------------------------------------------------------------------
+
+
+def test_restore_iteration_latest_when_n_is_none(tmp_path):
+    _make_skill(tmp_path, "demo", description="v1")
+    save_iteration(tmp_path, "demo")
+    _make_skill(tmp_path, "demo", description="v2")
+    save_iteration(tmp_path, "demo")
+    # Simulate current skill being something else / broken
+    _make_skill(tmp_path, "demo", description="broken")
+
+    restored = restore_iteration(tmp_path, "demo", n=None)
+    text = (restored / "SKILL.md").read_text()
+    assert "v2" in text
+    assert "broken" not in text
+
+
+def test_restore_iteration_specific_n(tmp_path):
+    _make_skill(tmp_path, "demo", description="v1")
+    save_iteration(tmp_path, "demo")
+    _make_skill(tmp_path, "demo", description="v2")
+    save_iteration(tmp_path, "demo")
+
+    restored = restore_iteration(tmp_path, "demo", n=1)
+    assert "v1" in (restored / "SKILL.md").read_text()
+
+
+def test_restore_iteration_missing_raises(tmp_path):
+    _make_skill(tmp_path, "demo")
+    save_iteration(tmp_path, "demo")
+    with pytest.raises(FileNotFoundError):
+        restore_iteration(tmp_path, "demo", n=99)
+
+
+def test_restore_iteration_no_workspace_raises(tmp_path):
+    with pytest.raises(FileNotFoundError):
+        restore_iteration(tmp_path, "demo")
+
+
+# ---------------------------------------------------------------------------
+# write_diff
+# ---------------------------------------------------------------------------
+
+
+def test_write_diff_records_description_change(tmp_path):
+    prev = _make_skill(tmp_path / "prev", "demo", description="old description")
+    curr = _make_skill(tmp_path / "curr", "demo", description="new description")
+    out = tmp_path / "diff.md"
+    write_diff(prev, curr, out)
+    content = out.read_text()
+    assert "description" in content.lower()
+    assert "old description" in content
+    assert "new description" in content
+
+
+def test_write_diff_records_added_reference(tmp_path):
+    prev = _make_skill(tmp_path / "prev", "demo", refs=[])
+    curr = _make_skill(tmp_path / "curr", "demo", refs=["new.md"])
+    out = tmp_path / "diff.md"
+    write_diff(prev, curr, out)
+    content = out.read_text()
+    assert "added:" in content
+    assert "references/new.md" in content
+
+
+def test_write_diff_records_removed_reference(tmp_path):
+    prev = _make_skill(tmp_path / "prev", "demo", refs=["old.md"])
+    curr = _make_skill(tmp_path / "curr", "demo", refs=[])
+    out = tmp_path / "diff.md"
+    write_diff(prev, curr, out)
+    content = out.read_text()
+    assert "removed:" in content
+    assert "references/old.md" in content

From 7066e8351970d505bca531e6fe69aa39d2ddc871 Mon Sep 17 00:00:00 2001
From: mountain <kose2livs@gmail.com>
Date: Mon, 18 May 2026 16:13:42 +0800
Subject: [PATCH 20/25] feat(skill): pure-Python structural validator +
 auto-run on compile
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Borrows from Codex skill-creator's quick_validate.py: catches the
common failure modes that would make a skill unloadable before
distribution — missing/malformed frontmatter, name/dir mismatch,
oversized files, broken references/* wikilinks, non-stdlib imports
in scripts/* (strict mode).

New CLI:
  openkb skill validate              validate all compiled skills
  openkb skill validate <name>       validate one
  openkb skill validate --strict     treat warnings as failures

openkb skill new auto-runs validation after compile and surfaces
errors + warnings so the user knows whether the freshly-compiled
skill is well-formed. Doesn't block marketplace.json regeneration —
the files are on disk and the user can fix or rollback.
---
 openkb/cli.py                 |  66 ++++++++
 openkb/skill_validator.py     | 208 +++++++++++++++++++++++
 tests/test_skill_cli.py       |  37 +++++
 tests/test_skill_validator.py | 305 ++++++++++++++++++++++++++++++++++
 4 files changed, 616 insertions(+)
 create mode 100644 openkb/skill_validator.py
 create mode 100644 tests/test_skill_validator.py

diff --git a/openkb/cli.py b/openkb/cli.py
index 66496a3d..6e248f0f 100644
--- a/openkb/cli.py
+++ b/openkb/cli.py
@@ -1517,6 +1517,22 @@ def skill_new(ctx, name, intent, yes_flag):
                 "diff generation failed: %s", exc, exc_info=True
             )
 
+    # Auto-validate the freshly compiled skill. Surface issues but don't
+    # block — files are on disk and the user can fix or rollback.
+    from openkb.skill_validator import validate_skill
+    skill_dir = kb_dir / "output" / "skills" / name
+    result = validate_skill(skill_dir)
+    if result.errors or result.warnings:
+        click.echo("\n[WARN] Validation found issues:")
+        for err in result.errors:
+            click.echo(f"  ERROR:   {err}")
+        for warn in result.warnings:
+            click.echo(f"  WARN:    {warn}")
+        click.echo(
+            f"\nRun `openkb skill validate {name}` to re-check, or "
+            f"`openkb skill rollback {name}` to revert."
+        )
+
     click.echo(f"\nSaved: output/skills/{name}/")
     if saved_iteration is not None:
         rel = saved_iteration.relative_to(kb_dir)
@@ -1654,3 +1670,53 @@ def skill_rollback(ctx, name, to_n, yes_flag):
     regenerate_marketplace(kb_dir)
     click.echo(f"Restored output/skills/{name}/ from {target_label}.")
     click.echo("Manifest: .claude-plugin/marketplace.json updated")
+
+
+@skill.command("validate")
+@click.argument("name", required=False)
+@click.option(
+    "--strict", is_flag=True, default=False,
+    help="Treat warnings as failures (exit non-zero).",
+)
+@click.pass_context
+def skill_validate(ctx, name, strict):
+    """Validate one skill (by name) or all compiled skills in this KB."""
+    from openkb.skill_validator import validate_skill
+
+    kb_dir = _find_kb_dir(ctx.obj.get("kb_dir_override"))
+    if kb_dir is None:
+        click.echo("No knowledge base found.", err=True)
+        ctx.exit(1)
+
+    skills_root = kb_dir / "output" / "skills"
+    if not skills_root.is_dir():
+        click.echo("No skills found. Compile one with `openkb skill new`.")
+        return
+
+    if name:
+        target = skills_root / name
+        if not target.is_dir():
+            click.echo(f"[ERROR] Skill '{name}' not found.", err=True)
+            ctx.exit(1)
+        targets = [target]
+    else:
+        targets = sorted(
+            d for d in skills_root.iterdir()
+            if d.is_dir() and not d.name.endswith("-workspace")
+        )
+
+    any_failed = False
+    for t in targets:
+        result = validate_skill(t, strict=strict)
+        passed = result.passed_strict if strict else result.passed
+        prefix = "[OK]" if passed else "[FAIL]"
+        click.echo(f"{prefix} {t.name}")
+        for err in result.errors:
+            click.echo(f"  ERROR:   {err}")
+        for warn in result.warnings:
+            click.echo(f"  WARN:    {warn}")
+        if not passed:
+            any_failed = True
+
+    if any_failed:
+        ctx.exit(1)
diff --git a/openkb/skill_validator.py b/openkb/skill_validator.py
new file mode 100644
index 00000000..82de55d4
--- /dev/null
+++ b/openkb/skill_validator.py
@@ -0,0 +1,208 @@
+"""Deterministic structural validator for compiled skills.
+
+Pure Python — no LLM calls. Catches the common failure modes that would
+make a skill un-loadable or misleading to the agents that install it:
+
+  * SKILL.md missing or unparseable
+  * frontmatter missing required fields
+  * name field doesn't match directory or violates the slug rule
+  * description too long (> 1024 chars per Anthropic spec)
+  * files too big (SKILL.md > 50 KB / references/*.md > 100 KB)
+  * `[[references/...]]` wikilinks pointing at files that don't exist
+  * (strict mode) scripts/*.py importing non-stdlib modules
+
+This is the deterministic counterpart to ``openkb skill eval`` — eval
+measures whether the description fires; validate ensures the structure
+is well-formed.
+"""
+from __future__ import annotations
+
+import ast
+import re
+import sys
+from dataclasses import dataclass, field
+from pathlib import Path
+
+import yaml  # already a project dep (pyyaml)
+
+
+SKILL_NAME_RE = re.compile(r"^[a-z0-9]+(-[a-z0-9]+)*$")
+DESCRIPTION_MAX_CHARS = 1024
+SKILL_MD_MAX_BYTES = 50 * 1024
+REFERENCE_MAX_BYTES = 100 * 1024
+NAME_MAX_LEN = 64
+WIKILINK_RE = re.compile(r"\[\[references/([a-z0-9._/-]+)\]\]", re.IGNORECASE)
+
+
+@dataclass
+class ValidationResult:
+    errors: list[str] = field(default_factory=list)
+    warnings: list[str] = field(default_factory=list)
+
+    @property
+    def passed(self) -> bool:
+        return not self.errors
+
+    @property
+    def passed_strict(self) -> bool:
+        return not self.errors and not self.warnings
+
+
+def validate_skill(skill_dir: Path, *, strict: bool = False) -> ValidationResult:
+    """Run all structural checks on a single compiled skill directory.
+
+    Args:
+        skill_dir: ``<kb>/output/skills/<name>``
+        strict: if True, warnings are surfaced; otherwise only errors
+
+    Returns a ValidationResult. Use ``result.passed`` for the default
+    semantics (errors block, warnings don't); use ``result.passed_strict``
+    when running ``--strict``.
+    """
+    result = ValidationResult()
+    skill_dir = Path(skill_dir)
+
+    if not skill_dir.is_dir():
+        result.errors.append(f"Skill directory does not exist: {skill_dir}")
+        return result
+
+    skill_md = skill_dir / "SKILL.md"
+    if not skill_md.exists():
+        result.errors.append("Missing required file: SKILL.md")
+        return result
+
+    # File size
+    skill_size = skill_md.stat().st_size
+    if skill_size > SKILL_MD_MAX_BYTES:
+        result.errors.append(
+            f"SKILL.md is {skill_size} bytes; max is {SKILL_MD_MAX_BYTES} bytes."
+        )
+
+    text = skill_md.read_text(encoding="utf-8")
+
+    # Frontmatter
+    fm = _extract_frontmatter(text)
+    if fm is None:
+        result.errors.append(
+            "SKILL.md has no YAML frontmatter (must start with `---` ... `---`)."
+        )
+        return result
+
+    try:
+        meta = yaml.safe_load(fm) or {}
+    except yaml.YAMLError as exc:
+        result.errors.append(f"Frontmatter is not valid YAML: {exc}")
+        return result
+
+    if not isinstance(meta, dict):
+        result.errors.append("Frontmatter must be a YAML mapping.")
+        return result
+
+    # name field
+    name = meta.get("name")
+    if not name:
+        result.errors.append("Frontmatter is missing required field 'name:'.")
+    elif not isinstance(name, str):
+        result.errors.append("Frontmatter 'name:' must be a string.")
+    else:
+        if name != skill_dir.name:
+            result.errors.append(
+                f"Frontmatter 'name: {name}' doesn't match directory name "
+                f"'{skill_dir.name}'."
+            )
+        if not SKILL_NAME_RE.match(name) or len(name) > NAME_MAX_LEN:
+            result.errors.append(
+                f"Frontmatter 'name: {name}' must be kebab-case "
+                f"(lowercase a-z0-9 + dashes), 1-{NAME_MAX_LEN} chars."
+            )
+
+    # description field
+    desc = meta.get("description")
+    if not desc:
+        result.errors.append("Frontmatter is missing required field 'description:'.")
+    elif not isinstance(desc, str):
+        result.errors.append("Frontmatter 'description:' must be a string.")
+    else:
+        if len(desc) > DESCRIPTION_MAX_CHARS:
+            result.errors.append(
+                f"Frontmatter 'description:' is {len(desc)} chars; "
+                f"max is {DESCRIPTION_MAX_CHARS} chars."
+            )
+        if len(desc) < 20:
+            result.warnings.append(
+                f"Frontmatter 'description:' is only {len(desc)} chars — "
+                f"too short to be a useful activation signal."
+            )
+
+    # references/ wikilink resolution
+    wikilinks = WIKILINK_RE.findall(text)
+    refs_dir = skill_dir / "references"
+    for link in wikilinks:
+        # link may or may not include .md suffix
+        target = refs_dir / link
+        if not target.suffix:
+            target = target.with_suffix(".md")
+        if not target.exists():
+            result.errors.append(
+                f"SKILL.md references [[references/{link}]] but "
+                f"{target.relative_to(skill_dir)} doesn't exist."
+            )
+
+    # references/*.md file sizes
+    if refs_dir.is_dir():
+        for ref in refs_dir.rglob("*.md"):
+            size = ref.stat().st_size
+            if size > REFERENCE_MAX_BYTES:
+                result.errors.append(
+                    f"{ref.relative_to(skill_dir)} is {size} bytes; "
+                    f"max is {REFERENCE_MAX_BYTES} bytes."
+                )
+
+    # scripts/*.py imports — strict only
+    if strict:
+        scripts_dir = skill_dir / "scripts"
+        if scripts_dir.is_dir():
+            for script in scripts_dir.rglob("*.py"):
+                bad = _non_stdlib_imports(script)
+                if bad:
+                    result.warnings.append(
+                        f"{script.relative_to(skill_dir)} imports non-stdlib "
+                        f"modules: {', '.join(sorted(bad))}. Skill scripts run "
+                        f"in unknown environments — prefer stdlib only."
+                    )
+
+    return result
+
+
+def _extract_frontmatter(text: str) -> str | None:
+    """Return the YAML body between the first two `---` lines, or None."""
+    lines = text.splitlines()
+    if not lines or lines[0].strip() != "---":
+        return None
+    try:
+        end = lines.index("---", 1)
+    except ValueError:
+        return None
+    return "\n".join(lines[1:end])
+
+
+def _non_stdlib_imports(script: Path) -> set[str]:
+    """Return imported module names that aren't in the Python stdlib."""
+    try:
+        tree = ast.parse(script.read_text(encoding="utf-8"))
+    except SyntaxError:
+        return {"<syntax-error>"}
+    stdlib = set(sys.stdlib_module_names) if hasattr(sys, "stdlib_module_names") else set()
+    bad: set[str] = set()
+    for node in ast.walk(tree):
+        if isinstance(node, ast.Import):
+            for alias in node.names:
+                root = alias.name.split(".")[0]
+                if stdlib and root not in stdlib:
+                    bad.add(root)
+        elif isinstance(node, ast.ImportFrom):
+            if node.module:
+                root = node.module.split(".")[0]
+                if stdlib and root not in stdlib:
+                    bad.add(root)
+    return bad
diff --git a/tests/test_skill_cli.py b/tests/test_skill_cli.py
index e570d000..6b2ea87e 100644
--- a/tests/test_skill_cli.py
+++ b/tests/test_skill_cli.py
@@ -252,6 +252,43 @@ def test_skill_rollback_errors_when_no_iterations(tmp_path):
     assert "No iterations" in result.output
 
 
+def test_skill_validate_passes_on_valid_skill(tmp_path):
+    """`openkb skill validate <name>` exits 0 and prints OK for a well-formed skill."""
+    kb = _make_kb(tmp_path)
+    target = kb / "output" / "skills" / "demo"
+    target.mkdir(parents=True)
+    (target / "SKILL.md").write_text(
+        "---\nname: demo\n"
+        "description: A useful and descriptive activation signal for this skill.\n"
+        "---\n\n# demo\n"
+    )
+
+    runner = CliRunner()
+    with patch("openkb.cli._find_kb_dir", return_value=kb):
+        result = runner.invoke(cli, ["skill", "validate", "demo"])
+
+    assert result.exit_code == 0, result.output
+    assert "[OK]" in result.output
+    assert "demo" in result.output
+
+
+def test_skill_validate_fails_on_invalid_frontmatter(tmp_path):
+    """`openkb skill validate <name>` exits non-zero on malformed YAML."""
+    kb = _make_kb(tmp_path)
+    target = kb / "output" / "skills" / "broken"
+    target.mkdir(parents=True)
+    # No frontmatter at all — must error out.
+    (target / "SKILL.md").write_text("# just a body, no frontmatter\n")
+
+    runner = CliRunner()
+    with patch("openkb.cli._find_kb_dir", return_value=kb):
+        result = runner.invoke(cli, ["skill", "validate", "broken"])
+
+    assert result.exit_code != 0, result.output
+    assert "ERROR" in result.output
+    assert "[FAIL]" in result.output
+
+
 def test_skill_new_keeps_existing_skill_when_key_setup_fails(tmp_path):
     """If LLM key setup raises (e.g. no API key), the old skill output
     must be preserved — don't rmtree before key setup is verified."""
diff --git a/tests/test_skill_validator.py b/tests/test_skill_validator.py
new file mode 100644
index 00000000..06fad87a
--- /dev/null
+++ b/tests/test_skill_validator.py
@@ -0,0 +1,305 @@
+"""Unit tests for openkb.skill_validator — pure-Python structural checks
+on a compiled skill directory. No LLM, no network."""
+from __future__ import annotations
+
+from pathlib import Path
+
+from openkb.skill_validator import (
+    DESCRIPTION_MAX_CHARS,
+    REFERENCE_MAX_BYTES,
+    SKILL_MD_MAX_BYTES,
+    validate_skill,
+)
+
+
+# ---------------------------------------------------------------------------
+# helpers
+# ---------------------------------------------------------------------------
+
+def _write_skill(
+    parent: Path,
+    name: str,
+    *,
+    frontmatter: str | None = "default",
+    body: str = "# body\n",
+    refs: dict[str, str] | None = None,
+    scripts: dict[str, str] | None = None,
+    skill_md_bytes: int | None = None,
+) -> Path:
+    """Create a skill directory with the requested contents.
+
+    frontmatter:
+      "default" -> a valid minimal frontmatter with matching name + long desc
+      str       -> use that exact frontmatter block (between --- markers)
+      None      -> write the body with NO frontmatter at all
+    """
+    skill_dir = parent / name
+    skill_dir.mkdir(parents=True, exist_ok=True)
+    if frontmatter == "default":
+        fm = f"name: {name}\ndescription: A useful and descriptive skill activation signal."
+    else:
+        fm = frontmatter
+
+    if fm is None:
+        text = body
+    else:
+        text = f"---\n{fm}\n---\n\n{body}"
+
+    if skill_md_bytes is not None:
+        text = text + ("x" * skill_md_bytes)
+    (skill_dir / "SKILL.md").write_text(text, encoding="utf-8")
+
+    if refs:
+        refs_dir = skill_dir / "references"
+        refs_dir.mkdir(exist_ok=True)
+        for relpath, content in refs.items():
+            target = refs_dir / relpath
+            target.parent.mkdir(parents=True, exist_ok=True)
+            target.write_text(content, encoding="utf-8")
+
+    if scripts:
+        scripts_dir = skill_dir / "scripts"
+        scripts_dir.mkdir(exist_ok=True)
+        for relpath, content in scripts.items():
+            target = scripts_dir / relpath
+            target.parent.mkdir(parents=True, exist_ok=True)
+            target.write_text(content, encoding="utf-8")
+
+    return skill_dir
+
+
+# ---------------------------------------------------------------------------
+# happy path
+# ---------------------------------------------------------------------------
+
+def test_minimal_valid_skill_passes(tmp_path):
+    sd = _write_skill(tmp_path, "demo-skill")
+    result = validate_skill(sd)
+    assert result.passed, result.errors
+    assert result.errors == []
+    assert result.warnings == []
+
+
+# ---------------------------------------------------------------------------
+# missing/structural errors
+# ---------------------------------------------------------------------------
+
+def test_skill_directory_missing(tmp_path):
+    result = validate_skill(tmp_path / "nope")
+    assert not result.passed
+    assert any("does not exist" in e for e in result.errors)
+
+
+def test_missing_skill_md(tmp_path):
+    sd = tmp_path / "empty-skill"
+    sd.mkdir()
+    result = validate_skill(sd)
+    assert not result.passed
+    assert any("SKILL.md" in e for e in result.errors)
+
+
+def test_no_frontmatter(tmp_path):
+    sd = _write_skill(tmp_path, "no-fm", frontmatter=None, body="# just a body\n")
+    result = validate_skill(sd)
+    assert not result.passed
+    assert any("frontmatter" in e.lower() for e in result.errors)
+
+
+def test_malformed_yaml(tmp_path):
+    sd = _write_skill(tmp_path, "bad-yaml", frontmatter="name: : : bad\n  - oops\n[")
+    result = validate_skill(sd)
+    assert not result.passed
+    assert any("YAML" in e or "yaml" in e for e in result.errors)
+
+
+def test_frontmatter_not_mapping(tmp_path):
+    sd = _write_skill(tmp_path, "list-fm", frontmatter="- one\n- two")
+    result = validate_skill(sd)
+    assert not result.passed
+    assert any("mapping" in e for e in result.errors)
+
+
+# ---------------------------------------------------------------------------
+# name field
+# ---------------------------------------------------------------------------
+
+def test_name_mismatch_with_directory(tmp_path):
+    sd = _write_skill(
+        tmp_path, "dir-name",
+        frontmatter="name: other-name\ndescription: A nice long description here.",
+    )
+    result = validate_skill(sd)
+    assert not result.passed
+    assert any("doesn't match directory" in e for e in result.errors)
+
+
+def test_name_invalid_uppercase(tmp_path):
+    sd = tmp_path / "BadName"
+    sd.mkdir()
+    (sd / "SKILL.md").write_text(
+        "---\nname: BadName\ndescription: A nice long description here.\n---\n"
+    )
+    result = validate_skill(sd)
+    assert not result.passed
+    assert any("kebab-case" in e for e in result.errors)
+
+
+def test_name_invalid_underscore(tmp_path):
+    sd = tmp_path / "bad_name"
+    sd.mkdir()
+    (sd / "SKILL.md").write_text(
+        "---\nname: bad_name\ndescription: A nice long description here.\n---\n"
+    )
+    result = validate_skill(sd)
+    assert not result.passed
+    assert any("kebab-case" in e for e in result.errors)
+
+
+def test_name_missing(tmp_path):
+    sd = _write_skill(
+        tmp_path, "no-name-field",
+        frontmatter="description: A nice long description here.",
+    )
+    result = validate_skill(sd)
+    assert not result.passed
+    assert any("name" in e for e in result.errors)
+
+
+# ---------------------------------------------------------------------------
+# description field
+# ---------------------------------------------------------------------------
+
+def test_description_missing(tmp_path):
+    sd = _write_skill(tmp_path, "no-desc", frontmatter="name: no-desc")
+    result = validate_skill(sd)
+    assert not result.passed
+    assert any("description" in e for e in result.errors)
+
+
+def test_description_too_long(tmp_path):
+    long_desc = "x" * (DESCRIPTION_MAX_CHARS + 1)
+    sd = _write_skill(
+        tmp_path, "long-desc",
+        frontmatter=f"name: long-desc\ndescription: {long_desc}",
+    )
+    result = validate_skill(sd)
+    assert not result.passed
+    assert any("description" in e and "chars" in e for e in result.errors)
+
+
+def test_description_too_short_is_warning_not_error(tmp_path):
+    sd = _write_skill(
+        tmp_path, "short-desc",
+        frontmatter="name: short-desc\ndescription: too short",
+    )
+    result = validate_skill(sd)
+    # Errors should be empty — short desc is a warning, not blocking.
+    assert result.errors == []
+    assert result.passed  # passes default semantics
+    assert any("too short" in w for w in result.warnings)
+    assert not result.passed_strict  # but fails under --strict
+
+
+# ---------------------------------------------------------------------------
+# file sizes
+# ---------------------------------------------------------------------------
+
+def test_skill_md_too_big(tmp_path):
+    sd = _write_skill(
+        tmp_path, "big-skill",
+        skill_md_bytes=SKILL_MD_MAX_BYTES + 1,
+    )
+    result = validate_skill(sd)
+    assert not result.passed
+    assert any("SKILL.md" in e and "bytes" in e for e in result.errors)
+
+
+def test_reference_too_big(tmp_path):
+    big = "y" * (REFERENCE_MAX_BYTES + 1)
+    sd = _write_skill(
+        tmp_path, "big-ref",
+        refs={"huge.md": big},
+    )
+    result = validate_skill(sd)
+    assert not result.passed
+    assert any("huge.md" in e and "bytes" in e for e in result.errors)
+
+
+# ---------------------------------------------------------------------------
+# wikilinks
+# ---------------------------------------------------------------------------
+
+def test_wikilink_resolves(tmp_path):
+    sd = _write_skill(
+        tmp_path, "with-ref",
+        body="See [[references/topic.md]] for details.\n",
+        refs={"topic.md": "# topic\n"},
+    )
+    result = validate_skill(sd)
+    assert result.passed, result.errors
+
+
+def test_wikilink_missing_target(tmp_path):
+    sd = _write_skill(
+        tmp_path, "broken-ref",
+        body="See [[references/missing.md]] for details.\n",
+    )
+    result = validate_skill(sd)
+    assert not result.passed
+    assert any("missing.md" in e for e in result.errors)
+
+
+def test_wikilink_without_md_suffix_resolves(tmp_path):
+    sd = _write_skill(
+        tmp_path, "ref-no-suffix",
+        body="See [[references/topic]] for details.\n",
+        refs={"topic.md": "# topic\n"},
+    )
+    result = validate_skill(sd)
+    assert result.passed, result.errors
+
+
+# ---------------------------------------------------------------------------
+# scripts/ imports — strict mode only
+# ---------------------------------------------------------------------------
+
+def test_scripts_stdlib_only_no_warning(tmp_path):
+    sd = _write_skill(
+        tmp_path, "stdlib-script",
+        scripts={"do.py": "import os\nimport sys\nfrom pathlib import Path\n"},
+    )
+    result = validate_skill(sd, strict=True)
+    assert result.passed_strict, (result.errors, result.warnings)
+
+
+def test_scripts_non_stdlib_warning_only_in_strict(tmp_path):
+    sd = _write_skill(
+        tmp_path, "requests-script",
+        scripts={"fetch.py": "import requests\nimport os\n"},
+    )
+
+    # Non-strict: no warning surfaced (we still pass).
+    result = validate_skill(sd, strict=False)
+    assert result.passed
+    assert result.warnings == []
+
+    # Strict: warning surfaced, fails strict.
+    result = validate_skill(sd, strict=True)
+    assert result.passed  # no errors
+    assert not result.passed_strict
+    assert any("requests" in w for w in result.warnings)
+
+
+# ---------------------------------------------------------------------------
+# passed vs passed_strict semantics
+# ---------------------------------------------------------------------------
+
+def test_passed_vs_passed_strict_semantics(tmp_path):
+    # Skill that has a warning (short desc) but no errors.
+    sd = _write_skill(
+        tmp_path, "warn-only",
+        frontmatter="name: warn-only\ndescription: short",
+    )
+    result = validate_skill(sd)
+    assert result.passed is True
+    assert result.passed_strict is False

From 6f70d022a92dcd88321382822ea3a60e913aae8d Mon Sep 17 00:00:00 2001
From: mountain <kose2livs@gmail.com>
Date: Mon, 18 May 2026 16:19:00 +0800
Subject: [PATCH 21/25] feat(skill): trigger-accuracy evaluator (skill eval)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Borrows from Anthropic skill-creator's evaluation loop, simplified for
v0.3: measure whether a skill's description: field actually fires when
it should and stays quiet when it shouldn't. The description is the
activation signal other agents read; a vague description silently fails
to load when it ought to.

Flow:
  1. LLM generates N should-trigger + N should-not prompts from the
     description only
  2. Grader LLM scores each: 'should this description activate this
     skill for this question?'
  3. Compare to ground truth, print pass rate + misses

New CLI:
  openkb skill eval <name>                run eval (10+10 default)
  openkb skill eval <name> --save         persist prompts to disk
  openkb skill eval <name> --eval-set X   reuse saved prompts
  openkb skill eval <name> --count N      override prompt count

Tests mock Runner.run for both generator and grader — no real LLM
calls in CI. Saved eval sets live at .openkb/eval-sets/<name>.json
for reproducibility.
---
 openkb/cli.py                 |  74 +++++++++++
 openkb/skill_evaluator.py     | 220 ++++++++++++++++++++++++++++++++
 tests/test_skill_cli.py       |  80 ++++++++++++
 tests/test_skill_evaluator.py | 232 ++++++++++++++++++++++++++++++++++
 4 files changed, 606 insertions(+)
 create mode 100644 openkb/skill_evaluator.py
 create mode 100644 tests/test_skill_evaluator.py

diff --git a/openkb/cli.py b/openkb/cli.py
index 6e248f0f..75e2dce7 100644
--- a/openkb/cli.py
+++ b/openkb/cli.py
@@ -1720,3 +1720,77 @@ def skill_validate(ctx, name, strict):
 
     if any_failed:
         ctx.exit(1)
+
+
+@skill.command("eval")
+@click.argument("name")
+@click.option(
+    "--save", "save_flag", is_flag=True, default=False,
+    help="Persist the generated eval set to .openkb/eval-sets/<name>.json",
+)
+@click.option(
+    "--eval-set", "eval_set_path", default=None, type=click.Path(),
+    help="Use a saved eval set instead of generating fresh prompts.",
+)
+@click.option(
+    "--count", default=10, type=int,
+    help="Number of should-trigger + should-not prompts (each).",
+)
+@click.pass_context
+def skill_eval(ctx, name, save_flag, eval_set_path, count):
+    """Measure how accurately a compiled skill's description fires.
+
+    Generates trigger-eval prompts via LLM, then asks a grader LLM whether
+    the description should activate the skill for each prompt. Prints pass
+    rate + miss list.
+    """
+    import asyncio
+    from openkb.skill_evaluator import (
+        run_eval, save_eval_set, load_eval_set, EvalPrompt,
+    )
+
+    kb_dir = _find_kb_dir(ctx.obj.get("kb_dir_override"))
+    if kb_dir is None:
+        click.echo("No knowledge base found.", err=True)
+        ctx.exit(1)
+
+    skill_dir = kb_dir / "output" / "skills" / name
+    if not skill_dir.is_dir():
+        click.echo(f"[ERROR] Skill '{name}' not found.", err=True)
+        ctx.exit(1)
+
+    _setup_llm_key(kb_dir)
+    config = load_config(kb_dir / ".openkb" / "config.yaml")
+    model = config.get("model", DEFAULT_CONFIG["model"])
+
+    eval_set: list[EvalPrompt] | None = None
+    if eval_set_path:
+        eval_set = load_eval_set(Path(eval_set_path))
+        click.echo(f"Loaded eval set from {eval_set_path} ({len(eval_set)} prompts).")
+    else:
+        click.echo(f"Generating eval set for '{name}' (count={count} per side)...")
+
+    try:
+        result = asyncio.run(run_eval(
+            skill_dir, model=model, eval_set=eval_set, count=count,
+        ))
+    except RuntimeError as exc:
+        click.echo(f"[ERROR] {exc}", err=True)
+        ctx.exit(1)
+
+    click.echo(f"\nEval set: {result.total} prompts")
+    click.echo(
+        f"Pass rate: {result.passed}/{result.total} "
+        f"({result.pass_rate * 100:.0f}%)"
+    )
+
+    if result.misses:
+        click.echo(f"\nMisses ({len(result.misses)}):")
+        for miss in result.misses:
+            click.echo(f"  - {miss.label} {miss.prompt.question}")
+    else:
+        click.echo("\nAll prompts graded correctly.")
+
+    if save_flag and eval_set is None:
+        path = save_eval_set(kb_dir, name, result.prompts)
+        click.echo(f"\nEval set persisted to {path}")
diff --git a/openkb/skill_evaluator.py b/openkb/skill_evaluator.py
new file mode 100644
index 00000000..6aa9a15d
--- /dev/null
+++ b/openkb/skill_evaluator.py
@@ -0,0 +1,220 @@
+"""Trigger-accuracy evaluator for compiled skills.
+
+The description: field in SKILL.md is the activation signal — it's what
+other agents read to decide whether to load the skill for a given user
+question. A vague or off-target description fails to fire when it should
+(false negatives) or fires when it shouldn't (false positives). This
+module catches both.
+
+Flow:
+  1. Read description from the skill's SKILL.md frontmatter
+  2. Ask a generator LLM: produce N should-trigger + N should-not prompts
+     based purely on the description (no other context)
+  3. For each prompt, ask a grader LLM: "given just this description,
+     should an agent load this skill for this question? yes/no"
+  4. Compare against expected labels (the ground truth from step 2)
+  5. Report pass rate + the specific misses
+
+Uses the same LiteLLM model the rest of the KB uses (config.yaml). No
+real LLM calls in tests — both generator and grader are patched.
+"""
+from __future__ import annotations
+
+import json
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Literal
+
+import yaml
+
+from agents import Agent, Runner
+from agents.model_settings import ModelSettings
+
+
+EVAL_DEFAULT_COUNT = 10  # 10 trigger + 10 no-trigger = 20 prompts
+
+
+@dataclass
+class EvalPrompt:
+    question: str
+    expected: Literal["trigger", "no-trigger"]
+
+
+@dataclass
+class EvalMiss:
+    prompt: EvalPrompt
+    graded: Literal["trigger", "no-trigger"]
+
+    @property
+    def label(self) -> str:
+        return f"[{self.prompt.expected} -> graded {self.graded}]"
+
+
+@dataclass
+class EvalResult:
+    prompts: list[EvalPrompt] = field(default_factory=list)
+    misses: list[EvalMiss] = field(default_factory=list)
+
+    @property
+    def total(self) -> int:
+        return len(self.prompts)
+
+    @property
+    def passed(self) -> int:
+        return self.total - len(self.misses)
+
+    @property
+    def pass_rate(self) -> float:
+        return self.passed / self.total if self.total else 0.0
+
+
+def _read_description(skill_dir: Path) -> str:
+    """Extract the description: field from SKILL.md frontmatter."""
+    skill_md = skill_dir / "SKILL.md"
+    text = skill_md.read_text(encoding="utf-8")
+    lines = text.splitlines()
+    if not lines or lines[0].strip() != "---":
+        raise RuntimeError(f"{skill_md} has no YAML frontmatter.")
+    try:
+        end = lines.index("---", 1)
+    except ValueError:
+        raise RuntimeError(f"{skill_md} has no YAML frontmatter.")
+    meta = yaml.safe_load("\n".join(lines[1:end])) or {}
+    desc = meta.get("description")
+    if not isinstance(desc, str) or not desc:
+        raise RuntimeError(f"{skill_md} has no description: field.")
+    return desc
+
+
+async def generate_eval_set(
+    skill_dir: Path,
+    *,
+    model: str,
+    count: int = EVAL_DEFAULT_COUNT,
+) -> list[EvalPrompt]:
+    """Use an LLM to generate ``count`` should-trigger + ``count`` should-not
+    eval prompts based on the skill's description.
+    """
+    desc = _read_description(skill_dir)
+
+    instructions = (
+        "You are designing an evaluation set for a knowledge-base skill. "
+        f"The skill's activation description is:\n\n"
+        f"  {desc}\n\n"
+        f"Produce exactly {count} 'should-trigger' user questions (questions where "
+        f"an agent SHOULD load this skill to answer well) and exactly {count} "
+        f"'should-not' user questions (plausible-sounding questions about other "
+        f"topics where this skill is NOT the right tool).\n\n"
+        f"Output ONLY a JSON object with this exact shape:\n"
+        f'  {{"should_trigger": [...{count} strings...], '
+        f'"should_not": [...{count} strings...]}}\n\n'
+        f"No prose. No markdown. Just the JSON object."
+    )
+
+    agent = Agent(
+        name="eval-set-generator",
+        instructions=instructions,
+        model=f"litellm/{model}",
+        model_settings=ModelSettings(parallel_tool_calls=False),
+    )
+    result = await Runner.run(agent, "Generate the eval set now.", max_turns=3)
+    raw = (result.final_output or "").strip()
+
+    # Strip optional code fence
+    if raw.startswith("```"):
+        raw = raw.split("\n", 1)[1].rsplit("```", 1)[0].strip()
+        if raw.startswith("json"):
+            raw = raw[4:].lstrip()
+
+    data = json.loads(raw)
+    prompts: list[EvalPrompt] = []
+    for q in data.get("should_trigger", []):
+        prompts.append(EvalPrompt(question=q, expected="trigger"))
+    for q in data.get("should_not", []):
+        prompts.append(EvalPrompt(question=q, expected="no-trigger"))
+    return prompts
+
+
+async def grade_one(
+    description: str,
+    question: str,
+    *,
+    model: str,
+) -> Literal["trigger", "no-trigger"]:
+    """Ask the grader LLM whether the description suggests this skill
+    should be loaded for the given question."""
+    instructions = (
+        "You are deciding whether an agent should load a specific skill to "
+        "answer a user question. You will be given the skill's activation "
+        "description and a single user question. Answer with one word: "
+        "TRIGGER (load the skill) or NO-TRIGGER (don't load).\n\n"
+        f"Skill description:\n  {description}\n\n"
+        "Reply with exactly one of: TRIGGER, NO-TRIGGER."
+    )
+    agent = Agent(
+        name="trigger-grader",
+        instructions=instructions,
+        model=f"litellm/{model}",
+        model_settings=ModelSettings(parallel_tool_calls=False),
+    )
+    result = await Runner.run(agent, f"Question: {question}", max_turns=2)
+    raw = (result.final_output or "").strip().upper()
+    if "NO-TRIGGER" in raw or "NO TRIGGER" in raw:
+        return "no-trigger"
+    if "TRIGGER" in raw:
+        return "trigger"
+    # Default: assume no-trigger on ambiguous output
+    return "no-trigger"
+
+
+async def run_eval(
+    skill_dir: Path,
+    *,
+    model: str,
+    eval_set: list[EvalPrompt] | None = None,
+    count: int = EVAL_DEFAULT_COUNT,
+) -> EvalResult:
+    """Run a trigger-accuracy evaluation.
+
+    Args:
+        skill_dir: ``<kb>/output/skills/<name>``
+        model: LiteLLM model string from KB config
+        eval_set: pre-generated prompts; if None, generate fresh
+        count: how many should-trigger + should-not prompts to generate
+    """
+    if eval_set is None:
+        eval_set = await generate_eval_set(skill_dir, model=model, count=count)
+
+    desc = _read_description(skill_dir)
+    result = EvalResult(prompts=eval_set)
+    for prompt in eval_set:
+        graded = await grade_one(desc, prompt.question, model=model)
+        if graded != prompt.expected:
+            result.misses.append(EvalMiss(prompt=prompt, graded=graded))
+    return result
+
+
+def save_eval_set(
+    kb_dir: Path, skill_name: str, prompts: list[EvalPrompt],
+) -> Path:
+    """Persist an eval set to ``<kb>/.openkb/eval-sets/<skill_name>.json``."""
+    out_dir = kb_dir / ".openkb" / "eval-sets"
+    out_dir.mkdir(parents=True, exist_ok=True)
+    out_path = out_dir / f"{skill_name}.json"
+    data = {
+        "should_trigger": [p.question for p in prompts if p.expected == "trigger"],
+        "should_not": [p.question for p in prompts if p.expected == "no-trigger"],
+    }
+    out_path.write_text(json.dumps(data, indent=2) + "\n", encoding="utf-8")
+    return out_path
+
+
+def load_eval_set(path: Path) -> list[EvalPrompt]:
+    """Load an eval set previously saved via ``save_eval_set``."""
+    data = json.loads(Path(path).read_text(encoding="utf-8"))
+    out: list[EvalPrompt] = []
+    for q in data.get("should_trigger", []):
+        out.append(EvalPrompt(question=q, expected="trigger"))
+    for q in data.get("should_not", []):
+        out.append(EvalPrompt(question=q, expected="no-trigger"))
+    return out
diff --git a/tests/test_skill_cli.py b/tests/test_skill_cli.py
index 6b2ea87e..7d42297a 100644
--- a/tests/test_skill_cli.py
+++ b/tests/test_skill_cli.py
@@ -306,3 +306,83 @@ def test_skill_new_keeps_existing_skill_when_key_setup_fails(tmp_path):
     assert result.exit_code != 0
     # Old skill must still be there
     assert (target / "stale.txt").read_text() == "priceless"
+
+
+# --------------------------------------------------------------------------
+# `openkb skill eval` — trigger-accuracy evaluator
+# --------------------------------------------------------------------------
+
+def _make_skill_dir(kb_dir, name="demo", description="Triggers for demo questions."):
+    """Create a minimal compiled skill on disk under <kb>/output/skills/<name>."""
+    skill_dir = kb_dir / "output" / "skills" / name
+    skill_dir.mkdir(parents=True, exist_ok=True)
+    (skill_dir / "SKILL.md").write_text(
+        f"---\nname: {name}\ndescription: {description}\n---\n\n# {name}\n",
+        encoding="utf-8",
+    )
+    return skill_dir
+
+
+def test_skill_eval_runs_with_provided_eval_set(tmp_path):
+    """Pass a pre-saved eval set + a perfect-grader mock — expect 100% pass."""
+    kb = _make_kb(tmp_path)
+    _make_skill_dir(kb, "demo")
+
+    # Save an eval set we can point --eval-set at.
+    eval_dir = kb / ".openkb" / "eval-sets"
+    eval_dir.mkdir(parents=True)
+    eval_path = eval_dir / "demo.json"
+    eval_path.write_text(json.dumps({
+        "should_trigger": ["t0", "t1"],
+        "should_not": ["n0", "n1"],
+    }))
+
+    async def perfect_grader(description, question, *, model):
+        return "trigger" if question.startswith("t") else "no-trigger"
+
+    runner = CliRunner()
+    with patch("openkb.cli._find_kb_dir", return_value=kb), \
+         patch("openkb.cli._setup_llm_key", return_value=None), \
+         patch("openkb.skill_evaluator.grade_one", side_effect=perfect_grader):
+        result = runner.invoke(cli, [
+            "skill", "eval", "demo", "--eval-set", str(eval_path),
+        ])
+
+    assert result.exit_code == 0, result.output
+    assert "Pass rate" in result.output
+    assert "4/4" in result.output
+    assert "100%" in result.output
+    assert "All prompts graded correctly" in result.output
+
+
+def test_skill_eval_reports_misses(tmp_path):
+    """Grader always returns 'trigger' — the no-trigger half must show as misses."""
+    kb = _make_kb(tmp_path)
+    _make_skill_dir(kb, "demo")
+
+    eval_dir = kb / ".openkb" / "eval-sets"
+    eval_dir.mkdir(parents=True)
+    eval_path = eval_dir / "demo.json"
+    eval_path.write_text(json.dumps({
+        "should_trigger": ["t0", "t1"],
+        "should_not": ["n0", "n1"],
+    }))
+
+    async def biased_grader(description, question, *, model):
+        return "trigger"
+
+    runner = CliRunner()
+    with patch("openkb.cli._find_kb_dir", return_value=kb), \
+         patch("openkb.cli._setup_llm_key", return_value=None), \
+         patch("openkb.skill_evaluator.grade_one", side_effect=biased_grader):
+        result = runner.invoke(cli, [
+            "skill", "eval", "demo", "--eval-set", str(eval_path),
+        ])
+
+    assert result.exit_code == 0, result.output
+    assert "Pass rate" in result.output
+    assert "2/4" in result.output
+    assert "Misses (2)" in result.output
+    # Each missed prompt must be listed in the output
+    assert "n0" in result.output
+    assert "n1" in result.output
diff --git a/tests/test_skill_evaluator.py b/tests/test_skill_evaluator.py
new file mode 100644
index 00000000..3863051e
--- /dev/null
+++ b/tests/test_skill_evaluator.py
@@ -0,0 +1,232 @@
+"""Tests for openkb.skill_evaluator.
+
+The Runner.run call is mocked everywhere — no real LLM tokens spent.
+What we DO verify:
+  * Description extraction from SKILL.md frontmatter
+  * Generator output parsing (with + without code fences)
+  * Grader response handling (uppercase / lowercase / ambiguous)
+  * End-to-end run_eval with mocked grading
+  * Save/load round-trip for persisted eval sets
+"""
+from __future__ import annotations
+
+import json
+from pathlib import Path
+from types import SimpleNamespace
+from unittest.mock import AsyncMock, patch
+
+import pytest
+
+from openkb.skill_evaluator import (
+    EvalMiss,
+    EvalPrompt,
+    EvalResult,
+    _read_description,
+    generate_eval_set,
+    grade_one,
+    load_eval_set,
+    run_eval,
+    save_eval_set,
+)
+
+
+def _make_skill(tmp_path: Path, *, description: str | None = "Triggers for foo questions.") -> Path:
+    """Create a minimal SKILL.md and return the skill directory."""
+    skill_dir = tmp_path / "output" / "skills" / "demo"
+    skill_dir.mkdir(parents=True)
+    if description is None:
+        body = "---\nname: demo\n---\n\n# demo\n"
+    else:
+        body = f"---\nname: demo\ndescription: {description}\n---\n\n# demo\n"
+    (skill_dir / "SKILL.md").write_text(body, encoding="utf-8")
+    return skill_dir
+
+
+# -------- _read_description ---------------------------------------------------
+
+
+def test_read_description_extracts_field(tmp_path):
+    skill_dir = _make_skill(tmp_path, description="Distill thoughts about transformers.")
+    assert _read_description(skill_dir) == "Distill thoughts about transformers."
+
+
+def test_read_description_raises_on_missing_frontmatter(tmp_path):
+    skill_dir = tmp_path / "output" / "skills" / "demo"
+    skill_dir.mkdir(parents=True)
+    (skill_dir / "SKILL.md").write_text("# demo\n\nNo frontmatter at all.\n")
+    with pytest.raises(RuntimeError, match="frontmatter"):
+        _read_description(skill_dir)
+
+
+def test_read_description_raises_on_missing_description_field(tmp_path):
+    skill_dir = _make_skill(tmp_path, description=None)
+    with pytest.raises(RuntimeError, match="description"):
+        _read_description(skill_dir)
+
+
+# -------- generate_eval_set ---------------------------------------------------
+
+
+def _fake_generator_payload(count: int = 10) -> str:
+    trig = [f"trigger question {i}" for i in range(count)]
+    no = [f"unrelated question {i}" for i in range(count)]
+    return json.dumps({"should_trigger": trig, "should_not": no})
+
+
+@pytest.mark.asyncio
+async def test_generate_eval_set_parses_plain_json(tmp_path):
+    skill_dir = _make_skill(tmp_path)
+
+    async def fake_runner(*args, **kwargs):
+        return SimpleNamespace(final_output=_fake_generator_payload(10))
+
+    with patch("openkb.skill_evaluator.Runner.run", new=AsyncMock(side_effect=fake_runner)):
+        prompts = await generate_eval_set(skill_dir, model="gpt-4o-mini", count=10)
+
+    assert len(prompts) == 20
+    assert sum(1 for p in prompts if p.expected == "trigger") == 10
+    assert sum(1 for p in prompts if p.expected == "no-trigger") == 10
+    assert prompts[0].question == "trigger question 0"
+    assert prompts[10].question == "unrelated question 0"
+
+
+@pytest.mark.asyncio
+async def test_generate_eval_set_strips_code_fences(tmp_path):
+    skill_dir = _make_skill(tmp_path)
+    fenced = "```json\n" + _fake_generator_payload(3) + "\n```"
+
+    async def fake_runner(*args, **kwargs):
+        return SimpleNamespace(final_output=fenced)
+
+    with patch("openkb.skill_evaluator.Runner.run", new=AsyncMock(side_effect=fake_runner)):
+        prompts = await generate_eval_set(skill_dir, model="gpt-4o-mini", count=3)
+
+    assert len(prompts) == 6
+
+
+# -------- grade_one -----------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_grade_one_returns_trigger_for_trigger_response():
+    async def fake_runner(*args, **kwargs):
+        return SimpleNamespace(final_output="TRIGGER")
+
+    with patch("openkb.skill_evaluator.Runner.run", new=AsyncMock(side_effect=fake_runner)):
+        out = await grade_one("desc", "question?", model="gpt-4o-mini")
+    assert out == "trigger"
+
+
+@pytest.mark.asyncio
+async def test_grade_one_returns_no_trigger_for_negative_response():
+    async def fake_runner(*args, **kwargs):
+        return SimpleNamespace(final_output="NO-TRIGGER")
+
+    with patch("openkb.skill_evaluator.Runner.run", new=AsyncMock(side_effect=fake_runner)):
+        out = await grade_one("desc", "question?", model="gpt-4o-mini")
+    assert out == "no-trigger"
+
+
+@pytest.mark.asyncio
+async def test_grade_one_handles_mixed_case():
+    async def fake_runner(*args, **kwargs):
+        return SimpleNamespace(final_output="trigger")
+
+    with patch("openkb.skill_evaluator.Runner.run", new=AsyncMock(side_effect=fake_runner)):
+        out = await grade_one("desc", "question?", model="gpt-4o-mini")
+    assert out == "trigger"
+
+
+@pytest.mark.asyncio
+async def test_grade_one_handles_space_variant():
+    async def fake_runner(*args, **kwargs):
+        return SimpleNamespace(final_output="No Trigger")
+
+    with patch("openkb.skill_evaluator.Runner.run", new=AsyncMock(side_effect=fake_runner)):
+        out = await grade_one("desc", "question?", model="gpt-4o-mini")
+    assert out == "no-trigger"
+
+
+@pytest.mark.asyncio
+async def test_grade_one_defaults_to_no_trigger_on_ambiguous_output():
+    async def fake_runner(*args, **kwargs):
+        return SimpleNamespace(final_output="hmm not sure")
+
+    with patch("openkb.skill_evaluator.Runner.run", new=AsyncMock(side_effect=fake_runner)):
+        out = await grade_one("desc", "question?", model="gpt-4o-mini")
+    assert out == "no-trigger"
+
+
+# -------- run_eval ------------------------------------------------------------
+
+
+def _build_eval_set(n_trig: int = 3, n_no: int = 3) -> list[EvalPrompt]:
+    prompts: list[EvalPrompt] = []
+    for i in range(n_trig):
+        prompts.append(EvalPrompt(question=f"trig {i}", expected="trigger"))
+    for i in range(n_no):
+        prompts.append(EvalPrompt(question=f"no {i}", expected="no-trigger"))
+    return prompts
+
+
+@pytest.mark.asyncio
+async def test_run_eval_happy_path_all_correct(tmp_path):
+    skill_dir = _make_skill(tmp_path)
+    eval_set = _build_eval_set(3, 3)
+
+    async def fake_grade(description, question, *, model):
+        # Return the ground truth label — perfect grader.
+        match = next(p for p in eval_set if p.question == question)
+        return match.expected
+
+    with patch("openkb.skill_evaluator.grade_one", side_effect=fake_grade):
+        result = await run_eval(skill_dir, model="gpt-4o-mini", eval_set=eval_set)
+
+    assert isinstance(result, EvalResult)
+    assert result.total == 6
+    assert result.passed == 6
+    assert result.pass_rate == 1.0
+    assert result.misses == []
+
+
+@pytest.mark.asyncio
+async def test_run_eval_reports_misses(tmp_path):
+    skill_dir = _make_skill(tmp_path)
+    eval_set = _build_eval_set(3, 3)
+
+    # Grader always says "trigger" — so the 3 no-trigger prompts will miss.
+    async def fake_grade(description, question, *, model):
+        return "trigger"
+
+    with patch("openkb.skill_evaluator.grade_one", side_effect=fake_grade):
+        result = await run_eval(skill_dir, model="gpt-4o-mini", eval_set=eval_set)
+
+    assert result.total == 6
+    assert result.passed == 3
+    assert len(result.misses) == 3
+    assert all(m.prompt.expected == "no-trigger" for m in result.misses)
+    assert all(m.graded == "trigger" for m in result.misses)
+    assert result.pass_rate == pytest.approx(0.5)
+    # EvalMiss.label sanity check
+    assert "no-trigger" in result.misses[0].label
+    assert "trigger" in result.misses[0].label
+
+
+# -------- save/load round-trip ------------------------------------------------
+
+
+def test_save_and_load_eval_set_round_trip(tmp_path):
+    prompts = _build_eval_set(2, 2)
+    path = save_eval_set(tmp_path, "demo", prompts)
+
+    assert path == tmp_path / ".openkb" / "eval-sets" / "demo.json"
+    assert path.is_file()
+
+    data = json.loads(path.read_text())
+    assert data["should_trigger"] == ["trig 0", "trig 1"]
+    assert data["should_not"] == ["no 0", "no 1"]
+
+    loaded = load_eval_set(path)
+    assert len(loaded) == 4
+    assert [p.question for p in loaded if p.expected == "trigger"] == ["trig 0", "trig 1"]
+    assert [p.question for p in loaded if p.expected == "no-trigger"] == ["no 0", "no 1"]

From a5fe56715cda7a70a2f007849c1b448b2ddf0c31 Mon Sep 17 00:00:00 2001
From: mountain <kose2livs@gmail.com>
Date: Mon, 18 May 2026 16:20:18 +0800
Subject: [PATCH 22/25] docs(readme): document validate / eval / history /
 rollback commands
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Skill Factory section now lists the 4 quality-gate commands borrowed
from Codex (validate) and Anthropic skill-creator (eval + iteration):

  openkb skill validate    structural lint (frontmatter, sizes, refs)
  openkb skill eval        trigger-accuracy test of the description
  openkb skill history     list past iterations
  openkb skill rollback    restore a previous iteration

The slogan promised a 'digital expert' — these commands are what makes
the output worth that label.
---
 README.md | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/README.md b/README.md
index a8010e02..3b0f0052 100644
--- a/README.md
+++ b/README.md
@@ -186,6 +186,9 @@ A "generator" reads from the compiled wiki and produces something usable: an ans
 | <code>openkb&nbsp;query&nbsp;"question"</code> | A grounded answer with citations (use `--save` to persist to `wiki/explorations/`) |
 | `openkb chat` | Interactive multi-turn session over the wiki (use `--resume`, `--list`, `--delete` to manage sessions) |
 | <code>openkb&nbsp;skill&nbsp;new&nbsp;&lt;name&gt;&nbsp;"&lt;intent&gt;"</code> | A redistributable Anthropic Skill at `<kb>/output/skills/<name>/` + auto-updated `marketplace.json` |
+| <code>openkb&nbsp;skill&nbsp;validate&nbsp;[name]</code> | Structural lint of compiled skills (frontmatter, file sizes, wikilinks, scripts/ stdlib check with `--strict`). Auto-runs at end of `skill new` |
+| <code>openkb&nbsp;skill&nbsp;eval&nbsp;&lt;name&gt;</code> | Trigger-accuracy evaluation — does the `description:` field actually fire? LLM generates eval prompts; grader LLM scores activation. `--save` persists the eval set |
+| <code>openkb&nbsp;skill&nbsp;history&nbsp;&lt;name&gt;</code> / <code>openkb&nbsp;skill&nbsp;rollback&nbsp;&lt;name&gt;</code> | Iteration workspace — every overwrite saves the previous version to `output/skills/<name>-workspace/iteration-N/` with a structural diff. Rollback restores any iteration |
 
 ### Query & Chat — ask the wiki
 
@@ -256,6 +259,21 @@ npx skills@latest add <your-org>/<your-repo>
 [agent edits SKILL.md frontmatter in place]
 ```
 
+**Quality gates** — borrowing from [Codex skill-creator](https://github.com/openai/skills) (structural validation) and [Anthropic skill-creator](https://github.com/anthropics/skills/tree/main/skills/skill-creator) (trigger-accuracy evals):
+
+```bash
+# Lint structure (auto-runs at end of `skill new`)
+openkb skill validate karpathy-thinking
+openkb skill validate --strict          # treat warnings as failures
+
+# Does the description actually fire when it should?
+openkb skill eval karpathy-thinking --save
+
+# History + rollback if a new iteration regresses
+openkb skill history karpathy-thinking
+openkb skill rollback karpathy-thinking --to 2
+```
+
 See [CONTRIBUTING.md](CONTRIBUTING.md) for how to submit your compiled skill back to the community registry at [VectifyAI/OpenKB](https://github.com/VectifyAI/OpenKB).
 
 ### Configuration

From bc916bc2cb25a091a3172335f49209de2671c93d Mon Sep 17 00:00:00 2001
From: mountain <kose2livs@gmail.com>
Date: Mon, 18 May 2026 16:39:45 +0800
Subject: [PATCH 23/25] fix(skill): round-2 review fixes + drop community docs

Code review round-2 flagged the eval pipeline reintroducing the
MaxTurnsExceeded/JSONDecodeError traceback leak that round-1 caught
for skill new. Apply the same shim inside skill_evaluator + 4 other
carryover items:

- Translate MaxTurnsExceeded and json.JSONDecodeError to RuntimeError
  inside generate_eval_set and grade_one. CLI catch (RuntimeError) now
  covers both.
- Wrap _setup_llm_key in skill_eval with the same try/except/exit
  pattern as skill_new / query / chat.
- Move openkb/skill_evaluator.py -> openkb/agent/skill_evaluator.py.
  Modules that construct Agent live under openkb/agent/ per repo
  convention; top-level openkb/ keeps marketplace + generator (no
  agents SDK).
- Validator: reject '<' / '>' in description (Anthropic parser
  requirement); warn on unknown frontmatter keys (Anthropic spec
  allows a fixed set).
- Drop redundant in-function 'import asyncio' from skill_eval (already
  at module top).
- Drop unused EvalMiss import from tests.
- Validator module docstring updated to enumerate all checks.

Also delete community contribution scaffolding (CONTRIBUTING.md +
.github/PULL_REQUEST_TEMPLATE/skill_submission.md) - premature for the
project's current stage; will revisit when real contributors arrive.
---
 .../PULL_REQUEST_TEMPLATE/skill_submission.md | 22 ------
 CONTRIBUTING.md                               | 71 -------------------
 README.md                                     |  2 -
 openkb/{ => agent}/skill_evaluator.py         | 27 ++++++-
 openkb/cli.py                                 |  9 ++-
 openkb/skill_validator.py                     | 32 +++++++--
 tests/test_skill_cli.py                       |  4 +-
 tests/test_skill_evaluator.py                 | 56 +++++++++++----
 tests/test_skill_validator.py                 | 32 +++++++++
 9 files changed, 134 insertions(+), 121 deletions(-)
 delete mode 100644 .github/PULL_REQUEST_TEMPLATE/skill_submission.md
 delete mode 100644 CONTRIBUTING.md
 rename openkb/{ => agent}/skill_evaluator.py (87%)

diff --git a/.github/PULL_REQUEST_TEMPLATE/skill_submission.md b/.github/PULL_REQUEST_TEMPLATE/skill_submission.md
deleted file mode 100644
index a413744a..00000000
--- a/.github/PULL_REQUEST_TEMPLATE/skill_submission.md
+++ /dev/null
@@ -1,22 +0,0 @@
-## Skill submission
-
-- **Name:** `<skill-name>`
-- **What it does (one sentence):**
-- **Source material:**
-
-### Checklist
-
-- [ ] I have the right to redistribute the source material this skill is
-      derived from (public, my own work, properly licensed, or fair-use
-      methodology only — not bulk-copied copyrighted text).
-- [ ] `SKILL.md` has valid YAML frontmatter with `name:` and `description:`
-      (description ≤ 1024 chars).
-- [ ] All `[[references/...]]` links resolve.
-- [ ] I added an entry under `plugins[0].skills` in
-      `.claude-plugin/marketplace.json`.
-- [ ] I installed and tested this skill locally in at least one agent CLI
-      (Claude Code / Codex CLI / Gemini CLI / Cursor).
-
-### Notes for reviewers
-
-<anything else maintainers should know>
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
deleted file mode 100644
index dfbb7eb2..00000000
--- a/CONTRIBUTING.md
+++ /dev/null
@@ -1,71 +0,0 @@
-# Contributing to OpenKB
-
-Thanks for your interest in OpenKB! There are two ways to contribute: code (bug
-fixes, new features) and skills (compiled knowledge artifacts).
-
-## Code contributions
-
-Standard GitHub workflow:
-
-1. Fork the repo and create a feature branch off `main`.
-2. Add tests for any new behaviour. Run `uv run pytest` to verify they pass.
-3. Open a PR with a clear description and link to any related issues.
-4. Address review feedback. A maintainer will merge once everything looks good.
-
-## Submitting a skill
-
-OpenKB doubles as a registry for compiled-knowledge skills. The `skills/`
-directory in this repo hosts the official `openkb` skill plus
-community-contributed skills installable via:
-
-```bash
-npx skills@latest add VectifyAI/OpenKB
-```
-
-To submit your skill:
-
-### 1. Compile and test locally
-
-```bash
-cd <your-kb>
-openkb skill new <skill-name> "<one-sentence intent>"
-cp -r output/skills/<skill-name> ~/.claude/skills/
-# Verify the skill activates in a Claude Code session on a relevant question
-```
-
-### 2. Open a PR against this repo
-
-1. Fork `VectifyAI/OpenKB`.
-2. Copy your skill into the fork at `skills/<your-skill-name>/`.
-3. Add a new entry under `plugins[0].skills` in `.claude-plugin/marketplace.json`:
-   ```json
-   "./skills/<your-skill-name>"
-   ```
-4. Open a PR with the "Skill submission" template (auto-applied when you create
-   the PR).
-
-### 3. Review
-
-A maintainer will review your submission against this checklist:
-
-- `SKILL.md` has a valid YAML frontmatter with `name:` and `description:`
-  (description ≤ 1024 characters).
-- The `description` is specific (not "a skill about X").
-- All `[[references/...]]` links resolve to files you actually included.
-- Skill name doesn't clash with an existing entry.
-- No obvious copyright-infringing content (large verbatim copies of recent
-  copyrighted books or papers without authorisation).
-
-You're responsible for confirming you have rights to redistribute the source
-material your skill is derived from. The PR template includes a checkbox for
-this.
-
-### What makes a good community skill
-
-- **Methodology-focused**, not bulk-copied content. "How to reason about X"
-  beats "the full text of X".
-- **Specific scope** so the `description:` field can be precise enough that
-  loading agents pick it up at the right time.
-- **Tested in at least one agent CLI** before submission.
-
-Thanks for sharing what you've compiled!
diff --git a/README.md b/README.md
index 3b0f0052..5497ec4a 100644
--- a/README.md
+++ b/README.md
@@ -274,8 +274,6 @@ openkb skill history karpathy-thinking
 openkb skill rollback karpathy-thinking --to 2
 ```
 
-See [CONTRIBUTING.md](CONTRIBUTING.md) for how to submit your compiled skill back to the community registry at [VectifyAI/OpenKB](https://github.com/VectifyAI/OpenKB).
-
 ### Configuration
 
 Settings are initialized by `openkb init`, and stored in `.openkb/config.yaml`:
diff --git a/openkb/skill_evaluator.py b/openkb/agent/skill_evaluator.py
similarity index 87%
rename from openkb/skill_evaluator.py
rename to openkb/agent/skill_evaluator.py
index 6aa9a15d..979df4f0 100644
--- a/openkb/skill_evaluator.py
+++ b/openkb/agent/skill_evaluator.py
@@ -117,7 +117,14 @@ async def generate_eval_set(
         model=f"litellm/{model}",
         model_settings=ModelSettings(parallel_tool_calls=False),
     )
-    result = await Runner.run(agent, "Generate the eval set now.", max_turns=3)
+    from agents.exceptions import MaxTurnsExceeded
+    try:
+        result = await Runner.run(agent, "Generate the eval set now.", max_turns=3)
+    except MaxTurnsExceeded as exc:
+        raise RuntimeError(
+            "Eval set generation hit the max-turn cap. The model may be "
+            "looping; try a different model or a smaller --count."
+        ) from exc
     raw = (result.final_output or "").strip()
 
     # Strip optional code fence
@@ -126,7 +133,14 @@ async def generate_eval_set(
         if raw.startswith("json"):
             raw = raw[4:].lstrip()
 
-    data = json.loads(raw)
+    try:
+        data = json.loads(raw)
+    except json.JSONDecodeError as exc:
+        raise RuntimeError(
+            f"Eval set generator returned non-JSON output: {exc.msg}. "
+            f"Try a more capable model — small models often ignore "
+            f"'output only JSON' instructions. First 200 chars: {raw[:200]!r}"
+        ) from exc
     prompts: list[EvalPrompt] = []
     for q in data.get("should_trigger", []):
         prompts.append(EvalPrompt(question=q, expected="trigger"))
@@ -157,7 +171,14 @@ async def grade_one(
         model=f"litellm/{model}",
         model_settings=ModelSettings(parallel_tool_calls=False),
     )
-    result = await Runner.run(agent, f"Question: {question}", max_turns=2)
+    from agents.exceptions import MaxTurnsExceeded
+    try:
+        result = await Runner.run(agent, f"Question: {question}", max_turns=2)
+    except MaxTurnsExceeded as exc:
+        raise RuntimeError(
+            f"Trigger grader hit the max-turn cap on question: {question!r}. "
+            f"Try a more capable model."
+        ) from exc
     raw = (result.final_output or "").strip().upper()
     if "NO-TRIGGER" in raw or "NO TRIGGER" in raw:
         return "no-trigger"
diff --git a/openkb/cli.py b/openkb/cli.py
index 75e2dce7..84718543 100644
--- a/openkb/cli.py
+++ b/openkb/cli.py
@@ -1744,8 +1744,7 @@ def skill_eval(ctx, name, save_flag, eval_set_path, count):
     the description should activate the skill for each prompt. Prints pass
     rate + miss list.
     """
-    import asyncio
-    from openkb.skill_evaluator import (
+    from openkb.agent.skill_evaluator import (
         run_eval, save_eval_set, load_eval_set, EvalPrompt,
     )
 
@@ -1759,7 +1758,11 @@ def skill_eval(ctx, name, save_flag, eval_set_path, count):
         click.echo(f"[ERROR] Skill '{name}' not found.", err=True)
         ctx.exit(1)
 
-    _setup_llm_key(kb_dir)
+    try:
+        _setup_llm_key(kb_dir)
+    except RuntimeError as exc:
+        click.echo(f"[ERROR] {exc}", err=True)
+        ctx.exit(1)
     config = load_config(kb_dir / ".openkb" / "config.yaml")
     model = config.get("model", DEFAULT_CONFIG["model"])
 
diff --git a/openkb/skill_validator.py b/openkb/skill_validator.py
index 82de55d4..aecff89f 100644
--- a/openkb/skill_validator.py
+++ b/openkb/skill_validator.py
@@ -4,12 +4,15 @@
 make a skill un-loadable or misleading to the agents that install it:
 
   * SKILL.md missing or unparseable
-  * frontmatter missing required fields
-  * name field doesn't match directory or violates the slug rule
-  * description too long (> 1024 chars per Anthropic spec)
-  * files too big (SKILL.md > 50 KB / references/*.md > 100 KB)
-  * `[[references/...]]` wikilinks pointing at files that don't exist
-  * (strict mode) scripts/*.py importing non-stdlib modules
+  * frontmatter present, parses as YAML, is a mapping
+  * required fields: name (matches dir + slug regex), description
+  * description length within bounds (warns < 20 chars, errors > 1024)
+  * description must not contain '<' or '>' (breaks activation parser)
+  * frontmatter keys limited to the Anthropic Skills allowed set
+    (warns on unknown keys; matches Anthropic's quick_validate.py)
+  * files within size limits (SKILL.md ≤ 50 KB / references/*.md ≤ 100 KB)
+  * `[[references/...]]` wikilinks resolve to actual files
+  * (strict mode) scripts/*.py imports only stdlib modules
 
 This is the deterministic counterpart to ``openkb skill eval`` — eval
 measures whether the description fires; validate ensures the structure
@@ -32,6 +35,9 @@
 REFERENCE_MAX_BYTES = 100 * 1024
 NAME_MAX_LEN = 64
 WIKILINK_RE = re.compile(r"\[\[references/([a-z0-9._/-]+)\]\]", re.IGNORECASE)
+ALLOWED_FRONTMATTER_KEYS = {
+    "name", "description", "license", "allowed-tools", "metadata", "compatibility",
+}
 
 
 @dataclass
@@ -98,6 +104,15 @@ def validate_skill(skill_dir: Path, *, strict: bool = False) -> ValidationResult
         result.errors.append("Frontmatter must be a YAML mapping.")
         return result
 
+    extras = set(meta.keys()) - ALLOWED_FRONTMATTER_KEYS
+    if extras:
+        # Treat as warning, not error — keeps strict mode user-controllable
+        result.warnings.append(
+            f"Frontmatter contains unknown keys: {sorted(extras)}. "
+            f"Anthropic Skills spec only allows: "
+            f"{sorted(ALLOWED_FRONTMATTER_KEYS)}."
+        )
+
     # name field
     name = meta.get("name")
     if not name:
@@ -133,6 +148,11 @@ def validate_skill(skill_dir: Path, *, strict: bool = False) -> ValidationResult
                 f"Frontmatter 'description:' is only {len(desc)} chars — "
                 f"too short to be a useful activation signal."
             )
+        if "<" in desc or ">" in desc:
+            result.errors.append(
+                "Frontmatter 'description:' must not contain '<' or '>' "
+                "characters — they break the activation parser in Claude Code."
+            )
 
     # references/ wikilink resolution
     wikilinks = WIKILINK_RE.findall(text)
diff --git a/tests/test_skill_cli.py b/tests/test_skill_cli.py
index 7d42297a..086eacb9 100644
--- a/tests/test_skill_cli.py
+++ b/tests/test_skill_cli.py
@@ -343,7 +343,7 @@ async def perfect_grader(description, question, *, model):
     runner = CliRunner()
     with patch("openkb.cli._find_kb_dir", return_value=kb), \
          patch("openkb.cli._setup_llm_key", return_value=None), \
-         patch("openkb.skill_evaluator.grade_one", side_effect=perfect_grader):
+         patch("openkb.agent.skill_evaluator.grade_one", side_effect=perfect_grader):
         result = runner.invoke(cli, [
             "skill", "eval", "demo", "--eval-set", str(eval_path),
         ])
@@ -374,7 +374,7 @@ async def biased_grader(description, question, *, model):
     runner = CliRunner()
     with patch("openkb.cli._find_kb_dir", return_value=kb), \
          patch("openkb.cli._setup_llm_key", return_value=None), \
-         patch("openkb.skill_evaluator.grade_one", side_effect=biased_grader):
+         patch("openkb.agent.skill_evaluator.grade_one", side_effect=biased_grader):
         result = runner.invoke(cli, [
             "skill", "eval", "demo", "--eval-set", str(eval_path),
         ])
diff --git a/tests/test_skill_evaluator.py b/tests/test_skill_evaluator.py
index 3863051e..01826f76 100644
--- a/tests/test_skill_evaluator.py
+++ b/tests/test_skill_evaluator.py
@@ -1,4 +1,4 @@
-"""Tests for openkb.skill_evaluator.
+"""Tests for openkb.agent.skill_evaluator.
 
 The Runner.run call is mocked everywhere — no real LLM tokens spent.
 What we DO verify:
@@ -17,8 +17,7 @@
 
 import pytest
 
-from openkb.skill_evaluator import (
-    EvalMiss,
+from openkb.agent.skill_evaluator import (
     EvalPrompt,
     EvalResult,
     _read_description,
@@ -80,7 +79,7 @@ async def test_generate_eval_set_parses_plain_json(tmp_path):
     async def fake_runner(*args, **kwargs):
         return SimpleNamespace(final_output=_fake_generator_payload(10))
 
-    with patch("openkb.skill_evaluator.Runner.run", new=AsyncMock(side_effect=fake_runner)):
+    with patch("openkb.agent.skill_evaluator.Runner.run", new=AsyncMock(side_effect=fake_runner)):
         prompts = await generate_eval_set(skill_dir, model="gpt-4o-mini", count=10)
 
     assert len(prompts) == 20
@@ -98,7 +97,7 @@ async def test_generate_eval_set_strips_code_fences(tmp_path):
     async def fake_runner(*args, **kwargs):
         return SimpleNamespace(final_output=fenced)
 
-    with patch("openkb.skill_evaluator.Runner.run", new=AsyncMock(side_effect=fake_runner)):
+    with patch("openkb.agent.skill_evaluator.Runner.run", new=AsyncMock(side_effect=fake_runner)):
         prompts = await generate_eval_set(skill_dir, model="gpt-4o-mini", count=3)
 
     assert len(prompts) == 6
@@ -112,7 +111,7 @@ async def test_grade_one_returns_trigger_for_trigger_response():
     async def fake_runner(*args, **kwargs):
         return SimpleNamespace(final_output="TRIGGER")
 
-    with patch("openkb.skill_evaluator.Runner.run", new=AsyncMock(side_effect=fake_runner)):
+    with patch("openkb.agent.skill_evaluator.Runner.run", new=AsyncMock(side_effect=fake_runner)):
         out = await grade_one("desc", "question?", model="gpt-4o-mini")
     assert out == "trigger"
 
@@ -122,7 +121,7 @@ async def test_grade_one_returns_no_trigger_for_negative_response():
     async def fake_runner(*args, **kwargs):
         return SimpleNamespace(final_output="NO-TRIGGER")
 
-    with patch("openkb.skill_evaluator.Runner.run", new=AsyncMock(side_effect=fake_runner)):
+    with patch("openkb.agent.skill_evaluator.Runner.run", new=AsyncMock(side_effect=fake_runner)):
         out = await grade_one("desc", "question?", model="gpt-4o-mini")
     assert out == "no-trigger"
 
@@ -132,7 +131,7 @@ async def test_grade_one_handles_mixed_case():
     async def fake_runner(*args, **kwargs):
         return SimpleNamespace(final_output="trigger")
 
-    with patch("openkb.skill_evaluator.Runner.run", new=AsyncMock(side_effect=fake_runner)):
+    with patch("openkb.agent.skill_evaluator.Runner.run", new=AsyncMock(side_effect=fake_runner)):
         out = await grade_one("desc", "question?", model="gpt-4o-mini")
     assert out == "trigger"
 
@@ -142,7 +141,7 @@ async def test_grade_one_handles_space_variant():
     async def fake_runner(*args, **kwargs):
         return SimpleNamespace(final_output="No Trigger")
 
-    with patch("openkb.skill_evaluator.Runner.run", new=AsyncMock(side_effect=fake_runner)):
+    with patch("openkb.agent.skill_evaluator.Runner.run", new=AsyncMock(side_effect=fake_runner)):
         out = await grade_one("desc", "question?", model="gpt-4o-mini")
     assert out == "no-trigger"
 
@@ -152,7 +151,7 @@ async def test_grade_one_defaults_to_no_trigger_on_ambiguous_output():
     async def fake_runner(*args, **kwargs):
         return SimpleNamespace(final_output="hmm not sure")
 
-    with patch("openkb.skill_evaluator.Runner.run", new=AsyncMock(side_effect=fake_runner)):
+    with patch("openkb.agent.skill_evaluator.Runner.run", new=AsyncMock(side_effect=fake_runner)):
         out = await grade_one("desc", "question?", model="gpt-4o-mini")
     assert out == "no-trigger"
 
@@ -179,7 +178,7 @@ async def fake_grade(description, question, *, model):
         match = next(p for p in eval_set if p.question == question)
         return match.expected
 
-    with patch("openkb.skill_evaluator.grade_one", side_effect=fake_grade):
+    with patch("openkb.agent.skill_evaluator.grade_one", side_effect=fake_grade):
         result = await run_eval(skill_dir, model="gpt-4o-mini", eval_set=eval_set)
 
     assert isinstance(result, EvalResult)
@@ -198,7 +197,7 @@ async def test_run_eval_reports_misses(tmp_path):
     async def fake_grade(description, question, *, model):
         return "trigger"
 
-    with patch("openkb.skill_evaluator.grade_one", side_effect=fake_grade):
+    with patch("openkb.agent.skill_evaluator.grade_one", side_effect=fake_grade):
         result = await run_eval(skill_dir, model="gpt-4o-mini", eval_set=eval_set)
 
     assert result.total == 6
@@ -230,3 +229,36 @@ def test_save_and_load_eval_set_round_trip(tmp_path):
     assert len(loaded) == 4
     assert [p.question for p in loaded if p.expected == "trigger"] == ["trig 0", "trig 1"]
     assert [p.question for p in loaded if p.expected == "no-trigger"] == ["no 0", "no 1"]
+
+
+# -------- RuntimeError translation for CLI catch -------------------------------
+
+
+@pytest.mark.asyncio
+async def test_generate_eval_set_translates_max_turns_to_runtime_error(tmp_path):
+    """MaxTurnsExceeded from Runner.run should become RuntimeError."""
+    from agents.exceptions import MaxTurnsExceeded
+
+    skill_dir = _make_skill(tmp_path)
+
+    async def fake_runner(*args, **kwargs):
+        raise MaxTurnsExceeded("ran out")
+
+    with patch("openkb.agent.skill_evaluator.Runner.run",
+               new=AsyncMock(side_effect=fake_runner)):
+        with pytest.raises(RuntimeError, match="max-turn cap"):
+            await generate_eval_set(skill_dir, model="gpt-4o-mini")
+
+
+@pytest.mark.asyncio
+async def test_generate_eval_set_translates_malformed_json_to_runtime_error(tmp_path):
+    """Non-JSON LLM output should produce a friendly RuntimeError."""
+    skill_dir = _make_skill(tmp_path)
+
+    async def fake_runner(*args, **kwargs):
+        return SimpleNamespace(final_output="this is not json at all")
+
+    with patch("openkb.agent.skill_evaluator.Runner.run",
+               new=AsyncMock(side_effect=fake_runner)):
+        with pytest.raises(RuntimeError, match="non-JSON output"):
+            await generate_eval_set(skill_dir, model="gpt-4o-mini")
diff --git a/tests/test_skill_validator.py b/tests/test_skill_validator.py
index 06fad87a..b59b6d31 100644
--- a/tests/test_skill_validator.py
+++ b/tests/test_skill_validator.py
@@ -303,3 +303,35 @@ def test_passed_vs_passed_strict_semantics(tmp_path):
     result = validate_skill(sd)
     assert result.passed is True
     assert result.passed_strict is False
+
+
+# ---------------------------------------------------------------------------
+# new round-2 checks: angle brackets in description + unknown frontmatter keys
+# ---------------------------------------------------------------------------
+
+def test_validator_rejects_angle_brackets_in_description(tmp_path):
+    """Anthropic's activation parser breaks on < or > in description."""
+    sd = _write_skill(
+        tmp_path, "demo",
+        frontmatter="name: demo\ndescription: Reason about <transformers> here.",
+    )
+    result = validate_skill(sd)
+    assert not result.passed
+    assert any("'<' or '>'" in e for e in result.errors)
+
+
+def test_validator_warns_on_unknown_frontmatter_keys(tmp_path):
+    """Anthropic spec only allows a fixed set of frontmatter keys."""
+    sd = _write_skill(
+        tmp_path, "demo",
+        frontmatter=(
+            "name: demo\ndescription: A valid description string here.\n"
+            "random_key: foo\nanother_one: bar"
+        ),
+    )
+    result = validate_skill(sd)
+    # Passes in default (warnings only)
+    assert result.passed
+    # But strict mode catches it
+    assert not result.passed_strict
+    assert any("unknown keys" in w for w in result.warnings)

From a4e29b1c4007197ef989594876c31f2bf9b6e803 Mon Sep 17 00:00:00 2001
From: mountain <kose2livs@gmail.com>
Date: Mon, 18 May 2026 16:47:20 +0800
Subject: [PATCH 24/25] refactor(skill): consolidate skill modules under
 openkb/skill/

All skill-related code now lives in a single openkb/skill/ subpackage
(7 modules + __init__). Drop the redundant 'skill_' prefix on filenames
since the package qualifies them already.

Moves:
  openkb/generator.py            -> openkb/skill/generator.py
  openkb/marketplace.py          -> openkb/skill/marketplace.py
  openkb/skill_validator.py      -> openkb/skill/validator.py
  openkb/skill_workspace.py      -> openkb/skill/workspace.py
  openkb/agent/skill_creator.py  -> openkb/skill/creator.py
  openkb/agent/skill_tools.py    -> openkb/skill/tools.py
  openkb/agent/skill_evaluator.py -> openkb/skill/evaluator.py

generator.py + marketplace.py go under skill/ for now (v0.x only has
skills); they're nominally generic primitives but YAGNI -- when a
second artifact type (ppt / podcast / report) actually lands, those
two will move back out to openkb/<shared>/.

No behavioral changes. All imports + test patch targets updated.
Test suite stays at 494 passed.
---
 openkb/agent/chat.py                          |  2 +-
 openkb/cli.py                                 | 16 ++++++------
 openkb/skill/__init__.py                      |  8 ++++++
 .../skill_creator.py => skill/creator.py}     |  2 +-
 .../skill_evaluator.py => skill/evaluator.py} |  0
 openkb/{ => skill}/generator.py               |  6 ++---
 openkb/{ => skill}/marketplace.py             |  0
 .../{agent/skill_tools.py => skill/tools.py}  |  2 +-
 .../validator.py}                             |  0
 .../workspace.py}                             |  0
 tests/test_generator.py                       |  8 +++---
 tests/test_marketplace.py                     |  4 +--
 tests/test_skill_chat_slash.py                |  2 +-
 tests/test_skill_cli.py                       | 10 +++----
 tests/test_skill_creator.py                   | 10 +++----
 tests/test_skill_evaluator.py                 | 26 +++++++++----------
 tests/test_skill_tools.py                     |  4 +--
 tests/test_skill_validator.py                 |  4 +--
 tests/test_skill_workspace.py                 |  4 +--
 19 files changed, 58 insertions(+), 50 deletions(-)
 create mode 100644 openkb/skill/__init__.py
 rename openkb/{agent/skill_creator.py => skill/creator.py} (99%)
 rename openkb/{agent/skill_evaluator.py => skill/evaluator.py} (100%)
 rename openkb/{ => skill}/generator.py (92%)
 rename openkb/{ => skill}/marketplace.py (100%)
 rename openkb/{agent/skill_tools.py => skill/tools.py} (96%)
 rename openkb/{skill_validator.py => skill/validator.py} (100%)
 rename openkb/{skill_workspace.py => skill/workspace.py} (100%)

diff --git a/openkb/agent/chat.py b/openkb/agent/chat.py
index afabc745..a2763a73 100644
--- a/openkb/agent/chat.py
+++ b/openkb/agent/chat.py
@@ -542,7 +542,7 @@ async def _handle_slash_skill(arg: str, kb_dir: Path, style: Style) -> None:
     config = load_config(kb_dir / ".openkb" / "config.yaml")
     model = config.get("model", DEFAULT_CONFIG["model"])
 
-    from openkb.generator import Generator
+    from openkb.skill.generator import Generator
     _fmt(style, ("class:slash.help", f"Compiling skill '{name}'...\n"))
     try:
         gen = Generator(
diff --git a/openkb/cli.py b/openkb/cli.py
index 84718543..79370741 100644
--- a/openkb/cli.py
+++ b/openkb/cli.py
@@ -1466,7 +1466,7 @@ def skill_new(ctx, name, intent, yes_flag):
     # into <kb>/output/skills/<name>-workspace/iteration-N/ first, so
     # the user can roll back via `openkb skill rollback`. See
     # ``openkb/skill_workspace.py``.
-    from openkb.skill_workspace import save_iteration, write_diff
+    from openkb.skill.workspace import save_iteration, write_diff
 
     target = kb_dir / "output" / "skills" / name
     saved_iteration: Path | None = None
@@ -1492,7 +1492,7 @@ def skill_new(ctx, name, intent, yes_flag):
             ctx.exit(1)
 
     # Run the generator
-    from openkb.generator import Generator
+    from openkb.skill.generator import Generator
     click.echo(f"Compiling skill '{name}'...")
     try:
         gen = Generator(
@@ -1519,7 +1519,7 @@ def skill_new(ctx, name, intent, yes_flag):
 
     # Auto-validate the freshly compiled skill. Surface issues but don't
     # block — files are on disk and the user can fix or rollback.
-    from openkb.skill_validator import validate_skill
+    from openkb.skill.validator import validate_skill
     skill_dir = kb_dir / "output" / "skills" / name
     result = validate_skill(skill_dir)
     if result.errors or result.warnings:
@@ -1551,7 +1551,7 @@ def skill_history(ctx, name):
     """List previous iterations of a skill."""
     import datetime as _dt
 
-    from openkb.skill_workspace import list_iterations
+    from openkb.skill.workspace import list_iterations
 
     kb_dir = _find_kb_dir(ctx.obj.get("kb_dir_override"))
     if kb_dir is None:
@@ -1611,8 +1611,8 @@ def skill_history(ctx, name):
 @click.pass_context
 def skill_rollback(ctx, name, to_n, yes_flag):
     """Restore a previous iteration as the current skill."""
-    from openkb.marketplace import regenerate_marketplace
-    from openkb.skill_workspace import list_iterations, restore_iteration
+    from openkb.skill.marketplace import regenerate_marketplace
+    from openkb.skill.workspace import list_iterations, restore_iteration
 
     kb_dir = _find_kb_dir(ctx.obj.get("kb_dir_override"))
     if kb_dir is None:
@@ -1681,7 +1681,7 @@ def skill_rollback(ctx, name, to_n, yes_flag):
 @click.pass_context
 def skill_validate(ctx, name, strict):
     """Validate one skill (by name) or all compiled skills in this KB."""
-    from openkb.skill_validator import validate_skill
+    from openkb.skill.validator import validate_skill
 
     kb_dir = _find_kb_dir(ctx.obj.get("kb_dir_override"))
     if kb_dir is None:
@@ -1744,7 +1744,7 @@ def skill_eval(ctx, name, save_flag, eval_set_path, count):
     the description should activate the skill for each prompt. Prints pass
     rate + miss list.
     """
-    from openkb.agent.skill_evaluator import (
+    from openkb.skill.evaluator import (
         run_eval, save_eval_set, load_eval_set, EvalPrompt,
     )
 
diff --git a/openkb/skill/__init__.py b/openkb/skill/__init__.py
new file mode 100644
index 00000000..53b8c9c3
--- /dev/null
+++ b/openkb/skill/__init__.py
@@ -0,0 +1,8 @@
+"""All skill-related code (generator, marketplace, agent runtime, validator,
+workspace, evaluator) lives in this subpackage.
+
+Today's only artifact type is ``skill``; the generator + marketplace
+abstractions are nominally generic, but in v0.x they only serve skill
+artifacts. If/when ppt / podcast / report targets land, factor the
+generic primitives back out to ``openkb/<shared>/`` at that time.
+"""
diff --git a/openkb/agent/skill_creator.py b/openkb/skill/creator.py
similarity index 99%
rename from openkb/agent/skill_creator.py
rename to openkb/skill/creator.py
index 2e023c4e..d8db0314 100644
--- a/openkb/agent/skill_creator.py
+++ b/openkb/skill/creator.py
@@ -18,7 +18,7 @@
 from agents import Agent, Runner, function_tool
 from agents.model_settings import ModelSettings
 
-from openkb.agent.skill_tools import (
+from openkb.skill.tools import (
     list_wiki_dir as _list_wiki_dir_impl,
     read_wiki_file_for_skill as _read_wiki_file_impl,
     write_skill_file as _write_skill_file_impl,
diff --git a/openkb/agent/skill_evaluator.py b/openkb/skill/evaluator.py
similarity index 100%
rename from openkb/agent/skill_evaluator.py
rename to openkb/skill/evaluator.py
diff --git a/openkb/generator.py b/openkb/skill/generator.py
similarity index 92%
rename from openkb/generator.py
rename to openkb/skill/generator.py
index 843a1b06..1517aa8b 100644
--- a/openkb/generator.py
+++ b/openkb/skill/generator.py
@@ -9,15 +9,15 @@
   distribution mechanic as skills)
 
 Each target plugs in its own `run` coroutine. v0.1's only entry calls into
-``openkb.agent.skill_creator.run_skill_create``.
+``openkb.skill.creator.run_skill_create``.
 """
 from __future__ import annotations
 
 from pathlib import Path
 from typing import Literal
 
-from openkb.agent.skill_creator import run_skill_create
-from openkb.marketplace import regenerate_marketplace
+from openkb.skill.creator import run_skill_create
+from openkb.skill.marketplace import regenerate_marketplace
 
 
 TargetType = Literal["skill"]  # extend as new targets land
diff --git a/openkb/marketplace.py b/openkb/skill/marketplace.py
similarity index 100%
rename from openkb/marketplace.py
rename to openkb/skill/marketplace.py
diff --git a/openkb/agent/skill_tools.py b/openkb/skill/tools.py
similarity index 96%
rename from openkb/agent/skill_tools.py
rename to openkb/skill/tools.py
index 3b7c736e..357b3a29 100644
--- a/openkb/agent/skill_tools.py
+++ b/openkb/skill/tools.py
@@ -2,7 +2,7 @@
 
 The skill-create agent runs with three capabilities:
   * READ from <kb>/wiki/**            (the substrate)
-  * QUERY the wiki via openkb query   (semantic retrieval, see skill_creator.py)
+  * QUERY the wiki via openkb query   (semantic retrieval, see creator.py)
   * WRITE under <kb>/output/skills/<name>/**   (the only output destination)
 
 These helpers enforce those boundaries at the Python level — every tool
diff --git a/openkb/skill_validator.py b/openkb/skill/validator.py
similarity index 100%
rename from openkb/skill_validator.py
rename to openkb/skill/validator.py
diff --git a/openkb/skill_workspace.py b/openkb/skill/workspace.py
similarity index 100%
rename from openkb/skill_workspace.py
rename to openkb/skill/workspace.py
diff --git a/tests/test_generator.py b/tests/test_generator.py
index 1ef179f5..37d574f3 100644
--- a/tests/test_generator.py
+++ b/tests/test_generator.py
@@ -1,4 +1,4 @@
-"""Tests for openkb.generator.Generator — the v0.1 abstraction that will
+"""Tests for openkb.skill.generator.Generator — the v0.1 abstraction that will
 be reused by future ppt / podcast generators.
 
 In v0.1, only target_type='skill' is supported. We test the dispatch shape
@@ -8,7 +8,7 @@
 import pytest
 from unittest.mock import AsyncMock, patch
 
-from openkb.generator import Generator
+from openkb.skill.generator import Generator
 
 
 def _make_kb(tmp_path):
@@ -53,8 +53,8 @@ async def test_generator_run_delegates_to_skill_creator(tmp_path):
         kb_dir=kb,
         model="gpt-4o-mini",
     )
-    with patch("openkb.generator.run_skill_create", new=AsyncMock()) as runner, \
-         patch("openkb.generator.regenerate_marketplace") as regen:
+    with patch("openkb.skill.generator.run_skill_create", new=AsyncMock()) as runner, \
+         patch("openkb.skill.generator.regenerate_marketplace") as regen:
         await g.run()
     runner.assert_awaited_once()
     regen.assert_called_once_with(kb)
diff --git a/tests/test_marketplace.py b/tests/test_marketplace.py
index a34c34ba..b122f6b7 100644
--- a/tests/test_marketplace.py
+++ b/tests/test_marketplace.py
@@ -1,11 +1,11 @@
-"""Tests for openkb.marketplace — regenerate <kb>/.claude-plugin/marketplace.json
+"""Tests for openkb.skill.marketplace — regenerate <kb>/.claude-plugin/marketplace.json
 from <kb>/output/skills/*/SKILL.md."""
 from __future__ import annotations
 
 import json
 import textwrap
 
-from openkb.marketplace import regenerate_marketplace
+from openkb.skill.marketplace import regenerate_marketplace
 
 
 def _make_kb(tmp_path):
diff --git a/tests/test_skill_chat_slash.py b/tests/test_skill_chat_slash.py
index 100135a2..d99dd152 100644
--- a/tests/test_skill_chat_slash.py
+++ b/tests/test_skill_chat_slash.py
@@ -36,7 +36,7 @@ async def fake_run(kb_dir, skill_name, intent, model):
             f"---\nname: {skill_name}\ndescription: t\n---\n\n# {skill_name}\n"
         )
 
-    with patch("openkb.generator.run_skill_create", new=AsyncMock(side_effect=fake_run)):
+    with patch("openkb.skill.generator.run_skill_create", new=AsyncMock(side_effect=fake_run)):
         action = await _handle_slash(
             '/skill new demo "test intent"', kb, session, style
         )
diff --git a/tests/test_skill_cli.py b/tests/test_skill_cli.py
index 086eacb9..b8575327 100644
--- a/tests/test_skill_cli.py
+++ b/tests/test_skill_cli.py
@@ -42,7 +42,7 @@ async def fake_run(kb_dir, skill_name, intent, model):
         _fake_compile(kb_dir, skill_name)
 
     with patch("openkb.cli._find_kb_dir", return_value=kb), \
-         patch("openkb.generator.run_skill_create", new=AsyncMock(side_effect=fake_run)):
+         patch("openkb.skill.generator.run_skill_create", new=AsyncMock(side_effect=fake_run)):
         result = runner.invoke(cli, ["skill", "new", "demo", "test intent"])
 
     assert result.exit_code == 0, result.output
@@ -123,7 +123,7 @@ async def fake_run(kb_dir, skill_name, intent, model):
         _fake_compile(kb_dir, skill_name)
 
     with patch("openkb.cli._find_kb_dir", return_value=kb), \
-         patch("openkb.generator.run_skill_create", new=AsyncMock(side_effect=fake_run)):
+         patch("openkb.skill.generator.run_skill_create", new=AsyncMock(side_effect=fake_run)):
         result = runner.invoke(cli, ["skill", "new", "demo", "x", "-y"])
 
     assert result.exit_code == 0, result.output
@@ -147,7 +147,7 @@ async def fake_run(kb_dir, skill_name, intent, model):
         _fake_compile(kb_dir, skill_name)
 
     with patch("openkb.cli._find_kb_dir", return_value=kb), \
-         patch("openkb.generator.run_skill_create", new=AsyncMock(side_effect=fake_run)):
+         patch("openkb.skill.generator.run_skill_create", new=AsyncMock(side_effect=fake_run)):
         result = runner.invoke(cli, ["skill", "new", "demo", "x", "-y"])
 
     assert result.exit_code == 0, result.output
@@ -343,7 +343,7 @@ async def perfect_grader(description, question, *, model):
     runner = CliRunner()
     with patch("openkb.cli._find_kb_dir", return_value=kb), \
          patch("openkb.cli._setup_llm_key", return_value=None), \
-         patch("openkb.agent.skill_evaluator.grade_one", side_effect=perfect_grader):
+         patch("openkb.skill.evaluator.grade_one", side_effect=perfect_grader):
         result = runner.invoke(cli, [
             "skill", "eval", "demo", "--eval-set", str(eval_path),
         ])
@@ -374,7 +374,7 @@ async def biased_grader(description, question, *, model):
     runner = CliRunner()
     with patch("openkb.cli._find_kb_dir", return_value=kb), \
          patch("openkb.cli._setup_llm_key", return_value=None), \
-         patch("openkb.agent.skill_evaluator.grade_one", side_effect=biased_grader):
+         patch("openkb.skill.evaluator.grade_one", side_effect=biased_grader):
         result = runner.invoke(cli, [
             "skill", "eval", "demo", "--eval-set", str(eval_path),
         ])
diff --git a/tests/test_skill_creator.py b/tests/test_skill_creator.py
index e166c418..5783d628 100644
--- a/tests/test_skill_creator.py
+++ b/tests/test_skill_creator.py
@@ -1,4 +1,4 @@
-"""Tests for openkb.agent.skill_creator.
+"""Tests for openkb.skill.creator.
 
 The agent itself is mocked (we don't want to spend tokens in unit tests).
 What we DO test:
@@ -15,7 +15,7 @@
 import pytest
 from unittest.mock import AsyncMock, patch
 
-from openkb.agent.skill_creator import (
+from openkb.skill.creator import (
     build_skill_create_agent,
     run_skill_create,
 )
@@ -56,7 +56,7 @@ async def fake_runner(*args, **kwargs):
         from types import SimpleNamespace
         return SimpleNamespace(final_output="done")
 
-    with patch("openkb.agent.skill_creator.Runner.run", new=AsyncMock(side_effect=fake_runner)):
+    with patch("openkb.skill.creator.Runner.run", new=AsyncMock(side_effect=fake_runner)):
         await run_skill_create(
             kb_dir=kb,
             skill_name="demo",
@@ -77,7 +77,7 @@ async def fake_runner(*args, **kwargs):
         from types import SimpleNamespace
         return SimpleNamespace(final_output="done")
 
-    with patch("openkb.agent.skill_creator.Runner.run", new=AsyncMock(side_effect=fake_runner)):
+    with patch("openkb.skill.creator.Runner.run", new=AsyncMock(side_effect=fake_runner)):
         with pytest.raises(RuntimeError, match="did not write SKILL.md"):
             await run_skill_create(
                 kb_dir=kb,
@@ -98,7 +98,7 @@ async def test_run_skill_create_translates_max_turns_to_runtime_error(tmp_path):
     async def fake_runner(*args, **kwargs):
         raise MaxTurnsExceeded("agent ran out of turns")
 
-    with patch("openkb.agent.skill_creator.Runner.run",
+    with patch("openkb.skill.creator.Runner.run",
                new=AsyncMock(side_effect=fake_runner)):
         with pytest.raises(RuntimeError, match="step cap"):
             await run_skill_create(
diff --git a/tests/test_skill_evaluator.py b/tests/test_skill_evaluator.py
index 01826f76..840bbbfa 100644
--- a/tests/test_skill_evaluator.py
+++ b/tests/test_skill_evaluator.py
@@ -1,4 +1,4 @@
-"""Tests for openkb.agent.skill_evaluator.
+"""Tests for openkb.skill.evaluator.
 
 The Runner.run call is mocked everywhere — no real LLM tokens spent.
 What we DO verify:
@@ -17,7 +17,7 @@
 
 import pytest
 
-from openkb.agent.skill_evaluator import (
+from openkb.skill.evaluator import (
     EvalPrompt,
     EvalResult,
     _read_description,
@@ -79,7 +79,7 @@ async def test_generate_eval_set_parses_plain_json(tmp_path):
     async def fake_runner(*args, **kwargs):
         return SimpleNamespace(final_output=_fake_generator_payload(10))
 
-    with patch("openkb.agent.skill_evaluator.Runner.run", new=AsyncMock(side_effect=fake_runner)):
+    with patch("openkb.skill.evaluator.Runner.run", new=AsyncMock(side_effect=fake_runner)):
         prompts = await generate_eval_set(skill_dir, model="gpt-4o-mini", count=10)
 
     assert len(prompts) == 20
@@ -97,7 +97,7 @@ async def test_generate_eval_set_strips_code_fences(tmp_path):
     async def fake_runner(*args, **kwargs):
         return SimpleNamespace(final_output=fenced)
 
-    with patch("openkb.agent.skill_evaluator.Runner.run", new=AsyncMock(side_effect=fake_runner)):
+    with patch("openkb.skill.evaluator.Runner.run", new=AsyncMock(side_effect=fake_runner)):
         prompts = await generate_eval_set(skill_dir, model="gpt-4o-mini", count=3)
 
     assert len(prompts) == 6
@@ -111,7 +111,7 @@ async def test_grade_one_returns_trigger_for_trigger_response():
     async def fake_runner(*args, **kwargs):
         return SimpleNamespace(final_output="TRIGGER")
 
-    with patch("openkb.agent.skill_evaluator.Runner.run", new=AsyncMock(side_effect=fake_runner)):
+    with patch("openkb.skill.evaluator.Runner.run", new=AsyncMock(side_effect=fake_runner)):
         out = await grade_one("desc", "question?", model="gpt-4o-mini")
     assert out == "trigger"
 
@@ -121,7 +121,7 @@ async def test_grade_one_returns_no_trigger_for_negative_response():
     async def fake_runner(*args, **kwargs):
         return SimpleNamespace(final_output="NO-TRIGGER")
 
-    with patch("openkb.agent.skill_evaluator.Runner.run", new=AsyncMock(side_effect=fake_runner)):
+    with patch("openkb.skill.evaluator.Runner.run", new=AsyncMock(side_effect=fake_runner)):
         out = await grade_one("desc", "question?", model="gpt-4o-mini")
     assert out == "no-trigger"
 
@@ -131,7 +131,7 @@ async def test_grade_one_handles_mixed_case():
     async def fake_runner(*args, **kwargs):
         return SimpleNamespace(final_output="trigger")
 
-    with patch("openkb.agent.skill_evaluator.Runner.run", new=AsyncMock(side_effect=fake_runner)):
+    with patch("openkb.skill.evaluator.Runner.run", new=AsyncMock(side_effect=fake_runner)):
         out = await grade_one("desc", "question?", model="gpt-4o-mini")
     assert out == "trigger"
 
@@ -141,7 +141,7 @@ async def test_grade_one_handles_space_variant():
     async def fake_runner(*args, **kwargs):
         return SimpleNamespace(final_output="No Trigger")
 
-    with patch("openkb.agent.skill_evaluator.Runner.run", new=AsyncMock(side_effect=fake_runner)):
+    with patch("openkb.skill.evaluator.Runner.run", new=AsyncMock(side_effect=fake_runner)):
         out = await grade_one("desc", "question?", model="gpt-4o-mini")
     assert out == "no-trigger"
 
@@ -151,7 +151,7 @@ async def test_grade_one_defaults_to_no_trigger_on_ambiguous_output():
     async def fake_runner(*args, **kwargs):
         return SimpleNamespace(final_output="hmm not sure")
 
-    with patch("openkb.agent.skill_evaluator.Runner.run", new=AsyncMock(side_effect=fake_runner)):
+    with patch("openkb.skill.evaluator.Runner.run", new=AsyncMock(side_effect=fake_runner)):
         out = await grade_one("desc", "question?", model="gpt-4o-mini")
     assert out == "no-trigger"
 
@@ -178,7 +178,7 @@ async def fake_grade(description, question, *, model):
         match = next(p for p in eval_set if p.question == question)
         return match.expected
 
-    with patch("openkb.agent.skill_evaluator.grade_one", side_effect=fake_grade):
+    with patch("openkb.skill.evaluator.grade_one", side_effect=fake_grade):
         result = await run_eval(skill_dir, model="gpt-4o-mini", eval_set=eval_set)
 
     assert isinstance(result, EvalResult)
@@ -197,7 +197,7 @@ async def test_run_eval_reports_misses(tmp_path):
     async def fake_grade(description, question, *, model):
         return "trigger"
 
-    with patch("openkb.agent.skill_evaluator.grade_one", side_effect=fake_grade):
+    with patch("openkb.skill.evaluator.grade_one", side_effect=fake_grade):
         result = await run_eval(skill_dir, model="gpt-4o-mini", eval_set=eval_set)
 
     assert result.total == 6
@@ -244,7 +244,7 @@ async def test_generate_eval_set_translates_max_turns_to_runtime_error(tmp_path)
     async def fake_runner(*args, **kwargs):
         raise MaxTurnsExceeded("ran out")
 
-    with patch("openkb.agent.skill_evaluator.Runner.run",
+    with patch("openkb.skill.evaluator.Runner.run",
                new=AsyncMock(side_effect=fake_runner)):
         with pytest.raises(RuntimeError, match="max-turn cap"):
             await generate_eval_set(skill_dir, model="gpt-4o-mini")
@@ -258,7 +258,7 @@ async def test_generate_eval_set_translates_malformed_json_to_runtime_error(tmp_
     async def fake_runner(*args, **kwargs):
         return SimpleNamespace(final_output="this is not json at all")
 
-    with patch("openkb.agent.skill_evaluator.Runner.run",
+    with patch("openkb.skill.evaluator.Runner.run",
                new=AsyncMock(side_effect=fake_runner)):
         with pytest.raises(RuntimeError, match="non-JSON output"):
             await generate_eval_set(skill_dir, model="gpt-4o-mini")
diff --git a/tests/test_skill_tools.py b/tests/test_skill_tools.py
index f5db686d..99f07ee4 100644
--- a/tests/test_skill_tools.py
+++ b/tests/test_skill_tools.py
@@ -1,7 +1,7 @@
-"""Tests for openkb.agent.skill_tools — path-scoped IO for the skill-create agent."""
+"""Tests for openkb.skill.tools — path-scoped IO for the skill-create agent."""
 from __future__ import annotations
 
-from openkb.agent.skill_tools import (
+from openkb.skill.tools import (
     list_wiki_dir,
     read_wiki_file_for_skill,
     write_skill_file,
diff --git a/tests/test_skill_validator.py b/tests/test_skill_validator.py
index b59b6d31..39952fc9 100644
--- a/tests/test_skill_validator.py
+++ b/tests/test_skill_validator.py
@@ -1,10 +1,10 @@
-"""Unit tests for openkb.skill_validator — pure-Python structural checks
+"""Unit tests for openkb.skill.validator — pure-Python structural checks
 on a compiled skill directory. No LLM, no network."""
 from __future__ import annotations
 
 from pathlib import Path
 
-from openkb.skill_validator import (
+from openkb.skill.validator import (
     DESCRIPTION_MAX_CHARS,
     REFERENCE_MAX_BYTES,
     SKILL_MD_MAX_BYTES,
diff --git a/tests/test_skill_workspace.py b/tests/test_skill_workspace.py
index ac4b5af7..2fb9a157 100644
--- a/tests/test_skill_workspace.py
+++ b/tests/test_skill_workspace.py
@@ -1,11 +1,11 @@
-"""Tests for :mod:`openkb.skill_workspace` — iteration save/restore + diff."""
+"""Tests for :mod:`openkb.skill.workspace` — iteration save/restore + diff."""
 from __future__ import annotations
 
 from pathlib import Path
 
 import pytest
 
-from openkb.skill_workspace import (
+from openkb.skill.workspace import (
     list_iterations,
     restore_iteration,
     save_iteration,

From 1679c8cedac512583003e0d18b0ef19b07cd84db Mon Sep 17 00:00:00 2001
From: mountain <kose2livs@gmail.com>
Date: Mon, 18 May 2026 19:47:39 +0800
Subject: [PATCH 25/25] docs(readme): drop duplicate Features section
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Features list at the top duplicated everything already covered by
Quick Start (step-by-step feature tour) + Usage (Wiki Foundation +
Generators tables). It also duplicated the Skill Factory slogan that
now lives canonically in the Skill Factory subsection.

Replace 17 lines with a one-sentence pointer to Usage. 405 → 389 lines.
Slogan now appears exactly once (in the Skill Factory subsection)
instead of 5 places.

Other duplicates (PageIndex mentions, cross-CLI install instructions,
Karpathy comparison table) left for now — they target different
audiences (scanners vs deep readers) and aren't worth touching in this
pass.
---
 README.md | 18 +-----------------
 1 file changed, 1 insertion(+), 17 deletions(-)

diff --git a/README.md b/README.md
index 5497ec4a..f9c73619 100644
--- a/README.md
+++ b/README.md
@@ -22,23 +22,7 @@ The idea is based on a [concept](https://x.com/karpathy/status/20398056595256445
 
 Traditional RAG rediscovers knowledge from scratch on every query. Nothing accumulates. OpenKB compiles knowledge once into a persistent wiki, then keeps it current. Cross-references already exist. Contradictions are flagged. Synthesis reflects everything consumed.
 
-### Features
-
-OpenKB has two layers: a **wiki foundation** that compiles and maintains your knowledge, and **generators** that turn that wiki into useful artifacts.
-
-**Wiki foundation** — compile and maintain
-- **Broad format support** — PDF, Word, Markdown, PowerPoint, HTML, Excel, text, and more via markitdown
-- **Scale to long documents** — Handled via [PageIndex](https://github.com/VectifyAI/PageIndex) tree indexing, enabling accurate, vectorless long-context retrieval
-- **Native multi-modality** — Retrieves and understands figures, tables, and images, not just text
-- **Compiled Wiki** — LLM compiles documents into summaries, concept pages, and cross-links, all kept in sync
-- **Lint** — Health checks find contradictions, gaps, orphans, and stale content
-- **Watch mode** — Drop files into `raw/`, wiki updates automatically
-- **Obsidian compatible** — Wiki is plain `.md` files with `[[wikilinks]]`. Open in Obsidian for graph view and browsing
-
-**Generators** — turn the wiki into something useful
-- **Query** — Ask questions (one-off) against your wiki. The LLM navigates compiled knowledge to answer
-- **Chat** — Multi-turn conversations with persisted sessions you can resume across runs
-- **Skill Factory** — *Drop in a book. Out comes a digital expert.* Compile any subset of your wiki into a redistributable [Anthropic Skill](https://docs.claude.com/en/docs/build-with-claude/skills) installable in Claude Code / Codex / Gemini CLI / Cursor
+OpenKB has two layers: a **wiki foundation** that compiles and maintains your knowledge, and **generators** (query / chat / Skill Factory) that turn it into useful output. See [Usage](#️-usage) for the full command list.
 
 # 🚀 Getting Started