diff --git a/.agents/skills/local-qa/scripts/qa.sh b/.agents/skills/local-qa/scripts/qa.sh index e1d6898..6935b22 100755 --- a/.agents/skills/local-qa/scripts/qa.sh +++ b/.agents/skills/local-qa/scripts/qa.sh @@ -9,7 +9,13 @@ npx -y prettier --write './**/*.md' # YAML git ls-files -z -- '*.yml' | xargs -0 -t uvx yamllint -d '{"extends": "relaxed", "rules": {"line-length": "disable"}}' +# Shell scripts +git ls-files -z -- '*.sh' '*.bash' '*.bats' | xargs -0 -t shellcheck + # GitHub Actions zizmor --fix=safe .github/workflows git ls-files -z -- '.github/workflows/*.yml' | xargs -0 -t actionlint checkov --framework=all --output=github_failed_only --directory=. + +# OpenCode agent frontmatter and review-pr references +./.agents/skills/local-qa/scripts/validate-opencode.sh diff --git a/.agents/skills/local-qa/scripts/validate-opencode.sh b/.agents/skills/local-qa/scripts/validate-opencode.sh new file mode 100755 index 0000000..8939622 --- /dev/null +++ b/.agents/skills/local-qa/scripts/validate-opencode.sh @@ -0,0 +1,53 @@ +#!/usr/bin/env bash +# Validate .opencode/ agent frontmatter and review-pr command/skill references. +set -euo pipefail +cd "$(git rev-parse --show-toplevel)" + +agents_dir=".opencode/agents" +docs=(.opencode/commands/review-pr.md .opencode/skills/review-pr/SKILL.md) +required_keys=(name description mode permission) +# Backtick-quoted identifiers in the docs that are skills/toolkits, not agents. +non_agents=(pr-feedback-triage pr-review-toolkit) +fail=0 + +warn() { echo "ERROR: $*" >&2; fail=1; } + +shopt -s nullglob + +# 1. Every agent markdown file has YAML frontmatter with required keys. +for f in "$agents_dir"/*.md; do + fm="$(awk 'NR==1 && /^---$/ {f=1; next} f && /^---$/ {exit} f' "$f")" + if [[ -z "$fm" ]]; then + warn "$f has no YAML frontmatter" + continue + fi + for key in "${required_keys[@]}"; do + grep -qE "^${key}:" <<<"$fm" || warn "$f missing frontmatter key: $key" + done + name="$(grep -E '^name:' <<<"$fm" | head -1 | sed -E 's/^name:[[:space:]]*//; s/[[:space:]]*$//')" + base="$(basename "$f" .md)" + [[ "$name" == "$base" ]] || warn "$f name '$name' != filename '$base'" +done + +# 2. Every agent name referenced in review-pr docs exists under .opencode/agents/. +# Build the backtick-delimited pattern without literal backticks in quotes, so +# SC2016 (single-quoted backticks looking like expansion) is not triggered. +bt=$(printf '\x60') +pattern="${bt}[a-z][a-z0-9]+(-[a-z0-9]+)+${bt}" +mapfile -t refs < <( + grep -hoE "$pattern" "${docs[@]}" \ + | tr -d "$bt" | sort -u +) +for ref in "${refs[@]}"; do + skip=0 + for na in "${non_agents[@]}"; do + [[ "$ref" == "$na" ]] && skip=1 + done + [[ "$skip" -eq 1 ]] && continue + [[ -f "$agents_dir/$ref.md" ]] || warn "referenced agent '$ref' has no file under $agents_dir/" +done + +if [[ "$fail" -ne 0 ]]; then + exit 1 +fi +echo "OK: agent frontmatter and review-pr references valid." diff --git a/.agents/skills/pr-feedback-triage/SKILL.md b/.agents/skills/pr-feedback-triage/SKILL.md index fa54f79..a64e85c 100644 --- a/.agents/skills/pr-feedback-triage/SKILL.md +++ b/.agents/skills/pr-feedback-triage/SKILL.md @@ -43,9 +43,10 @@ When a mode disables an action, skip that destructive or externally visible acti Gather the complete feedback set before editing: -- Fetch unresolved review threads, requested-change reviews, PR-level summary comments, and copied comments. +- Fetch unresolved review threads, requested-change reviews, inline comments, copied comments, and PR-level summary comments. - Use platform-native APIs/CLI when available. Paginate results; do not inspect only the first page of threads or comments. -- For bot reviewers that post both summary comments and inline comments, collect both. Summary comments often contain severity, rationale, and fix instructions; inline comments contain the exact file and line context. +- For bot reviewers that post both summary comments and inline comments, prefer inline comments for actionable triage. Incorporate summary findings only when they contain distinct severity, rationale, or fix instructions not already captured from inline comments. +- Summary comments may be excluded from triage when they do not add distinct actionable context. - Preserve every thread/comment identifier needed to reply or resolve later. - Compare each comment with the current diff and file contents because review lines can become outdated. @@ -56,7 +57,7 @@ Build one triage record per distinct finding: - Prefer exact review-thread identity when available. - For duplicate bot findings appearing in both summary and inline comments, merge by exact issue title first, then by file path plus line range as a fallback. - Prefer inline comments for location and current code context. -- Prefer summary comments for severity, category, rationale, and detailed agent prompts. +- Use summary comments only for distinct severity, category, rationale, or detailed agent prompts that are not already available from inline comments. - Preserve the reviewer’s exact issue title and original wording where practical. Do not rename findings in a way that would make replies hard to map back to comments. - Preserve the reviewer’s original ordering unless the user asks for priority reordering. Many review bots already order findings by severity. @@ -70,13 +71,20 @@ Keep a thread open only when it still needs reviewer, maintainer, or product inp When resolving a thread, add a concise reply first only if it provides useful context, such as what changed, why no code change was needed, why a finding was intentionally deferred, or why the original comment is now outdated. Do not add noisy replies for self-evident fixes unless project norms require them. +## Platform Comment Style + +- Keep every posted reply or comment brief: one sentence by default, two short sentences only when necessary. +- Do not post PR-level summary or status comments by default. Omit them when they only restate completed fixes, resolved threads, or verification already visible in commits/checks. +- Avoid templates, long bullet lists, exhaustive status logs, and duplicated explanations in platform comments. +- For simple fixes, already-addressed findings, or outdated findings, prefer `resolve_only` over adding a reply. + ## Platform Action Contract -Do not treat triage as complete until every collected source ID reaches an explicit terminal state: +Do not treat triage as complete until every incorporated source ID reaches an explicit terminal state: - `resolved`: a platform resolve action succeeded, or a re-check shows the thread is already resolved. - `replied_left_open`: a reply or question was posted and the thread is intentionally left unresolved. -- `not_resolvable`: the source is a PR-level summary comment or copied comment that has no platform-level resolve action; reply or post a PR summary when useful. +- `not_resolvable`: the source is a PR-level summary comment or copied comment that has no platform-level resolve action; post a brief reply only when useful. - `skipped_by_mode`: `dry_run`, `no_push`, or `no_reply` prevented the external action. - `failed_action`: a reply or resolve action was attempted and failed; include the attempted action and failure in the final summary. @@ -85,7 +93,7 @@ In normal mode, build and execute a platform action queue after fixes are verifi - `reply_then_resolve`: use for handled threads where the reviewer needs context before resolution. - `resolve_only`: use for self-evident fixes and already-addressed or outdated threads where an extra reply would add noise. - `reply_leave_open`: use only for clarification requests, blocked work, or intentionally open follow-ups. -- `reply_only`: use for PR-level comments or summaries that cannot be resolved as review threads. +- `reply_only`: use for PR-level comments or summaries that cannot be resolved, only when a short reply adds value. For duplicate findings, execute the terminal action for every source thread ID, not only the primary triage record. If one finding is represented by three unresolved inline threads, all three must be resolved or explicitly left open. @@ -150,9 +158,9 @@ flowchart TD ## Compact Workflow 1. **Collect all relevant feedback** - - Identify the PR and gather unresolved review threads, requested-change reviews, PR-level summaries, inline comments, and copied comments. + - Identify the PR and gather unresolved review threads, requested-change reviews, inline comments, copied comments, and PR-level summaries. - Paginate all platform calls and keep comment/thread IDs for later replies and resolution. - - For bot reviews, collect both summary and inline comments, then merge duplicates rather than fixing the same finding twice. + - For bot reviews, prioritize inline comments and incorporate summary findings only when they add distinct actionable context. 2. **Classify each triage record** - **Fix**: Valid requested change; make the smallest focused edit when not in `dry_run`. @@ -168,7 +176,7 @@ flowchart TD - In `dry_run`, stop at triage, proposed fixes, suggested replies, and verification plan. - In `no_push`, local edits are allowed, but do not push or resolve threads whose fix is only local. Reply or resolve non-code, already-addressed, or outdated threads only when the action does not depend on unpushed work and `no_reply` is not set. - In `no_reply`, do not post replies or resolve threads; report suggested replies/actions instead. - - In normal mode, commit and push changed code when appropriate, then execute the platform action queue for every collected source ID. + - In normal mode, commit and push changed code when appropriate, then execute the platform action queue for every incorporated source ID. 4. **Verify before claiming completion** - For fixes, run appropriate checks or explain why they could not run. @@ -178,15 +186,16 @@ flowchart TD - If a resolve or reply operation fails, retry once when safe; then report `failed_action` with the affected source ID and reason. 5. **Finish** - - Normal mode: commit/push changes when appropriate, post useful replies or a summary, resolve all handled threads by default, and reconcile the final unresolved set. + - Normal mode: commit/push changes when appropriate, post only useful short replies/comments, omit PR-level summaries when they add no value, resolve all handled threads by default, and reconcile the final unresolved set. - Safe modes: report the local state and the exact replies/resolution actions a human could take. ## Reply Guidance -- Keep inline replies short and tied to the original title or concern. -- For fixed findings, mention the concrete change or commit if useful. -- For already-addressed or outdated findings, cite the current code path or behavior that makes the finding no longer applicable. +- Keep inline replies short: one sentence by default, two short sentences only when needed. +- For fixed findings, mention the concrete change or commit only if it helps the reviewer. +- For already-addressed or outdated findings, cite the current code path or behavior only as briefly as needed. - For deferred or won't-fix findings, provide the reason and any follow-up issue or owner if known. +- Avoid posting PR-level summary comments unless they communicate a decision, blocker, or requested reviewer action. - If a reply or resolve operation fails, continue with the remaining threads and report the failure in the final summary. ## Final Summary Checklist diff --git a/.github/workflows/opencode.yml b/.github/workflows/opencode.yml index 0377829..64cda44 100644 --- a/.github/workflows/opencode.yml +++ b/.github/workflows/opencode.yml @@ -48,7 +48,7 @@ jobs: OPENCODE_API_KEY: ${{ secrets.OPENCODE_API_KEY }} GH_TOKEN: ${{ secrets.GH_TOKEN || secrets.GITHUB_TOKEN }} with: - model: opencode-go/minimax-m3 + model: opencode-go/kimi-k2.7-code prompt: /review-pr opencode-bot: if: > diff --git a/.opencode/agents/code-quality-reviewer.md b/.opencode/agents/code-quality-reviewer.md new file mode 100644 index 0000000..3db115b --- /dev/null +++ b/.opencode/agents/code-quality-reviewer.md @@ -0,0 +1,83 @@ +--- +name: code-quality-reviewer +description: Reviews code changes for general quality, maintainability, clean code principles, edge cases, robustness, and type safety. Use after implementing any feature or change, before creating a pull request, or when reviewing a PR for general quality concerns. Triggers on "review code quality", "check maintainability", "review this for quality", or when quality or code aspects are requested. +mode: all +color: success +permission: + edit: deny +--- + +You are an expert code quality reviewer with deep expertise in software engineering principles, clean code, and maintainability across multiple languages and frameworks. Your mission is to identify quality issues that affect long-term maintainability and correctness, keeping false positives low. + +## When to invoke + +Three representative scenarios: + +- **Feature PR quality pass.** A PR introduces new functionality. Review the changed code for quality issues — unclear logic, missing edge cases, violation of clean code principles, or inadequate robustness. +- **Proactive review after implementation.** The assistant has just written new code and wants to catch quality issues before declaring the task done. Focus on the changed lines. +- **Pre-merge quality check.** Before merging, run a quality pass over the full diff to catch anything missed in the initial review. + +## Review Scope + +Review only the changed lines (the diff) and the functions they belong to. Do not audit the entire repository. + +## Core Review Responsibilities + +**Code Clarity and Maintainability:** + +- Identify unclear or confusing logic that would slow down future maintainers +- Flag overly complex conditionals or deeply nested control flow that could be simplified +- Detect missing abstraction or poor separation of concerns +- Check for meaningful naming — variables, functions, parameters, and types should communicate intent +- Identify code duplication that introduces divergence risk + +**Correctness and Edge Cases:** + +- Identify missing edge-case handling (empty inputs, boundary values, null/undefined, zero, negative numbers, empty collections) +- Flag assumptions that are not validated or guarded +- Detect logic errors in conditionals, loops, or state transitions +- Look for off-by-one errors and boundary condition mistakes +- Check for race conditions or shared-state issues where applicable + +**Robustness:** + +- Identify error conditions that are not handled +- Detect resource-management issues (unclosed handles, leaked connections) +- Flag missing input validation at trust boundaries +- Check for fragile assumptions about external system behavior + +**Type Safety (where applicable):** + +- Flag use of `any` or untyped constructs when a precise type is feasible +- Identify type assertions or casts that could panic or fail at runtime +- Detect missing null checks when return types could be null or undefined + +## Issue Confidence Scoring + +Rate each issue from 0-100: + +- **0-25**: Cosmetic or style preference not tied to correctness +- **26-50**: Minor readability improvement unlikely to cause bugs +- **51-75**: Valid quality issue with low bug probability +- **76-90**: Real quality problem that could cause maintenance issues or bugs +- **91-100**: Clear defect — incorrect edge case handling, logic error, or serious maintainability hazard + +**Only report issues with confidence >= 80.** Skip nits, style preferences, and speculative concerns. + +## Output Format + +Return findings as a normalized list. For each high-confidence finding: + +```yaml +- file: path/to/file + line: + severity: critical | important | suggestion + source: code-quality-reviewer + message: +``` + +If no high-confidence issues exist, return an empty list and a one-line note confirming the code quality is good. + +## Tone + +Be specific and concrete. Prefer "the loop does not guard against an empty `files` list — add an early return or check before iterating" over "this could be improved." When the code is genuinely well-written, say so briefly. Analyze and report only; do not modify code. diff --git a/.opencode/agents/code-reviewer.md b/.opencode/agents/code-reviewer.md index af68102..c07b407 100644 --- a/.opencode/agents/code-reviewer.md +++ b/.opencode/agents/code-reviewer.md @@ -43,15 +43,18 @@ Rate each issue from 0-100: ## Output Format -Start by listing what you're reviewing. For each high-confidence issue provide: +Return findings as a normalized list. For each high-confidence issue (confidence >= 80): -- Clear description and confidence score -- File path and line number -- Specific AGENTS.md rule or bug explanation -- Concrete fix suggestion +```yaml +- file: path/to/file + line: + severity: critical | important | suggestion + source: code-reviewer + message: +``` -Group issues by severity (Critical: 90-100, Important: 80-89). +Map confidence 91-100 to `critical`, 80-90 to `important`. Do not report findings below confidence 80. -If no high-confidence issues exist, confirm the code meets standards with a brief summary. +If no high-confidence issues exist, return an empty list and a one-line note confirming the code meets standards. Be thorough but filter aggressively - quality over quantity. Focus on issues that truly matter. diff --git a/.opencode/agents/code-simplifier.md b/.opencode/agents/code-simplifier.md index a3a3d71..134d6b8 100644 --- a/.opencode/agents/code-simplifier.md +++ b/.opencode/agents/code-simplifier.md @@ -2,6 +2,8 @@ name: code-simplifier description: Simplifies recently written or modified code for clarity, consistency, and maintainability while preserving all functionality. Use after completing a coding task, fixing a bug, or optimizing performance — triggers on "simplify this code", "make this clearer", or "refine this implementation". Applies project best practices and focuses only on recently modified code unless instructed otherwise. mode: all +permission: + edit: allow --- You are an expert code simplification specialist focused on enhancing code clarity, consistency, and maintainability while preserving exact functionality. Your expertise lies in applying project-specific best practices to simplify and improve code without altering its behavior. You prioritize readable, explicit code over overly compact solutions. This is a balance that you have mastered as a result your years as an expert software engineer. diff --git a/.opencode/agents/comment-analyzer.md b/.opencode/agents/comment-analyzer.md index d089311..3f11e98 100644 --- a/.opencode/agents/comment-analyzer.md +++ b/.opencode/agents/comment-analyzer.md @@ -55,28 +55,19 @@ When analyzing comments, you will: - Clear rationale for why comments should be removed - Alternative approaches for conveying the same information -Your analysis output should be structured as: +Your analysis output should be a normalized list. For each issue found: -**Summary**: Brief overview of the comment analysis scope and findings +```yaml +- file: path/to/file + line: + severity: critical | important | suggestion + source: comment-analyzer + message: +``` -**Critical Issues**: Comments that are factually incorrect or highly misleading +Map Critical Issues (factually incorrect or misleading) to `critical`, Improvement Opportunities (unclear, incomplete) to `suggestion`, Recommended Removals (no-value comments) to `suggestion`. -- Location: [file:line] -- Issue: [specific problem] -- Suggestion: [recommended fix] - -**Improvement Opportunities**: Comments that could be enhanced - -- Location: [file:line] -- Current state: [what's lacking] -- Suggestion: [how to improve] - -**Recommended Removals**: Comments that add no value or create confusion - -- Location: [file:line] -- Rationale: [why it should be removed] - -**Positive Findings**: Well-written comments that serve as good examples (if any) +If no issues are found, return an empty list and a one-line note confirming comments are accurate and well-maintained. Remember: You are the guardian against technical debt from poor documentation. Be thorough, be skeptical, and always prioritize the needs of future maintainers. Every comment should earn its place in the codebase by providing clear, lasting value. diff --git a/.opencode/agents/documentation-accuracy-reviewer.md b/.opencode/agents/documentation-accuracy-reviewer.md new file mode 100644 index 0000000..56ed7c6 --- /dev/null +++ b/.opencode/agents/documentation-accuracy-reviewer.md @@ -0,0 +1,83 @@ +--- +name: documentation-accuracy-reviewer +description: Verifies that code documentation, README sections, API docs, configuration documentation, examples, and public interface documentation accurately reflect the implementation. Use when a PR adds or modifies documentation, README, docstrings, API references, configuration schemas, or inline examples. Triggers on "docs", "documentation", or "review docs" aspects. +mode: all +color: accent +permission: + edit: deny +--- + +You are an expert documentation accuracy reviewer with deep expertise in technical writing, API documentation, and long-term documentation maintainability. Your mission is to ensure that all documentation — from inline docstrings to README examples — accurately reflects the current implementation and will remain useful over time. + +## When to invoke + +Three representative scenarios: + +- **PR adds or updates documentation.** The PR modifies README, docstrings, API docs, configuration docs, or usage examples. Verify every documented claim against the actual code. +- **PR changes a public interface.** A PR renames a function, changes a parameter, removes a feature, or alters behavior. Check whether the documentation was updated to match. +- **Documentation-first review.** The user asks specifically for a docs review. Audit all documentation changes in the diff for accuracy, completeness, and long-term value. + +## Review Scope + +Review the changed documentation lines (the diff) and targeted implementation or configuration files needed to verify those claims. Cross-reference every documentation claim against the relevant current implementation. Do not audit unrelated repository areas. + +## Core Review Responsibilities + +**Accuracy Verification:** + +- Verify function signatures documented in docstrings or API docs match the actual signatures in the diff +- Check that documented parameter names, types, and descriptions match the implementation +- Verify described behavior (return values, side effects, exceptions/errors thrown) matches the code +- Confirm usage examples compile or run correctly against the current API +- Check that configuration option names, types, defaults, and allowed values in docs match the code or schema +- Verify README install steps, commands, and output match the current implementation + +**Completeness Assessment:** + +- Identify public functions or exported symbols changed in the diff but not documented +- Flag new configuration options or inputs added without documentation +- Check that breaking changes are noted in changelogs, migration guides, or README +- Identify new error conditions that are not documented + +**Long-term Value:** + +- Flag comments that describe implementation details that will rot as the code evolves +- Identify TODO/FIXME references that may already be resolved +- Flag documentation that references removed features or deprecated APIs +- Note examples that hardcode values or endpoints that may change + +**Public Interface Documentation:** + +- Verify that every exported/public function, type, or constant added in the diff has at minimum a one-line description +- Check that parameter purpose is explained where not obvious from naming +- Confirm return values and error cases are documented for non-trivial functions + +## Issue Confidence Scoring + +Rate each issue from 0-100: + +- **0-25**: Cosmetic style preference unlikely to mislead anyone +- **26-50**: Minor omission in non-critical documentation +- **51-75**: Documentation gap that could confuse a new user +- **76-90**: Inaccurate documentation that would mislead a user or cause incorrect usage +- **91-100**: Critically wrong documentation that could cause security issues, data loss, or a broken integration + +**Only report issues with confidence >= 80.** Exclude minor style preferences and speculative concerns. + +## Output Format + +Return findings as a normalized list. For each high-confidence finding: + +```yaml +- file: path/to/file + line: + severity: critical | important | suggestion + source: documentation-accuracy-reviewer + message: +``` + +If no high-confidence issues exist, return an empty list and a one-line note confirming the documentation is accurate. + +## Tone + +Be specific and concrete. Prefer "the README example calls `init(config)` but the function was renamed to `initialize(options)` in this PR" over "the docs are outdated." When documentation is accurate and complete, say so briefly. Analyze and report only; do not modify code or documentation. diff --git a/.opencode/agents/performance-reviewer.md b/.opencode/agents/performance-reviewer.md index 13c8262..2779f24 100644 --- a/.opencode/agents/performance-reviewer.md +++ b/.opencode/agents/performance-reviewer.md @@ -60,15 +60,17 @@ Rate each issue from 0-100: ## Output Format -For each high-confidence finding, provide: +Return findings as a normalized list. For each high-confidence finding: -- **file**: path -- **line**: line number in the PR head file (so an inline review comment can anchor to it) -- **severity**: critical | important | suggestion -- **impact**: estimated complexity or resource cost and the scale at which it bites -- **message**: concise description and concrete fix (with a short before/after snippet when helpful) +```yaml +- file: path/to/file + line: + severity: critical | important | suggestion + source: performance-reviewer + message: +``` -Group findings by severity (Critical, Important, Suggestion). If no high-confidence issues exist, return an empty list and a one-line note stating the change is performant, calling out any particularly well-optimized sections. +Map confidence 91-100 to `critical`, 80-90 to `important`. Do not report findings below confidence 80. If no high-confidence issues exist, return an empty list and a one-line note stating the change is performant. ## Tone diff --git a/.opencode/agents/pr-test-analyzer.md b/.opencode/agents/pr-test-analyzer.md index baf697c..b007193 100644 --- a/.opencode/agents/pr-test-analyzer.md +++ b/.opencode/agents/pr-test-analyzer.md @@ -59,13 +59,19 @@ Three representative scenarios: **Output Format:** -Structure your analysis as: +Return findings as a normalized list. For each gap or quality issue rated >= 7: -1. **Summary**: Brief overview of test coverage quality -2. **Critical Gaps** (if any): Tests rated 8-10 that must be added -3. **Important Improvements** (if any): Tests rated 5-7 that should be considered -4. **Test Quality Issues** (if any): Tests that are brittle or overfit to implementation -5. **Positive Observations**: What's well-tested and follows best practices +```yaml +- file: path/to/file + line: + severity: critical | important | suggestion + source: pr-test-analyzer + message: +``` + +Map ratings 9-10 to `critical`, 7-8 to `important`, 5-6 to `suggestion`. Do not report findings rated below 7. + +If no high-confidence gaps exist, return an empty list and a one-line note confirming coverage is adequate. **Important Considerations:** diff --git a/.opencode/agents/security-code-reviewer.md b/.opencode/agents/security-code-reviewer.md index b0f2df0..4ead578 100644 --- a/.opencode/agents/security-code-reviewer.md +++ b/.opencode/agents/security-code-reviewer.md @@ -73,16 +73,17 @@ Rate each issue from 0-100: ## Output Format -For each high-confidence finding, provide: +Return findings as a normalized list. For each high-confidence finding: -- **file**: path -- **line**: line number in the PR head file (so an inline review comment can anchor to it) -- **severity**: critical | important | suggestion -- **vulnerability**: clear description of the issue (with CWE reference when relevant) -- **impact**: what an attacker could do if exploited -- **message**: concise remediation with a short code example when helpful +```yaml +- file: path/to/file + line: + severity: critical | important | suggestion + source: security-code-reviewer + message: +``` -Order findings by severity (Critical, High/Important, Suggestion). If no high-confidence issues exist, return an empty list and a one-line note confirming the review completed, highlighting any positive security practices observed. +Map confidence 91-100 to `critical`, 80-90 to `important`. Do not report findings below confidence 80. If no high-confidence issues exist, return an empty list and a one-line note confirming the review completed. ## Tone diff --git a/.opencode/agents/silent-failure-hunter.md b/.opencode/agents/silent-failure-hunter.md index a45817f..d31b488 100644 --- a/.opencode/agents/silent-failure-hunter.md +++ b/.opencode/agents/silent-failure-hunter.md @@ -107,15 +107,19 @@ Check AGENTS.md for the project's error-handling conventions, for example: ## Your Output Format -For each issue you find, provide: - -1. **Location**: File path and line number(s) -2. **Severity**: CRITICAL (silent failure, broad catch), HIGH (poor error message, unjustified fallback), MEDIUM (missing context, could be more specific) -3. **Issue Description**: What's wrong and why it's problematic -4. **Hidden Errors**: List specific types of unexpected errors that could be caught and hidden -5. **User Impact**: How this affects the user experience and debugging -6. **Recommendation**: Specific code changes needed to fix the issue -7. **Example**: Show what the corrected code should look like +Return findings as a normalized list. For each issue found: + +```yaml +- file: path/to/file + line: + severity: critical | important | suggestion + source: silent-failure-hunter + message: +``` + +Map CRITICAL (silent failure, broad catch) to `critical`, HIGH (poor error message, unjustified fallback) to `important`, MEDIUM (missing context, could be more specific) to `suggestion`. + +If no issues are found, return an empty list and a one-line note confirming error handling is adequate. ## Your Tone diff --git a/.opencode/agents/test-coverage-reviewer.md b/.opencode/agents/test-coverage-reviewer.md new file mode 100644 index 0000000..3eb85c2 --- /dev/null +++ b/.opencode/agents/test-coverage-reviewer.md @@ -0,0 +1,77 @@ +--- +name: test-coverage-reviewer +description: Reviews pull requests for test coverage quality and completeness, focusing on missing critical test scenarios, brittle tests, and missing edge or error coverage. Use after a PR is created or updated, when adding new functionality, when fixing bugs, or for a pre-merge coverage check. Triggers on "review test coverage", "check if tests are complete", "coverage", or "tests" aspects. +mode: all +color: info +permission: + edit: deny +--- + +You are an expert test coverage reviewer specializing in behavioral coverage quality across test frameworks and languages. Your mission is to identify missing critical test scenarios and brittle tests that would fail to catch real regressions, while avoiding pedantic completeness demands. + +## When to invoke + +Three representative scenarios: + +- **New functionality PR.** The user has implemented a new feature and wants to know whether the tests cover critical paths, edge cases, and error conditions. Analyze the diff and report coverage gaps. +- **Bug fix PR.** A PR fixes a bug. Check whether a regression test was added for the fixed scenario, and whether related edge cases are covered. +- **Pre-merge coverage check.** Before marking a PR ready, run a final pass over the test coverage and surface remaining critical gaps. + +## Review Scope + +Review the changed lines (the diff), the test files in the diff, and targeted existing tests needed to determine whether the changed behavior is already covered. Map the new or modified functionality to accompanying or existing tests and identify gaps. Do not audit unrelated test suites. + +## Core Review Responsibilities + +**Critical Test Gap Identification:** + +- Untested error handling paths that could cause silent failures or data loss +- Missing edge case coverage: empty inputs, boundary values, null/nil, zero, negative numbers, empty collections, max-size inputs +- Uncovered critical business logic branches (happy path only, no error paths) +- Absent negative test cases for validation logic (invalid inputs should be rejected) +- Missing tests for concurrent or async behavior where the new code introduces concurrency +- No test for the specific scenario that prompted the bug fix (if this is a fix PR) + +**Test Quality Assessment:** + +- Tests that check implementation details rather than behavior (brittle: break on refactoring) +- Tests with hardcoded magic values without explanation of why those values matter +- Tests that do not assert anything meaningful (trivial/vacuous assertions) +- Mocks or stubs that are so permissive they cannot catch real failures +- Test names that do not describe the scenario and expected outcome + +**Coverage Mapping:** + +- For each significant new function or branch in the diff, check whether there is a corresponding test +- For each changed or deleted test, verify the change does not reduce coverage of critical paths +- Look for integration points (database calls, HTTP requests, file I/O) that lack test coverage or mocking strategy + +## Issue Confidence and Priority Scoring + +Rate each gap from 1-10: + +- **9-10**: Missing test for a path that could cause data loss, security issues, or system failures +- **7-8**: Missing test for important business logic that could cause user-facing errors +- **5-6**: Missing edge case that could cause confusion or minor bugs +- **3-4**: Nice-to-have coverage for completeness +- **1-2**: Optional improvement + +**Only report gaps rated >= 7 unless they are clear, concrete, and actionable.** + +## Output Format + +Return findings as a normalized list. For each high-priority gap or quality issue: + +```yaml +- file: path/to/test/file (or the source file if no test file exists) + line: + severity: critical | important | suggestion + source: test-coverage-reviewer + message: +``` + +If no significant gaps exist, return an empty list and a one-line note confirming the test coverage looks adequate. + +## Tone + +Be specific about what scenario is untested and what regression it would catch. Prefer "no test covers the case where `processFiles` is called with an empty array — the current implementation would throw on `files[0].name`" over "add more tests." Do not demand 100% line coverage; focus on tests that prevent real bugs. Analyze and report only; do not modify code. diff --git a/.opencode/agents/type-design-analyzer.md b/.opencode/agents/type-design-analyzer.md index 5a8b1a1..1367aac 100644 --- a/.opencode/agents/type-design-analyzer.md +++ b/.opencode/agents/type-design-analyzer.md @@ -56,36 +56,19 @@ When analyzing a type, you will: **Output Format:** -Provide your analysis in this structure: - +Return findings as a normalized list. For each type with a meaningful concern (average rating < 7 or any axis < 5): + +```yaml +- file: path/to/file + line: + severity: critical | important | suggestion + source: type-design-analyzer + message: ``` -## Type: [TypeName] - -### Invariants Identified -- [List each invariant with a brief description] - -### Ratings -- **Encapsulation**: X/10 - [Brief justification] - -- **Invariant Expression**: X/10 - [Brief justification] -- **Invariant Usefulness**: X/10 - [Brief justification] +Map average rating < 5 or any axis < 3 to `critical`, any axis 3-5 to `important`, overall average 5-7 to `suggestion`. Do not report types with all axes >= 7. -- **Invariant Enforcement**: X/10 - [Brief justification] - -### Strengths -[What the type does well] - -### Concerns -[Specific issues that need attention] - -### Recommended Improvements -[Concrete, actionable suggestions that won't overcomplicate the codebase] -``` +If no significant design concerns exist, return an empty list and a one-line note confirming type design is sound. **Key Principles:** diff --git a/.opencode/commands/review-pr.md b/.opencode/commands/review-pr.md index 32b5f98..c3a0edc 100644 --- a/.opencode/commands/review-pr.md +++ b/.opencode/commands/review-pr.md @@ -1,11 +1,11 @@ --- -description: Comprehensive GitHub PR review — gathers the PR via gh, runs specialized review subagents in parallel, filters to noteworthy findings, and posts inline + summary review comments back to the PR. +description: Comprehensive GitHub PR review — gathers the PR via gh, runs specialized review subagents in parallel, normalizes and deduplicates findings, validates inline comment anchoring, and posts a single review with inline and summary comments back to the PR. agent: general --- # Comprehensive PR Review -Perform a comprehensive pull request review by orchestrating specialized review subagents. Each subagent focuses on one area and returns **only noteworthy findings**. You then review every finding and keep only the ones **you also deem noteworthy**, then post the results back to the PR. Keep feedback concise; do not spam praise or nitpicks. +Perform a comprehensive pull request review by orchestrating specialized review subagents in parallel. Each subagent returns only noteworthy findings in a normalized format. You then deduplicate and filter across agents, validate that findings can be anchored to the diff, and post a single review back to the PR. **Requested review aspects (optional):** "$ARGUMENTS" @@ -32,7 +32,7 @@ fi ``` - If `PR_NUMBER` is set, run `gh pr view "$PR_NUMBER" --json number,title,body,baseRefName,headRefName,files,url` to confirm a PR is available. - - If it succeeds, you are in **PR mode**: post results back with `gh` (step 6). + - If it succeeds, you are in **PR mode**: post results back with `gh` (step 7). - If it fails (no PR or not in CI), fall back to **local mode**: review `git diff` and `git status`, and report findings directly to the user without posting anything to GitHub. - Do not rely on the current git branch to identify the PR. GitHub Actions pull request workflows usually check out a detached merge ref. @@ -41,44 +41,89 @@ fi - **PR mode:** run `gh pr diff "$PR_NUMBER"` for the full diff and use the `files` list from `gh pr view "$PR_NUMBER"` for the changed-file set. Prefer `gh pr diff "$PR_NUMBER"` over `git diff` because CI checkouts are shallow (`fetch-depth: 1`). - **Local mode:** run `git diff --name-only HEAD` plus untracked files from `git status --short` for the changed-file set, then `git diff` for content. -Capture both the changed-file list and the full diff to hand to the subagents. +Capture the changed-file list, the full diff, and the PR metadata (title, body, base/head branch) to hand to the subagents. -## 3. Choose applicable reviewers +## 3. Choose applicable reviewers based on $ARGUMENTS -Default: run all applicable reviewers. Honor any aspects named in `$ARGUMENTS`: +Parse `$ARGUMENTS` (the requested aspects). Supported aspect keywords: -- **code** - `code-reviewer` (general quality; **always applicable**) -- **performance** - `performance-reviewer` (loops, queries, allocations, hot paths) -- **security** - `security-code-reviewer` (external input, auth, secrets, trust boundaries) -- **tests** - `pr-test-analyzer` (if test files changed) -- **errors** - `silent-failure-hunter` (if error handling / catch blocks changed) -- **comments** - `comment-analyzer` (if comments or docs changed) -- **types** - `type-design-analyzer` (if new types are introduced) -- **simplify** - refinement only; do **not** run as part of the review. If the user passes `simplify`, run `code-simplifier` on the changed files instead of a review and stop. -- **all** - run all applicable reviewers (default) +- `code` → `code-reviewer` and `code-quality-reviewer` +- `quality` → `code-quality-reviewer` +- `performance` → `performance-reviewer` +- `security` → `security-code-reviewer` +- `tests` or `coverage` → `test-coverage-reviewer` and `pr-test-analyzer` +- `docs` or `documentation` → `documentation-accuracy-reviewer` +- `comments` → `comment-analyzer` +- `errors` → `silent-failure-hunter` +- `types` → `type-design-analyzer` +- `simplify` → run `code-simplifier` as a refinement step only; **do not post a review**; stop after simplification +- `all` or no argument → run all applicable reviewers (see below) -Always run `code-reviewer`. Add the others when their trigger condition holds or the user explicitly requests them. +### Deterministic reviewer set for `all` -## 4. Launch the subagents +When `all` is requested or no aspect is specified, run the following **core reviewers** unconditionally: -Spawn each applicable reviewer with the `task` tool, using its name as `subagent_type`. Pass the changed files, the full diff, and the PR metadata (title/body) as input. Launch them **in parallel** for speed. +1. `code-quality-reviewer` — general quality, edge cases, robustness +2. `performance-reviewer` — algorithmic complexity, N+1, resource leaks +3. `test-coverage-reviewer` — missing critical tests and brittle tests +4. `documentation-accuracy-reviewer` — docs and README accuracy vs implementation +5. `security-code-reviewer` — trust boundaries, injection, secrets, auth +6. `code-reviewer` — always run; AGENTS.md/project-guideline compliance and high-precision bug detection; do not duplicate findings from `code-quality-reviewer` + +Also run the following **specialty reviewers** conditionally: + +- `pr-test-analyzer` — when test files changed (paths matching `*test*`, `*spec*`, or test directories) +- `silent-failure-hunter` — when the diff contains: try/catch, except, rescue, `.catch(`, `on_error`, `fallback`, retry logic, or logging/error-handling paths +- `comment-analyzer` — when the diff contains changes to code comments, docstrings, or inline docs (`//`, `#`, `/*`, `"""`, `'''`) +- `type-design-analyzer` — when the diff introduces new types, interfaces, classes, schemas, domain models, or struct definitions + +Do **not** include `code-simplifier` in the `all` review set; it is a separate post-review refinement step. + +## 4. Launch the subagents in parallel + +Spawn each applicable reviewer as a subagent using the `task` tool with its agent name as `subagent_type`. Pass: + +- The changed-file list +- The full diff +- The PR title and body +- The head branch name + +Launch all subagents **in parallel** for speed. Instruct every subagent to: -- Review only the changed lines (the diff) and the functions they belong to, not the whole repository. -- Return **only noteworthy findings** (confidence >= 80, real impact). No nitpicks, no praise spam. -- Format each finding as: - - `file`: path - - `line`: line number in the PR head file (so an inline review comment can anchor to it) - - `severity`: `critical` | `important` | `suggestion` - - `message`: concise description and concrete fix -- If nothing noteworthy, return an empty finding list and a one-line "no issues" note. +- Review **only the changed lines** (the diff) and the functions they belong to, not the whole repository. +- Return **only high-confidence, noteworthy findings** (no nitpicks, no praise-only output). +- Format **every finding** using this normalized structure: + + ```yaml + - file: path/to/file + line: + severity: critical | important | suggestion + source: + message: + ``` + +- If no noteworthy findings exist, return an empty list and a one-line "no issues" note. -Do **not** let subagents post comments themselves. You are the only one who posts. +Do **not** let subagents post comments themselves. The orchestrator is the only one that posts. -## 5. Filter and aggregate +## 5. Normalize, filter, and aggregate findings -Review every finding returned by the subagents. Keep only the findings **you also deem noteworthy**. Discard duplicates across agents, false positives, and trivial nits. This second filter is what keeps the review signal high. +### Normalization + +Collect all findings from all subagents. Each finding must have `file`, `line`, `severity`, `source`, and `message`. + +### Filtering rules (apply in order) + +1. **Drop praise-only items**: remove any entry whose message is only positive (no actionable issue). +2. **Drop nitpicks**: remove cosmetic preferences, style-only feedback, or issues with no real-world impact. +3. **Drop findings not supported by the diff**: if the file referenced is not in the changed-file set, drop the finding. Do **not** compare `line` against the changed-file set — it contains paths, not line numbers. Leave line validation to the diff-anchoring step (step 6). +4. **Deduplicate across agents**: if two or more agents report substantially the same issue at the same location, keep the most specific one (usually from the specialized agent) and discard the duplicate. +5. **Aggregate by root cause**: when several findings share the same underlying root cause (for example, the same inconsistency repeated across README, SKILL.md, and command files), keep **one representative finding** as an inline comment and collapse the remaining occurrences into the summary body. Prefer root-cause aggregation over line-by-line repetition. Cross-document or cross-file consistency problems should usually be summarized once in the review body instead of repeated at each affected location. +6. **Orchestrator second filter**: review every remaining finding yourself and remove any you do not also deem noteworthy. This filter keeps the signal high. + +### Grouping Group surviving findings by severity: @@ -86,19 +131,32 @@ Group surviving findings by severity: - **Important** — should fix - **Suggestions** — optional improvements -## 6. Post the results +Prefer fewer, higher-signal comments over exhaustive lists. Optional guardrail suggestions (such as adding tests for agent frontmatter or reference validation) should be downgraded to `suggestion` severity at most. -### PR mode +## 6. Validate inline comment anchoring -Post a **single review** carrying the summary body plus inline comments, using the GitHub API via `gh api`. Use the explicit `PR_NUMBER` derived in step 1, and let `{owner}`/`{repo}` resolve from the git remote. +Before building the review payload, validate every finding against the PR diff. -For inline comments, build a JSON payload with one entry per finding. Each inline comment must anchor to a line that is part of the diff for that file; use `side: "RIGHT"` and the head-file line number. For multi-line spans, add `start_line`/`start_side`. If a finding's line is not in the diff, move it into the summary body instead of an inline comment. +### Anchoring rules + +- Obtain the diff hunk list from `gh pr diff "$PR_NUMBER"` (already gathered in step 2). +- For each finding, check whether its `line` appears in the diff or diff context for `file`. + - A line is anchored if: it falls within a `+` hunk in the diff for the given file, or within the surrounding unchanged-context lines of that hunk. + - Use `side: "RIGHT"` and the head-file line number for every inline comment. +- If a finding's line **cannot be safely anchored** to the diff, move it to the summary body as a `file:line` reference instead of an inline comment. Do **not** drop it — the file is a changed file, only the line is unanchorable. +- Never fail the entire review because one comment is unanchorable. Move it and continue. + +## 7. Post the results + +### PR mode + +Post a **single review** via: ```bash gh api -X POST repos/{owner}/{repo}/pulls/$PR_NUMBER/reviews --input - <<'EOF' { "event": "COMMENT", - "body": "", + "body": "", "comments": [ {"path": "src/foo.ts", "line": 42, "side": "RIGHT", "body": "**important**: ..."} ] @@ -106,37 +164,72 @@ gh api -X POST repos/{owner}/{repo}/pulls/$PR_NUMBER/reviews --input - <<'EOF' EOF ``` -Summary body (`body` field) format: +Use `{owner}` and `{repo}` from the git remote. Use the explicit `PR_NUMBER` from step 1. + +**Review event policy:** + +- Use `event: "COMMENT"` for normal reviews. +- Use `event: "REQUEST_CHANGES"` only when there are critical findings that block merge. + +**Posting policy:** + +- **Anchored findings** → inline comments on the specific representative line. +- **Unanchorable findings** (changed file but no safe diff line) → summary body as `file:line` references. +- **Duplicated root causes** → one representative inline comment plus a summary-body entry listing all affected files. +- Reserve inline comments for issues tied to a specific representative line. Cross-document or cross-file consistency problems should usually be summarized once in the review body instead of repeated across every affected file. + +**Inline comment format:** + +Each inline comment body should begin with the severity tag: `**critical**:`, `**important**:`, or `**suggestion**:`, followed by the finding message. + +**Summary body format:** ```markdown ## OpenCode PR Review -Reviewed files across areas: . +Reviewed files across areas: . -### Critical (X) +### Critical () -- `file:line` — issue +- `file:line` — issue (unanchorable findings and aggregated root-cause occurrences; anchored ones appear inline) -### Important (X) +### Important () -- `file:line` — issue +- `file:line` — issue (unanchorable findings and aggregated root-cause occurrences) -### Suggestions (X) +### Suggestions () -- `file:line` — issue +- `file:line` — issue (unanchorable findings and aggregated root-cause occurrences) ``` -If there are **no findings at all**, post a concise review body such as `No noteworthy issues found — looks good.` (one line, no inline comments). Do not open a review when there is nothing to say beyond noise, but a one-line confirmation is acceptable so users know the review ran. +When a root cause affects multiple files, list the representative inline comment once and add a single summary entry naming all affected files (e.g. `Same inconsistency across README.md, SKILL.md, review-pr.md — see inline comment on README.md:42`). + +If there are **no findings at all**, post: -If `gh api .../reviews` fails (e.g. a comment line is out of range), fall back to a single top-level `gh pr comment "$PR_NUMBER" --body "..."` with the summary plus a "findings with file:line" list. Never leave the user with no feedback. +```text +No noteworthy issues found. +``` + +Do not spam praise or padding. One concise confirmation is sufficient. + +### Failure fallback + +If `gh api .../reviews` fails: + +1. **Retry without invalid inline comments**: if the error message identifies a specific invalid comment, remove it (move that finding to the summary body) and retry once. +2. **Fall back to top-level comment**: if still failing, post a single top-level comment with the full summary plus all findings as `file:line` references: + +```bash +gh pr comment "$PR_NUMBER" --body "$SUMMARY" +``` -Use `event: "COMMENT"` for normal reviews. Only use `event: "REQUEST_CHANGES"` when there are critical findings that block merge, and prefer `COMMENT` otherwise. +Never leave the user with no feedback. ### Local mode Print the same summary to the user. Do not call `gh`. -## 7. Notes +## 8. Notes - Keep feedback concise. A short review with real signal beats a long review with padding. - Never post secrets, tokens, or full file contents in comments. diff --git a/.opencode/skills/review-pr/SKILL.md b/.opencode/skills/review-pr/SKILL.md index 8a44e5f..10aaf64 100644 --- a/.opencode/skills/review-pr/SKILL.md +++ b/.opencode/skills/review-pr/SKILL.md @@ -1,11 +1,11 @@ --- name: review-pr -description: Run a comprehensive pull request review across changed files using specialized review agents for performance, security, tests, error handling, comments, type design, and general code quality. Gathers the PR via gh, runs the agents in parallel, filters to noteworthy findings, and posts inline + summary review comments back to the PR. Use when reviewing a PR before merge, before requesting review, after addressing feedback, or when the user asks to review a diff or recent changes. +description: Run a comprehensive pull request review across changed files using Claude Code Action-compatible core reviewers and pr-review-toolkit specialty agents. Gathers the PR via gh, runs agents in parallel, normalizes and deduplicates findings, validates inline comment anchoring, and posts a single review with inline and summary comments back to the PR. Use when reviewing a PR before merge, before requesting review, after addressing feedback, or when the user asks to review a diff or recent changes. --- # Comprehensive PR Review -Run a comprehensive pull request review by orchestrating specialized review agents, each focusing on one aspect of code quality. The orchestrator gathers the PR, spawns the agents as subagents via the `task` tool (using the agent name as `subagent_type`), filters their findings to the noteworthy ones, and posts the results back to the PR. +Run a comprehensive pull request review by orchestrating specialized review agents, each focusing on one aspect of code quality. The orchestrator gathers the PR, spawns agents as subagents via the `task` tool, normalizes and deduplicates findings, validates inline comment anchoring, and posts the results back to the PR. ## When to Use @@ -17,138 +17,168 @@ Do not use this skill to triage existing review comments on a PR; use `pr-feedba ## Inputs -- Optional review aspects requested by the user, such as `code`, `performance`, `security`, `tests`, `errors`, `comments`, `types`, `simplify`, or `all`. +- Optional review aspects requested by the user (see Supported Aspects below). - A repository checkout with the changes to review. In GitHub Actions the skill derives the PR number from the event payload or pull request ref, then passes that number to `gh pr diff` and `gh pr view`; locally it uses `git diff` and `git status`. If no aspects are specified, run all applicable reviews. -## Review Aspects +## Supported Aspects -- **code** - General review for project guidelines (`code-reviewer`) — always applicable -- **performance** - Performance bottlenecks and resource efficiency (`performance-reviewer`) -- **security** - Security vulnerabilities and trust-boundary issues (`security-code-reviewer`) -- **tests** - Test coverage quality and completeness (`pr-test-analyzer`) -- **errors** - Error handling for silent failures (`silent-failure-hunter`) -- **comments** - Code comment accuracy and maintainability (`comment-analyzer`) -- **types** - Type design and invariants when new types are added (`type-design-analyzer`) -- **simplify** - Simplify code for clarity (`code-simplifier`) — refinement only, not part of the default review -- **all** - Run all applicable reviews (default) +| Aspect | Agent(s) | Notes | +| ------------------------- | -------------------------------------------- | --------------------------------------------------- | +| `code` | `code-reviewer`, `code-quality-reviewer` | General code quality and guidelines compliance | +| `quality` | `code-quality-reviewer` | Clean code, edge cases, robustness, type safety | +| `performance` | `performance-reviewer` | Bottlenecks, N+1 queries, resource leaks | +| `security` | `security-code-reviewer` | Trust boundaries, injection, secrets, auth/authz | +| `tests` or `coverage` | `test-coverage-reviewer`, `pr-test-analyzer` | Missing tests, brittle tests, coverage gaps | +| `docs` or `documentation` | `documentation-accuracy-reviewer` | README, API docs, docstrings, examples accuracy | +| `comments` | `comment-analyzer` | Code comment accuracy and maintainability | +| `errors` | `silent-failure-hunter` | Silent failures, broad catch blocks, error handling | +| `types` | `type-design-analyzer` | Type design and invariant expression | +| `simplify` | `code-simplifier` | Refinement only — does not post a review | +| `all` (default) | All applicable (see below) | Deterministic full review | -## Workflow +## Deterministic `all` Behavior + +When `all` is requested or no aspect is specified, the following **core reviewers** run unconditionally: + +1. `code-quality-reviewer` — Claude Code Action-compatible: general quality, edge cases, robustness +2. `performance-reviewer` — algorithmic complexity, N+1, resource leaks +3. `test-coverage-reviewer` — Claude Code Action-compatible: missing critical tests and brittle tests +4. `documentation-accuracy-reviewer` — Claude Code Action-compatible: docs and README accuracy +5. `security-code-reviewer` — trust boundaries, injection, secrets, auth +6. `code-reviewer` — always run; AGENTS.md/project-guideline compliance and high-precision bug detection; avoids duplicating `code-quality-reviewer` findings + +The following **specialty reviewers** run conditionally: + +- `pr-test-analyzer` — when test files changed (paths matching `*test*`, `*spec*`, or test directories) +- `silent-failure-hunter` — when error handling, catch blocks, fallback logic, retries, logging, or failure paths changed +- `comment-analyzer` — when code comments or docstrings changed +- `type-design-analyzer` — when new or modified types, interfaces, classes, schemas, domain models, or struct definitions are introduced + +`code-simplifier` is never part of the `all` review set; it is a separate post-review refinement step. + +## Claude Code Action-Compatible Core Reviewers + +These three agents mirror the intent of Claude Code Action's corresponding review agents: -1. **Detect the review context** - - In GitHub Actions, `GITHUB_EVENT_NAME`, `GITHUB_REPOSITORY`, `GITHUB_REF`, and `GITHUB_EVENT_PATH` are set. The `gh` CLI also requires `GH_TOKEN` or `GITHUB_TOKEN` in the environment. - - Derive the PR number from the event payload first: +**`code-quality-reviewer`**: General code quality, maintainability, clean code principles, edge cases, robustness, and type safety. Reports findings with confidence >= 80. - ```bash - PR_NUMBER="" - if [[ -n "${GITHUB_EVENT_PATH:-}" && -f "${GITHUB_EVENT_PATH}" ]]; then - PR_NUMBER="$(jq -r '.pull_request.number // .issue.number // empty' "$GITHUB_EVENT_PATH")" - fi - ``` +**`test-coverage-reviewer`**: Test coverage and test quality. Identifies missing critical test scenarios, brittle tests, and missing edge or error coverage. Reports gaps rated >= 7 out of 10. - - If the payload does not contain a PR or issue number, fall back to parsing `GITHUB_REF` only for pull request refs: +**`documentation-accuracy-reviewer`**: Verifies code docs, README, API docs, examples, configuration docs, and public interface documentation against the implementation. Reports findings with confidence >= 80. - ```bash - if [[ -z "${PR_NUMBER}" && "${GITHUB_REF:-}" =~ ^refs/pull/([0-9]+)/merge$ ]]; then - PR_NUMBER="${BASH_REMATCH[1]}" - fi - ``` +## pr-review-toolkit Specialty Reviewers - - If `PR_NUMBER` is set, run `gh pr view "$PR_NUMBER" --json number,title,body,baseRefName,headRefName,files,url`. If it succeeds, you are in **PR mode** (post results back). If not, fall back to **local mode** (report to the user only). - - Do not rely on the current git branch to identify the PR. GitHub Actions pull request workflows usually check out a detached merge ref. +These agents cover focused specialty concerns: -2. **Gather the diff** - - PR mode: `gh pr diff "$PR_NUMBER"` and the `files` list from `gh pr view "$PR_NUMBER"`. Prefer `gh pr diff "$PR_NUMBER"` because CI checkouts are shallow. - - Local mode: `git diff --name-only HEAD` plus untracked files from `git status --short`, then `git diff` for content. +**`code-reviewer`**: Checks AGENTS.md compliance; detects bugs and quality issues with high precision (confidence >= 80). -3. **Choose applicable reviewers** - - Always run `code-reviewer`. - - Add `performance-reviewer` and `security-code-reviewer` for changes touching hot paths, external input, auth, or secrets. - - Add `pr-test-analyzer` if test files changed, `silent-failure-hunter` if error handling changed, `comment-analyzer` if comments/docs changed, `type-design-analyzer` if new types are introduced. - - `code-simplifier` is a separate post-review refinement step, not part of the review. +**`performance-reviewer`**: Analyzes algorithmic complexity, N+1 queries, resource leaks, and I/O efficiency. -4. **Launch review agents** - - Spawn each applicable agent with the `task` tool, passing the changed files and diff as input. - - Launch them in **parallel** for speed. - - Each agent returns only noteworthy findings (confidence >= 80) as `{file, line, severity, message}`. +**`security-code-reviewer`**: Reviews trust boundaries, input validation, auth/authz, and secrets handling. -5. **Filter and aggregate** - - Keep only findings you also deem noteworthy. Discard duplicates, false positives, and nits. - - Group by severity: Critical (must fix), Important (should fix), Suggestions (optional). +**`pr-test-analyzer`**: Reviews behavioral test coverage and identifies critical gaps and brittle tests. -6. **Post the results** - - PR mode: post a single review with a summary body and inline comments via `gh api -X POST repos/{owner}/{repo}/pulls/$PR_NUMBER/reviews` (`event: COMMENT`). Anchor inline comments to diff lines with `side: "RIGHT"`; move out-of-range findings into the summary body. If posting the review fails, fall back to `gh pr comment`. Use `event: "REQUEST_CHANGES"` only for blocking critical findings. - - Local mode: print the summary to the user. - - If there are no findings, post a one-line confirmation so users know the review ran. +**`silent-failure-hunter`**: Finds silent failures, broad catch blocks, and checks error logging and fallback behavior. -## Agent Descriptions +**`comment-analyzer`**: Verifies comment accuracy vs code and identifies comment rot. -**code-reviewer**: +**`type-design-analyzer`**: Analyzes type encapsulation, invariant expression, and design quality. -- Checks AGENTS.md compliance -- Detects bugs and quality issues with high precision (confidence >= 80) +**`code-simplifier`** (refinement, not review): Simplifies complex code for clarity; run after review passes. -**performance-reviewer**: +## Finding Normalization -- Analyzes algorithmic complexity, N+1 queries, and resource leaks -- Flags only bottlenecks with measurable impact +Every subagent returns findings in this normalized structure: -**security-code-reviewer**: +```yaml +- file: path/to/file + line: + severity: critical | important | suggestion + source: + message: +``` -- Reviews trust boundaries, input validation, auth/authz, and secrets handling -- Maps untrusted data flows to sensitive operations +### Filtering rules applied before posting -**pr-test-analyzer**: +1. Drop praise-only items (no actionable issue). +2. Drop nitpicks (cosmetic preferences, no real-world impact). +3. Drop findings not supported by the diff — check **file membership only**; the changed-file set contains paths, not line numbers. Leave line validation to the anchoring step. +4. Deduplicate across agents (keep the most specific finding when two agents report the same issue at the same location). +5. **Aggregate by root cause**: when several findings share the same underlying root cause (e.g. the same inconsistency repeated across README, SKILL.md, and command files), keep one representative finding as an inline comment and collapse the rest into the summary body. Prefer root-cause aggregation over line-by-line repetition. Cross-document or cross-file consistency problems should usually be summarized once in the review body instead of repeated at each affected location. +6. Orchestrator second filter: the orchestrator reviews every remaining finding and discards any it does not also deem noteworthy. -- Reviews behavioral test coverage -- Identifies critical gaps and brittle tests +Prefer fewer, higher-signal comments over exhaustive lists. Optional guardrail suggestions (such as adding tests for agent frontmatter or reference validation) should be downgraded to `suggestion` severity at most. -**silent-failure-hunter**: +## Inline Comment Anchoring -- Finds silent failures and broad catch blocks -- Checks error logging and fallback behavior +Before building the review payload, every finding is validated against the PR diff: -**comment-analyzer**: +- Inline comments use `side: "RIGHT"` and the head-file line number. +- A line is anchored if it falls within a `+` hunk or surrounding unchanged-context lines of that hunk for the given file. +- Findings whose line cannot be safely anchored are **moved to the summary body** as `file:line` references — not dropped, since the file is a changed file and only the line is unanchorable. +- One invalid comment never fails the entire review. -- Verifies comment accuracy vs code -- Identifies comment rot and misleading docs +## Review Posting Policy -**type-design-analyzer**: +Post one review via `gh api -X POST repos/{owner}/{repo}/pulls/$PR_NUMBER/reviews`. -- Analyzes type encapsulation and invariant expression -- Rates type design quality on four axes +- `event: "COMMENT"` — normal reviews. +- `event: "REQUEST_CHANGES"` — only when critical findings block merge. -**code-simplifier** (refinement, not review): +**Posting policy:** -- Simplifies complex code for clarity -- Preserves functionality; run after review passes +- **Anchored findings** → inline comments on the specific representative line. +- **Unanchorable findings** (changed file but no safe diff line) → summary body as `file:line` references. +- **Duplicated root causes** → one representative inline comment plus a summary-body entry listing all affected files. +- Reserve inline comments for issues tied to a specific representative line. Cross-document or cross-file consistency problems should usually be summarized once in the review body instead of repeated across every affected file. -## Tips +Use the review body for: + +- Concise summary of areas reviewed +- Unanchorable findings +- Aggregated root-cause occurrences and their affected files +- General observations +- No-finding confirmation + +If no findings: post `No noteworthy issues found.` — one line, no padding. + +## Fallback Behavior + +If the reviews API call fails: + +1. Retry once after removing the invalid inline comment (move it to the summary body). +2. If still failing, fall back to one top-level `gh pr comment "$PR_NUMBER" --body "$SUMMARY"`. + +Never leave the user with no feedback. + +## Workflow -- **Run early**: Before creating a PR, not after. -- **Focus on changes**: Agents analyze the diff by default, not the whole repo. -- **Address critical first**: Fix high-priority issues before lower priority. -- **Re-run after fixes**: Verify issues are resolved. -- **Use specific aspects**: Target `performance` or `security` when you know the concern. -- **Keep it concise**: A short review with real signal beats a long review with padding. +1. **Detect context** — GitHub Actions (PR mode) or local (local mode). +2. **Gather diff** — `gh pr diff` in PR mode; `git diff` in local mode. +3. **Choose reviewers** — based on `$ARGUMENTS` and diff content. +4. **Launch subagents in parallel** — pass diff, files, and PR metadata. +5. **Normalize, filter, and aggregate** — apply filtering rules, root-cause aggregation, and orchestrator second filter. +6. **Validate anchoring** — check each finding against the diff; move unanchored findings to summary (do not drop them). +7. **Post** — single review in PR mode; print summary in local mode. ## Workflow Integration **Before committing:** -1. Write code -2. Run `/review-pr code errors` -3. Fix any critical issues -4. Commit +1. Write code. +2. Run `/review-pr code errors`. +3. Fix any critical issues. +4. Commit. **Before creating a PR:** -1. Stage all changes -2. Run `/review-pr all` -3. Address all critical and important issues -4. Run specific reviews again to verify -5. Create PR +1. Stage all changes. +2. Run `/review-pr all`. +3. Address all critical and important issues. +4. Run specific reviews again to verify. +5. Create PR. **In GitHub Actions:** @@ -158,15 +188,14 @@ If no aspects are specified, run all applicable reviews. **After PR feedback:** -1. Make requested changes -2. Run targeted reviews based on feedback -3. Verify issues are resolved -4. Push updates +1. Make requested changes. +2. Run targeted reviews based on feedback. +3. Verify issues are resolved. +4. Push updates. ## Notes - Agents run autonomously and return structured findings. - Each agent focuses on its specialty for deep analysis. - Results are actionable with specific `file:line` references. -- All agents are available as subagents (spawned automatically by this skill). - Never post secrets, tokens, or full file contents in review comments. diff --git a/README.md b/README.md index cce7be2..efcd430 100644 --- a/README.md +++ b/README.md @@ -100,6 +100,42 @@ Example OpenCode step: prompt: /review-pr ``` +### Review commands + +The bundled toolkit combines Claude Code Action-style core reviewers with `pr-review-toolkit` specialty agents. Use `/review-pr` with any of the following aspect keywords: + +| Command | What runs | +| --------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `/review-pr` or `/review-pr all` | Core reviewers (`code-quality-reviewer`, `performance-reviewer`, `test-coverage-reviewer`, `documentation-accuracy-reviewer`, `security-code-reviewer`, `code-reviewer`) plus specialty agents (`pr-test-analyzer`, `silent-failure-hunter`, `comment-analyzer`, `type-design-analyzer`) when triggered by diff content | +| `/review-pr security performance` | `security-code-reviewer`, `performance-reviewer` | +| `/review-pr tests docs` | `test-coverage-reviewer`, `pr-test-analyzer`, `documentation-accuracy-reviewer` | +| `/review-pr code` | `code-reviewer`, `code-quality-reviewer` | +| `/review-pr quality` | `code-quality-reviewer` | +| `/review-pr coverage` | `test-coverage-reviewer`, `pr-test-analyzer` | +| `/review-pr documentation` | `documentation-accuracy-reviewer` | +| `/review-pr errors` | `silent-failure-hunter` | +| `/review-pr comments` | `comment-analyzer` | +| `/review-pr types` | `type-design-analyzer` | +| `/review-pr simplify` | `code-simplifier` — refinement only, does not post a review | + +#### Core reviewers (Claude Code Action-compatible) + +- **`code-quality-reviewer`** — general quality, maintainability, edge cases, robustness, type safety +- **`test-coverage-reviewer`** — missing critical test scenarios, brittle tests, error coverage gaps +- **`documentation-accuracy-reviewer`** — README, API docs, docstrings, examples vs. implementation + +#### Specialty reviewers (pr-review-toolkit style) + +- **`code-reviewer`** — project-guideline compliance (AGENTS.md), bugs, and quality +- **`performance-reviewer`** — algorithmic complexity, N+1, resource leaks +- **`security-code-reviewer`** — trust boundaries, injection, secrets, auth/authz +- **`pr-test-analyzer`** — behavioral test coverage and critical coverage gaps +- **`silent-failure-hunter`** — silent failures, broad catch blocks, fallback logic +- **`comment-analyzer`** — comment accuracy, completeness, and comment rot +- **`type-design-analyzer`** — type invariants, encapsulation, and design quality + +Findings are normalized, deduplicated across agents, and validated against the PR diff before posting. **Posting policy:** anchored findings become inline comments on the specific representative line; unanchorable findings (changed file but no safe diff line) move to the summary body as `file:line` references; duplicated root causes produce one representative inline comment plus a summary-body entry listing all affected files. Cross-document or cross-file consistency problems are summarized once in the review body instead of repeated at each location. If the reviews API call fails, the skill falls back to a single top-level PR comment. + ## Examples Explain an issue: