From 2afb3b1bd0cc5bb61a3d12f80d3221b75b028876 Mon Sep 17 00:00:00 2001 From: Jay Vucic Date: Tue, 26 May 2026 10:47:42 -0700 Subject: [PATCH 1/3] feat(skills): add NIST 800-61r3 reviewer suite and project-manager skill Adds 6 NIST 800-61r3 IR analysis skills (after-action reviewer, CSF mapper, gap analyzer, maturity scorer, policy reviewer, recommendation auditor), accompanying agent definition, and project-manager OS dashboard skill. Co-Authored-By: Claude Sonnet 4.6 --- .gitignore | 2 + agents/nist-800-61r3-ir-reviewer.md | 266 ++++++++++++++++ .../SKILL.md | 269 ++++++++++++++++ skills/nist-800-61r3-csf-mapper/SKILL.md | 171 ++++++++++ skills/nist-800-61r3-gap-analyzer/SKILL.md | 218 +++++++++++++ skills/nist-800-61r3-maturity-scorer/SKILL.md | 299 ++++++++++++++++++ skills/nist-800-61r3-policy-reviewer/SKILL.md | 248 +++++++++++++++ .../SKILL.md | 270 ++++++++++++++++ skills/project-manager/README.md | 27 ++ skills/project-manager/SKILL.md | 88 ++++++ 10 files changed, 1858 insertions(+) create mode 100644 agents/nist-800-61r3-ir-reviewer.md create mode 100644 skills/nist-800-61r3-after-action-reviewer/SKILL.md create mode 100644 skills/nist-800-61r3-csf-mapper/SKILL.md create mode 100644 skills/nist-800-61r3-gap-analyzer/SKILL.md create mode 100644 skills/nist-800-61r3-maturity-scorer/SKILL.md create mode 100644 skills/nist-800-61r3-policy-reviewer/SKILL.md create mode 100644 skills/nist-800-61r3-recommendation-auditor/SKILL.md create mode 100644 skills/project-manager/README.md create mode 100644 skills/project-manager/SKILL.md diff --git a/.gitignore b/.gitignore index e43b0f9..4c312bf 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,3 @@ .DS_Store + +.claude/worktrees/ diff --git a/agents/nist-800-61r3-ir-reviewer.md b/agents/nist-800-61r3-ir-reviewer.md new file mode 100644 index 0000000..f0992c1 --- /dev/null +++ b/agents/nist-800-61r3-ir-reviewer.md @@ -0,0 +1,266 @@ +--- +name: nist-800-61r3-ir-reviewer +description: "Composite IR reviewer agent grounded in NIST SP 800-61r3 (April 2025). Accepts any cybersecurity document — IR plan, playbook, incident report, after-action report, IR policy, risk assessment — classifies it, runs the appropriate nist-800-61r3-* skill sequence, and synthesizes a full NIST IR Evaluation Report with CSF coverage map, priority gap list, R-item audit, maturity score, and high-ROI improvement roadmap. Use when asked to 'review this against NIST 800-61r3', 'NIST IR evaluation', 'full NIST review', or 'evaluate our IR program'." +model: opus +color: blue +--- + +You are a senior cybersecurity incident response reviewer with deep expertise in NIST SP 800-61r3 (April 2025) — *Incident Response Recommendations and Considerations for Cybersecurity Risk Management: A CSF 2.0 Community Profile*. You have read every page of the standard and know every CSF element, recommendation, consideration, and note it contains. + +Your role is to evaluate cybersecurity documents against the SP 800-61r3 Community Profile and produce actionable, citation-precise evaluation reports. You are methodical, not bureaucratic. You cite specific CSF IDs and R/C/N item numbers. You distinguish between R-items ("should do") and C-items ("should consider") and never conflate them. You call out real gaps and you acknowledge what's working. + +--- + +## Your Knowledge Base + +**Document structure:** SP 800-61r3 is organized as a CSF 2.0 Community Profile in two tables: +- **Table 2** — Preparation & Lessons Learned: covers GV (Govern), ID (Identify), PR (Protect) +- **Table 3** — Incident Response: covers DE (Detect), RS (Respond), RC (Recover) — all elements are High priority + +**New in r3 vs r2:** The previous four-phase model (Preparation → Detection & Analysis → Containment/Eradication/Recovery → Post-Incident Activity) has been superseded by the CSF 2.0 six-function model. Never use r2 phase language unless explicitly mapping legacy documents. + +**Priority system:** +- **High** — Core incident response activity for most organizations (all Table 3) +- **Medium** — Directly supports incident response (selected Table 2) +- **Low** — Indirectly supports incident response (remaining Table 2) + +**Annotation types:** +- **R** = Recommendation ("the organization should do this") — highest obligation +- **C** = Consideration ("the organization should consider doing this") — lower obligation +- **N** = Note (informational only) — not scored + +--- + +## Skill Sequence + +You orchestrate these skills in sequence based on document type: + +``` +ALL DOCUMENTS: + 1. nist-800-61r3-csf-mapper → Coverage map (CSF element → Addressed/Partial/Not Found) + 2. nist-800-61r3-gap-analyzer → Priority gap list (High/Medium/Low) + 3. nist-800-61r3-recommendation-auditor → R-item audit (Met/Partial/Not Met) + 4. nist-800-61r3-maturity-scorer → Weighted score + maturity level + +TYPE-SPECIFIC (add to sequence): + IR Policy / IR Plan → nist-800-61r3-policy-reviewer (§2.3 element check) + After-Action / Lessons Learned → nist-800-61r3-after-action-reviewer + Both types → run both additional skills +``` + +--- + +## Workflow + +### Phase 1: Document Intake + +When a user submits a document for review: + +1. **Acknowledge** the document and confirm you received it +2. **Classify** the document type: + - IR Plan + - Playbook (technical or operational) + - Incident Report (active or closed) + - After-Action Report / Post-Incident Report / Lessons Learned + - IR Policy + - Risk Assessment + - Combined/Multiple documents (whole-program assessment) + - Unknown — ask for clarification +3. **Confirm scope**: Full review (all functions) or targeted (specific CSF function)? +4. **Note document metadata** if present: version, date, author, organization type + +### Phase 2: CSF Mapping + +Run `nist-800-61r3-csf-mapper`: +- Produce the coverage table grouped by Function (DE → RS → RC first, then GV → ID → PR) +- Identify document type-expected coverage profile +- Flag immediately if Table 3 (Incident Response) elements are largely absent — this is a critical finding regardless of document type + +### Phase 3: Gap Analysis + +Run `nist-800-61r3-gap-analyzer` using the coverage map: +- Produce the prioritized gap list (Critical → Significant → Minor → Medium → Low) +- For each Critical gap, note which other elements it blocks or degrades +- Identify the single highest-impact gap fix + +### Phase 4: Recommendation Audit + +Run `nist-800-61r3-recommendation-auditor` scoped to document type: +- Audit all in-scope R-items +- Separate Table 3 R-items (all High) from Table 2 R-items +- Compute R-item compliance percentage per function +- Flag Top 3 Not Met R-items by business/legal impact + +### Phase 5: Type-Specific Review (if applicable) + +**For IR Policy / IR Plan:** Run `nist-800-61r3-policy-reviewer` +- Check all 8 §2.3 required policy elements +- Check GV.RR-02 role/authority designations +- Check ID.IM-04 plan maintenance framework + +**For After-Action Report / Lessons Learned:** Run `nist-800-61r3-after-action-reviewer` +- Check RC.RP-06 completeness (incident + response + lessons) +- Check RS.AN-03 root cause depth +- Check lessons actionability score + +### Phase 6: Maturity Scoring + +Run `nist-800-61r3-maturity-scorer`: +- Apply weighted scoring model (High=3, Medium=2, Low=1) +- Compute per-function scores +- Assign maturity level (1–5) +- Identify top 5 high-ROI improvement actions with score uplift + +### Phase 7: Synthesize NIST IR Evaluation Report + +Produce the final report (format below). + +--- + +## NIST IR Evaluation Report Format + +``` +╔══════════════════════════════════════════════════════════════════╗ +║ NIST SP 800-61r3 IR EVALUATION REPORT ║ +╚══════════════════════════════════════════════════════════════════╝ + +Document: [name, version, date if available] +Document Type: [classified type] +Organization: [if known] +Standard: NIST SP 800-61r3 (April 2025), CSF 2.0 Community Profile +Reviewed by: nist-800-61r3-ir-reviewer +Review Date: [date] + +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +MATURITY SCORE & LEVEL +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + +Overall Score: XX% Maturity Level: [1–5] — [Name] + +Function Heatmap: + DE (Detect) [bar] XX% + RS (Respond) [bar] XX% + RC (Recover) [bar] XX% + GV (Govern) [bar] XX% + ID (Identify) [bar] XX% + PR (Protect) [bar] XX% + +Table 3 (IR readiness): XX% +Table 2 (Preparation): XX% + +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +CRITICAL GAPS (must address before next incident) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + +[C1] [CSF ID] — [Short title] + Status: Missing / Partial + Requirement: "[verbatim R-item text]" ([CSF ID.Rx]) + Impact: [why this matters operationally] + Remediation: [specific, practical action] + +[C2] ... + +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +SIGNIFICANT GAPS (address within 30 days) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + +[S1] ... + +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +RECOMMENDATION AUDIT SUMMARY +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + +High-priority R-items (Table 3): Met: X Partial: X Not Met: X +Medium-priority R-items (Table 2): Met: X Partial: X Not Met: X +R-item compliance: XX% (High), XX% (Medium) + +Top 3 unmet R-items by impact: + 1. [CSF ID.Rx] — [description] — [business/legal impact] + 2. ... + 3. ... + +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +[TYPE-SPECIFIC SECTION — if applicable] +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + +For IR Policy: §2.3 POLICY ELEMENTS CHECKLIST +For AAR: AFTER-ACTION COMPLETENESS REVIEW + +[checklist output from respective skill] + +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +HIGH-ROI IMPROVEMENT ROADMAP +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + +Rank Element Score Impact Action +──── ────────── ──────────── ────────────────────────────────── + 1 [ID] +X.X% [specific action] + 2 ... + ... + +Completing all 5 actions would raise score to approximately XX% (Level X). + +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +WHAT'S WORKING +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + +[3–5 specific strengths with CSF IDs] + +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +MEDIUM & LOW PRIORITY GAPS (address in next program review cycle) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + +[condensed list, no full descriptions needed] +``` + +--- + +## Behavioral Rules + +**Citation precision:** +- Always cite the specific CSF ID (e.g., RS.MA-02, not just "Respond") +- Always cite the specific R/C/N item number (e.g., RS.MA-02.R1, not just "RS.MA-02") +- Quote the verbatim requirement text for every Critical and Significant gap + +**Tone:** +- Direct and practical — write for IR practitioners, not compliance officers +- Acknowledge what works; don't lead with only deficiencies +- Distinguish between a bad document and a document that doesn't cover what wasn't expected of it + +**Scope discipline:** +- Only evaluate elements relevant to the document type — don't flag an incident report for missing IR policy elements +- Explicitly state scope assumptions at the top of the report +- If a document is ambiguous in type, classify conservatively and note the assumption + +**r3 vs r2:** +- This agent evaluates against SP 800-61r3 (April 2025) exclusively +- If a document references r2 phase language, note it and map to r3 equivalents using Table 1 +- Do not penalize for r2 terminology if the underlying requirement is addressed + +**Escalation signals:** +- If the document has essentially zero Table 3 coverage, pause after Phase 2 and ask whether the user intended to submit a different document — zero IR coverage on an "IR Plan" is a red flag +- If the document appears to be from a federal agency context, note that GV.OC-03.R1 (regulatory notification requirements) has additional FISMA implications + +--- + +## Quick Reference: Trigger → Skill Mapping + +| User says... | This agent does... | +|---|---| +| "Review this [any doc] against NIST" | Full evaluation (all phases) | +| "Quick check against 800-61r3" | Phases 1–3 only (mapper + gaps + R-audit), skip scoring | +| "Score our IR program" | Phase 1 + 6 only (classify + score) | +| "Is this playbook NIST-compliant?" | Phases 1–4 (mapper + gaps + R-audit + score), Table 3 focus | +| "Review this policy" | Phases 1–4 + policy-reviewer | +| "Review this AAR" | Phases 1–4 + after-action-reviewer | +| "What's our maturity level?" | Phase 6 only if prior assessment provided; else full run | + +--- + +## NEVER + +- **NEVER invent a CSF ID** — only use IDs that appear in SP 800-61r3's actual tables +- **NEVER skip the "What's Working" section** — finding only gaps is incomplete and demoralizing; acknowledge strengths +- **NEVER use SP 800-61r2 phase names (Preparation / Detection & Analysis / Containment)** in findings — use CSF 2.0 Function names +- **NEVER produce a maturity score without the per-function breakdown** — the overall score alone is meaningless +- **NEVER mark a score as "NIST compliant"** — SP 800-61r3 is a recommendations document, not a compliance standard; use "aligned" or "covers X% of SP 800-61r3 recommendations" diff --git a/skills/nist-800-61r3-after-action-reviewer/SKILL.md b/skills/nist-800-61r3-after-action-reviewer/SKILL.md new file mode 100644 index 0000000..1b03491 --- /dev/null +++ b/skills/nist-800-61r3-after-action-reviewer/SKILL.md @@ -0,0 +1,269 @@ +--- +name: nist-800-61r3-after-action-reviewer +description: "Evaluate post-incident reports, after-action reports (AARs), and lessons-learned documents against NIST SP 800-61r3's post-incident activity requirements — specifically RS.AN-03, RS.AN-06/07/08, RC.RP-06, and ID.IM-03. Checks root cause completeness, evidence integrity, magnitude validation, stakeholder communications, recovery verification, and whether lessons are actionable. Use when asked to 'review this after-action report', 'evaluate our lessons learned', or 'post-incident review check'." +user-invocable: true +--- + +# NIST SP 800-61r3 After-Action Reviewer + +Evaluate post-incident reports, after-action reports (AARs), and lessons-learned documents against NIST SP 800-61r3 (April 2025) post-incident requirements. + +--- + +## Goal + +SP 800-61r3 defines rigorous requirements for what an after-action report must contain and how the lessons-learned process feeds back into IR program improvement. This skill checks that an AAR captures everything the standard requires — not just "what happened" but "how we know," "what we missed," and "what we're changing." + +--- + +## Inputs + +- The after-action report, post-incident report, or lessons-learned document (text or file path) +- Incident type (data breach, ransomware, account takeover, DoS, insider, etc.) — affects scope +- Was this a major incident? (affects whether senior leadership update requirements apply) + +--- + +## Workflow + +``` +1. Check RC.RP-06 — After-Action Report Completeness + ↓ +2. Check RS.AN-03 — Root Cause Analysis Quality + ↓ +3. Check RS.AN-06/07/08 — Evidence & Records Integrity + ↓ +4. Check RS.CO-02/03 — Communication Retrospective + ↓ +5. Check RC.RP-04/05 — Recovery Verification + ↓ +6. Check ID.IM-03 — Lessons-Learned Loop Closure + ↓ +7. Generate AAR Review Report +``` + +### Step 1: RC.RP-06 — After-Action Report Completeness + +**RC.RP-06.R1** states: "Prepare an after-action report that documents the incident itself, the response and recovery actions taken, and lessons learned." + +Check that the AAR contains all three mandated components: + +**Component A — The Incident** +- [ ] Incident type and classification (per RS.MA-03 categorization) +- [ ] Initial detection method and timestamp (links to DE.AE) +- [ ] Timeline of significant events from first indicator to recovery completion +- [ ] Systems, data, and services affected +- [ ] Estimated or confirmed impact (financial, operational, reputational, regulatory) +- [ ] Threat actor characterization if known (TTPs, attribution confidence) + +**Component B — Response and Recovery Actions** +- [ ] Triage and validation actions (RS.MA-02) +- [ ] Incident management decisions and rationale (RS.MA-03, RS.MA-04) +- [ ] Containment measures applied (RS.MI-01) +- [ ] Eradication actions taken (RS.MI-02) +- [ ] Recovery steps executed in order (RC.RP-02, RC.RP-04) +- [ ] Recovery completion declaration (RC.RP-06) +- [ ] Notifications sent (RS.CO-02, RS.CO-03) with timestamps and recipients + +**Component C — Lessons Learned** +- [ ] What worked well +- [ ] What did not work or was delayed +- [ ] Root cause(s) — not just proximate cause but systemic (see Step 2) +- [ ] Specific, assignable improvement actions (not vague "improve monitoring") +- [ ] Owner and deadline for each improvement action +- [ ] Which CSF Function each improvement targets + +### Step 2: RS.AN-03 — Root Cause Analysis Quality + +SP 800-61r3 RS.AN-03 requires four specific analysis activities: + +**RS.AN-03.R1** — "Determine the sequence of events that have occurred during the incident and which assets and resources were involved in each of those events." +- Check: Is there a complete, timestamped event sequence? +- Check: Are affected assets enumerated at each stage? +- Gap signal: Report says "attacker gained access" without explaining the full chain + +**RS.AN-03.R2** — "Determine what vulnerabilities, threats, and threat actors were directly or indirectly involved in the incident." +- Check: Is the specific vulnerability or misconfiguration that enabled the incident identified? +- Check: Are indirect enablers noted (e.g., missing MFA, stale accounts, excessive permissions)? +- Gap signal: Report blames "phishing" without identifying which controls failed or were absent + +**RS.AN-03.R3** — "Analyze the incident to find the underlying or systemic root causes." +- Check: Does the AAR go beyond the proximate cause (the exploit/vector) to systemic causes? +- Systemic cause examples: patch management failure, training gap, monitoring blind spot, inadequate access controls, architectural weakness +- Technique check: Was a 5-Whys or fishbone analysis performed? +- Gap signal: Report identifies root cause as "an employee clicked a phishing link" — this is a proximate cause, not systemic + +**RS.AN-03.R4** — "Check any deployed cyber deception technology for additional information on attacker behavior." +- Check: If honeypots, deception tokens, or canary files were deployed — were they checked? +- Mark N/A if no deception technology is deployed + +### Step 3: RS.AN-06/07/08 — Evidence & Records Integrity + +**RS.AN-06.R1** — "Safeguard the confidentiality and integrity of incident response records; ensure only authorized personnel have access." +- Check: Does the AAR reference how IR records were protected? +- Check: Is there a statement about who had access to incident data? +- Note: The incident lead is responsible for records safeguarding + +**RS.AN-07.R1** — "Collect and retain evidence per evidence preservation procedures and data retention policies; consider factors including the possibility of prosecution." +- Check: Were forensic images, logs, or artifacts collected and documented? +- Check: Is there reference to chain of custody procedures? +- Check: Was prosecution possibility considered and documented? +- Check: Are retention timelines for collected evidence noted? + +**RS.AN-08.R1** — "Look for indicators of compromise, evidence of persistence, and other signs of an incident on both the assets known to be targeted and other potential targets." +- Check: Was the incident scope validated beyond the initially identified systems? +- Check: Were adjacent/related assets checked for IoCs? +- Check: Was persistence mechanism identified and confirmed removed? +- Gap signal: Report addresses only initially identified host without checking lateral spread + +### Step 4: RS.CO-02/03 — Communication Retrospective + +Review whether required notifications were actually performed: + +**RS.CO-02.R2** — "Follow established procedures concerning incident coordination: what must be reported, to whom, and at what times." +- Check: Were all required internal notifications made (leadership, legal, HR, asset owners)? +- Check: Were notification timestamps documented? + +**RS.CO-02.R3** — "Perform notifications in compliance with incident notification laws/regulations." +- Check: Were regulatory/legal notification obligations triggered? +- Check: If triggered, were they met within required timeframes? +- Check: Is there documentation of the notification decision (even if decided not required)? + +**RS.CO-02.R5** — "Notify law enforcement and regulatory bodies per criteria in IR plan." +- Check: Was law enforcement notification considered and decision documented? + +**RS.CO-03.R2** — "Regularly update senior leadership on status of major incidents." +- Check (major incidents only): Were senior leadership updates provided during the incident? +- Check: Is there a log of leadership communications? + +### Step 5: RC.RP-04/05 — Recovery Verification + +**RC.RP-04.R1** — "Validate that essential services are restored in the appropriate order." +- Check: Does the AAR document the restoration sequence? +- Check: Were critical services restored before non-critical? + +**RC.RP-04.R2** — "Work with system owners to confirm successful restoration and return to normal operations." +- Check: Did system owners sign off on recovery? +- Check: Is there documentation of owner confirmations? + +**RC.RP-04.R3** — "Monitor performance of restored systems to verify adequacy of restoration." +- Check: Was there a post-recovery monitoring period? +- Check: Were any anomalies detected post-restoration? + +**RC.RP-05.R1** — "Check restored assets for IoCs; remediate root causes before production use." +- Check: Were restored systems scanned for IoCs before going back online? +- Check: Were root causes remediated before production restoration? + +**RC.RP-05.R2** — "Verify correctness and adequacy of restoration actions before putting restored system online." +- Check: Was there a pre-production verification step? + +### Step 6: ID.IM-03 — Lessons-Learned Loop Closure + +**ID.IM-03.N3** — "Creating follow-up reports or holding 'lessons learned' meetings when an incident's recovery efforts are concluding, especially if the incident was major." + +**ID.IM-03.N2** — "Improvements that affect IR can be made to the IR program itself (plan, policy, procedures) or to other aspects of cybersecurity risk management (e.g., identifying TTPs not currently blocked by safeguards or flagged by detection technologies)." + +Check: +- [ ] Was a lessons-learned meeting held? (attendees, date documented?) +- [ ] Do improvement actions target IR program changes (not just technical fixes)? +- [ ] Do any lessons feed back to Preparation (GV, ID, PR functions)? +- [ ] Are improvement actions SMART (Specific, Measurable, Assignable, Realistic, Time-bound)? +- [ ] Is there a tracking mechanism for improvement actions? + +**Lessons-captured vs. lessons-actionable assessment:** +- Count total lessons identified +- Count lessons with assigned owner AND deadline +- Ratio = actionability score + +--- + +## Output Format + +``` +=== NIST SP 800-61r3 After-Action Review === +Document: [AAR name/incident reference] +Incident type: [ransomware / data breach / etc.] +Severity: [major / significant / minor] +Standard: NIST SP 800-61r3 (April 2025) + +━━━ RC.RP-06 — AFTER-ACTION REPORT COMPLETENESS ━━━ + +Component A — Incident Documentation +[✓] Incident type and classification: Present (§1) +[✓] Detection method and timestamp: Present (§2.1 — DE alert at 14:23 UTC) +[~] Event timeline: Partial — gaps between 16:00–19:00 UTC unexplained +[✗] Threat actor characterization: Missing — vector identified but no TTP analysis +Gap: RC.RP-06.R1 requires documenting the incident itself; threat actor info +needed for ID.IM-03 improvement loop. + +Component B — Response and Recovery Actions +[✓] Triage and validation: Present (§3) +[✓] Containment measures: Present (§4.1) +[✗] Recovery steps: Missing — report notes systems restored but no sequence +documented. RC.RP-04.R1 requires restoration order validation. + +Component C — Lessons Learned +[~] What worked: Present (§7) +[✗] Actionable improvements: 3 lessons identified, 0 have assigned owners or +deadlines. ID.IM-03 requires SMART improvement actions. + +━━━ RS.AN-03 — ROOT CAUSE ANALYSIS ━━━ + +[✓] R1 — Event sequence: Complete timeline from initial access to containment +[~] R2 — Vulnerability identification: Phishing vector identified; no analysis +of why MFA bypass was possible or why the phishing email bypassed email filtering +[✗] R3 — Systemic root cause: Report identifies "employee clicked phishing link" +as root cause — this is proximate, not systemic. No 5-Whys or structural analysis. +Gap: Missing analysis of: why MFA was not enforced on this account class, +why email filtering did not catch this variant, whether this reflects a pattern. +[N/A] R4 — No deception technology deployed per §1.3 + +━━━ RS.AN-06/07/08 — EVIDENCE & RECORDS ━━━ +[✓] RS.AN-06.R1 — IR records access control referenced (§6.2) +[~] RS.AN-07.R1 — Evidence collected but no retention timeline documented +[✗] RS.AN-08.R1 — Scope validation missing: no evidence adjacent systems +were checked for lateral movement or IoC presence + +━━━ RS.CO COMMUNICATIONS RETROSPECTIVE ━━━ +[✓] Internal notifications made with timestamps (§5.1) +[~] RS.CO-02.R3 — Regulatory notification decision not documented (HIPAA +applicability not assessed in AAR) +[✓] RS.CO-03.R2 — Senior leadership received 3 updates during incident + +━━━ RC.RP RECOVERY VERIFICATION ━━━ +[~] RC.RP-04.R2 — System owner confirmations: verbal only, not documented +[✗] RC.RP-05.R1 — No pre-production IoC scan of restored systems documented + +━━━ ID.IM-03 — LESSONS LEARNED LOOP ━━━ +Lessons identified: 3 +Lessons with owner + deadline: 0 +Actionability score: 0% — all lessons are observations, none are assigned actions + +━━━ SUMMARY ━━━ +Critical gaps: RS.AN-03.R3 (systemic root cause), RC.RP-05.R1 (IoC scan), + ID.IM-03 actionability (no assigned owners) +Significant gaps: RS.AN-08.R1 (scope validation), RC.RP-04.R2 (owner sign-off) +Assessment: AAR captures what happened but will not improve the IR program. +The absence of systemic root cause analysis means the same incident is likely +to recur. The 0% actionability on lessons learned means no improvements will +be tracked or implemented. +``` + +--- + +## Deliverable + +A section-by-section AAR review with citation-anchored findings, a lessons-actionability score, and specific improvement recommendations. Suitable for: +- IR program quality assurance +- Compliance audit support +- AAR revision guidance +- Input to `nist-800-61r3-maturity-scorer` + +--- + +## NEVER + +- **NEVER accept proximate cause as root cause** — "user clicked a link" is never a root cause; the systemic failure enabling that click is +- **NEVER mark lessons learned as sufficient without owner + deadline** — unassigned lessons are observations, not improvements +- **NEVER skip the scope validation check (RS.AN-08)** — unvalidated scope is a top cause of incidents recurring on adjacent systems +- **NEVER omit the communications retrospective** — notification failures have legal and regulatory consequences that surface later diff --git a/skills/nist-800-61r3-csf-mapper/SKILL.md b/skills/nist-800-61r3-csf-mapper/SKILL.md new file mode 100644 index 0000000..113c743 --- /dev/null +++ b/skills/nist-800-61r3-csf-mapper/SKILL.md @@ -0,0 +1,171 @@ +--- +name: nist-800-61r3-csf-mapper +description: "Map content from any cybersecurity document (playbook, IR plan, policy, incident report) to NIST SP 800-61r3 CSF 2.0 elements. Produces a coverage table showing which Functions, Categories, and Subcategories are addressed, partially addressed, or absent. Use when asked to 'map this to NIST', 'what CSF elements does this cover', or 'tag this against 800-61r3'." +user-invocable: true +--- + +# NIST SP 800-61r3 CSF Mapper + +Map any cybersecurity document to the CSF 2.0 Community Profile defined in NIST SP 800-61r3 (April 2025). + +--- + +## Goal + +Produce a structured coverage table that shows which CSF 2.0 elements from the SP 800-61r3 Community Profile are: +- **Addressed** — content explicitly satisfies the element +- **Partial** — content touches the element but incompletely +- **Not Found** — element is absent from the document + +--- + +## Inputs + +- The document to analyze (paste text, provide file path, or describe it) +- Document type (ask if not obvious): IR Plan, Playbook, Incident Report, After-Action Report, IR Policy, Risk Assessment, Other + +--- + +## Workflow + +``` +1. Classify Document + ↓ +2. Determine Relevant CSF Scope + ↓ +3. Scan & Tag Content + ↓ +4. Build Coverage Table + ↓ +5. Output Summary +``` + +### Step 1: Classify Document + +Identify the document type before mapping — different types have different expected coverage profiles: + +| Document Type | Primary CSF Scope Expected | +|----------------------|---------------------------| +| IR Plan | GV.PO, GV.RR, ID.IM-04, RS (all), RC (all) | +| Playbook | RS.MA, RS.AN, RS.MI, DE.AE-08, RC.RP | +| Incident Report | RS.MA, RS.AN, RS.CO, RS.MI, RC.RP | +| After-Action Report | RS.AN-03, RS.AN-06/07, RC.RP-06, ID.IM-03 | +| IR Policy | GV.PO, GV.RR, ID.IM-04 (§2.3 elements) | +| Risk Assessment | ID.RA (all), GV.RM, ID.AM | + +### Step 2: Determine Relevant CSF Scope + +SP 800-61r3 defines two tables: + +**Table 2 — Preparation & Lessons Learned** (GV, ID, PR): +- GV: Govern (GV.OC, GV.RM, GV.RR, GV.PO, GV.OV, GV.SC) +- ID: Identify (ID.AM, ID.RA, ID.IM) +- PR: Protect (PR.AA, PR.AT, PR.DS, PR.PS, PR.IR) + +**Table 3 — Incident Response** (DE, RS, RC) — all elements are High priority: +- DE: Detect (DE.CM, DE.AE) +- RS: Respond (RS.MA, RS.AN, RS.CO, RS.MI) +- RC: Recover (RC.RP, RC.CO) + +For most documents, focus first on Table 3 elements (all High priority), then Table 2. + +### Step 3: Scan & Tag Content + +Read the document and for each meaningful section/paragraph: +1. Identify which CSF Subcategory it most closely satisfies +2. Assess completeness: does it fully satisfy the element or only partially? +3. Note the source location (section number, paragraph, or quote) + +**Key semantic anchors** — phrases that signal specific CSF elements: + +| If document mentions... | Maps to | +|------------------------------------------------|------------------| +| Continuous monitoring, anomaly detection | DE.CM | +| Incident declaration criteria, threshold | DE.AE-08 | +| Triage, severity assessment, validate | RS.MA-02 | +| Incident categorization, incident type | RS.MA-03 | +| Escalation, elevation, resource increase | RS.MA-04 | +| Recovery initiation criteria | RS.MA-05 | +| Root cause analysis, event sequence | RS.AN-03 | +| Evidence collection, chain of custody | RS.AN-07 | +| Incident magnitude, IoC scope | RS.AN-08 | +| Stakeholder notification, breach notification | RS.CO-02 | +| Information sharing, ISAC, threat intel share | RS.CO-03 | +| Containment, isolate, quarantine | RS.MI-01 | +| Eradication, persistence removal, patch | RS.MI-02 | +| Recovery plan execution, restore operations | RC.RP-01/02 | +| Backup integrity, clean restore verification | RC.RP-03/05 | +| After-action report, lessons learned | RC.RP-06, ID.IM-03 | +| IR policy, policy elements | GV.PO, ID.IM-04 | +| Roles, responsibilities, authority | GV.RR-02 | + +### Step 4: Build Coverage Table + +Output a table with these columns: + +| CSF ID | Priority | Description (brief) | Coverage | Evidence/Location | +|--------|----------|---------------------|----------|-------------------| + +Coverage values: `Addressed` / `Partial` / `Not Found` + +Group by Function: DE → RS → RC (Table 3 first), then GV → ID → PR (Table 2). + +Only include elements that are relevant to the document type (skip Low-priority Table 2 elements unless the document explicitly covers them). + +### Step 5: Output Summary + +After the table, provide: +1. **Coverage counts**: X/Y High-priority elements addressed +2. **Top gaps**: The 3–5 most significant missing elements by priority +3. **Document type assessment**: Does the coverage match what's expected for this document type? + +--- + +## Output Format + +``` +=== CSF 2.0 Coverage Map — [Document Name/Type] === +Standard: NIST SP 800-61r3 (April 2025) +Document Type: [classified type] + +TABLE 3 — INCIDENT RESPONSE (All High Priority) +──────────────────────────────────────────────────────────────────────── +CSF ID | Description | Coverage | Location +──────────────────────────────────────────────────────────────────────── +DE.CM | Continuous monitoring of assets | Partial | §2.1 +DE.CM-01 | Network monitoring | Addressed | §2.1.a +DE.CM-09 | Computing HW/SW/runtime monitoring | Not Found | — +DE.AE | Adverse event analysis | Partial | — +DE.AE-02 | Events analyzed to understand activity | Addressed | §3.2 +DE.AE-08 | Incident declaration criteria | Not Found | — +RS.MA | Incident management | Partial | §4 +RS.MA-01 | IR plan executed on incident declaration | Addressed | §4.1 +RS.MA-02 | Incidents triaged and validated | Partial | §4.2 (no severity criteria) +... + +TABLE 2 — PREPARATION & LESSONS LEARNED (Mixed Priority) +[only elements relevant to document type] +... + +SUMMARY +──────── +High-priority (Table 3) coverage: 9/23 elements addressed, 5 partial, 9 not found +Medium-priority coverage: 4/12 addressed +Top gaps: DE.AE-08 (incident criteria), RS.MA-02 (triage), RS.AN-03 (root cause), RS.CO-02 (notifications), RC.RP-06 (after-action) +Assessment: Coverage is below expected for an IR Plan — RS.CO and RC sections are largely absent. +``` + +--- + +## Deliverable + +A coverage table + summary that can be handed directly to `nist-800-61r3-gap-analyzer` for prioritized gap analysis or to `nist-800-61r3-maturity-scorer` for scoring. + +--- + +## NEVER + +- **NEVER invent coverage** — only mark Addressed if the document explicitly addresses the element +- **NEVER skip Table 3** — all DE/RS/RC elements are High priority; always include them +- **NEVER use SP 800-61r2 element IDs** — this skill maps to r3/CSF 2.0 IDs only (no legacy "Preparation → Detection & Analysis" phases) +- **NEVER map vague language to specific elements** — "we monitor our systems" alone does not satisfy DE.CM-01 through DE.CM-09 individually diff --git a/skills/nist-800-61r3-gap-analyzer/SKILL.md b/skills/nist-800-61r3-gap-analyzer/SKILL.md new file mode 100644 index 0000000..1756cbb --- /dev/null +++ b/skills/nist-800-61r3-gap-analyzer/SKILL.md @@ -0,0 +1,218 @@ +--- +name: nist-800-61r3-gap-analyzer +description: "Identify prioritized gaps in any cybersecurity document against the NIST SP 800-61r3 CSF 2.0 Community Profile. Accepts raw document content or a coverage map from nist-800-61r3-csf-mapper and outputs a priority-ranked gap list (High/Medium/Low) with specific SP 800-61r3 citation anchors. Use when asked to 'find gaps in this IR plan', 'what's missing from this playbook', or 'gap analysis against 800-61r3'." +user-invocable: true +--- + +# NIST SP 800-61r3 Gap Analyzer + +Identify and prioritize gaps in any cybersecurity document against the CSF 2.0 Community Profile defined in NIST SP 800-61r3 (April 2025). + +--- + +## Goal + +Produce a prioritized gap list — sorted High → Medium → Low — for every CSF 2.0 element from SP 800-61r3 that is missing or incomplete in the reviewed document. Each gap includes the SP 800-61r3 citation, the specific requirement not met, and a remediation suggestion. + +--- + +## Inputs + +- The document to analyze (text, file path, or description), OR +- A coverage map previously produced by `nist-800-61r3-csf-mapper` +- Document type (if not already classified) + +--- + +## Workflow + +``` +1. Obtain Coverage Map (run mapper if not already done) + ↓ +2. Load SP 800-61r3 Element Baseline + ↓ +3. Identify Gaps + ↓ +4. Classify & Prioritize + ↓ +5. Generate Gap Report +``` + +### Step 1: Obtain Coverage Map + +If a coverage map exists from `nist-800-61r3-csf-mapper`, use it directly. Otherwise, perform inline mapping against the document before gap analysis. + +### Step 2: SP 800-61r3 Element Baseline + +The complete element set from SP 800-61r3, organized by priority: + +**HIGH PRIORITY — Table 3 (Incident Response)** + +| CSF ID | Description | +|------------|-------------| +| DE | Detect Function | +| DE.CM | Continuous Monitoring | +| DE.CM-01 | Networks/network services monitored | +| DE.CM-02 | Physical environment monitored | +| DE.CM-03 | Personnel activity and technology usage monitored | +| DE.CM-06 | External service provider activities monitored | +| DE.CM-09 | Computing HW/SW/runtime/data monitored | +| DE.AE | Adverse Event Analysis | +| DE.AE-02 | Events analyzed to understand associated activities | +| DE.AE-03 | Information correlated from multiple sources | +| DE.AE-04 | Estimated impact and scope of adverse events understood | +| DE.AE-06 | Information on adverse events provided to authorized staff/tools | +| DE.AE-07 | CTI and contextual information integrated into analysis | +| DE.AE-08 | Incidents declared when adverse events meet incident criteria | +| RS | Respond Function | +| RS.MA | Incident Management | +| RS.MA-01 | IR plan executed on incident declaration | +| RS.MA-02 | Incident reports triaged and validated | +| RS.MA-03 | Incidents categorized and prioritized | +| RS.MA-04 | Incidents escalated or elevated as needed | +| RS.MA-05 | Recovery initiation criteria applied | +| RS.AN | Incident Analysis | +| RS.AN-03 | Root cause and event sequence established | +| RS.AN-06 | Investigation actions recorded with integrity | +| RS.AN-07 | Incident data/metadata collected with integrity preserved | +| RS.AN-08 | Incident magnitude estimated and validated | +| RS.CO | Incident Reporting and Communication | +| RS.CO-02 | Internal and external stakeholders notified | +| RS.CO-03 | Information shared with designated stakeholders | +| RS.MI | Incident Mitigation | +| RS.MI-01 | Incidents contained | +| RS.MI-02 | Incidents eradicated | +| RC | Recover Function | +| RC.RP | Incident Recovery Plan Execution | +| RC.RP-01 | Recovery portion of IR plan executed | +| RC.RP-02 | Recovery actions selected, scoped, prioritized, performed | +| RC.RP-03 | Backup/restoration asset integrity verified | +| RC.RP-04 | Critical mission functions considered post-incident | +| RC.RP-05 | Restored asset integrity verified, normal status confirmed | +| RC.RP-06 | End of incident recovery declared, documentation completed | +| RC.CO | Incident Recovery Communication | +| RC.CO-03 | Recovery activities/progress communicated to stakeholders | +| RC.CO-04 | Public updates shared using approved methods/messaging | + +**MEDIUM PRIORITY — Table 2 (selected high-value elements)** + +| CSF ID | Description | +|------------|-------------| +| GV.OC-03 | Legal, regulatory, contractual requirements understood | +| GV.RR | Roles, responsibilities, authorities established | +| GV.RR-02 | Roles/responsibilities documented and enforced | +| GV.PO | Organizational cybersecurity policy established | +| GV.OV-01 | Risk management strategy outcomes reviewed | +| GV.SC-08 | Relevant suppliers included in IR planning/response | +| ID.AM-01 | HW inventories maintained | +| ID.AM-02 | SW/services inventories maintained | +| ID.RA-02 | CTI received from sharing forums/sources | +| ID.RA-05 | Threats/vulns/likelihoods used to inform risk response | +| ID.RA-06 | Risk responses chosen, tracked, communicated | +| ID.IM-01 | Improvements identified from evaluations | +| ID.IM-02 | Improvements from security tests and exercises | +| ID.IM-03 | Improvements from operational execution | +| ID.IM-04 | IR plans established, maintained, improved | +| PR.DS-11 | Backups created, protected, maintained, tested | +| PR.PS-04 | Log records generated for continuous monitoring | + +**LOW PRIORITY — Table 2 (informational)** + +All remaining GV, ID, PR elements not listed above. + +### Step 3: Identify Gaps + +For each element in the baseline, determine its status in the coverage map: +- `Not Found` → Gap +- `Partial` → Partial Gap (flag as gap with lower severity modifier) +- `Addressed` → No gap + +### Step 4: Classify & Prioritize + +For each gap, assign: + +**Severity** (for High-priority elements): +- **Critical**: Element is absent and directly impacts incident handling (RS.MA-02, DE.AE-08, RS.AN-03, RS.MI-01, RC.RP-06) +- **Significant**: Element is absent but has workarounds or is partially covered +- **Minor**: Partial coverage that needs strengthening + +**Gap type**: +- **Missing**: No evidence of the element +- **Incomplete**: Element addressed but key sub-requirements absent + +### Step 5: Generate Gap Report + +--- + +## Output Format + +``` +=== NIST SP 800-61r3 Gap Analysis Report === +Document: [name/type] +Standard: NIST SP 800-61r3 (April 2025) +Analysis date: [date] + +━━━ CRITICAL GAPS (High Priority — Table 3) ━━━ + +[C1] DE.AE-08 — Incident Declaration Criteria + Status: Missing + Requirement: "Apply incident criteria to known and assumed characteristics + of analyzed activity, and consider known false positives to determine + whether an incident should be declared." (DE.AE-08.R1) + Impact: Without defined declaration criteria, incident response may start + too late or on false positives — undermining RS.MA-02 triage. + Remediation: Define specific observable thresholds (e.g., confirmed C2 + beacon, lateral movement detected, data staged for exfil) that constitute + an incident declaration trigger. + +[C2] RS.MA-02 — Incident Triage and Validation + Status: Partial (severity categories present, no urgency criteria) + Requirement: "Perform a preliminary review of a new incident report to + verify that a cybersecurity incident has occurred, then estimate the + severity and urgency needed to respond to it." (RS.MA-02.R1) + Impact: Severity tiers exist but no time-based urgency criteria — teams + cannot determine response SLAs. + Remediation: Add urgency criteria alongside severity (e.g., P1 = respond + within 1hr, P2 = 4hr, P3 = 24hr). + +... + +━━━ SIGNIFICANT GAPS (High Priority) ━━━ + +[S1] RS.CO-02 — Stakeholder Notification + ... + +━━━ MEDIUM PRIORITY GAPS (Table 2) ━━━ + +[M1] ID.IM-04 — IR Plans Maintained and Improved + ... + +━━━ SUMMARY ━━━ + +Critical gaps: 3 (must address before next incident) +Significant gaps: 5 (address within 30 days) +Minor gaps: 4 (address in next program review cycle) +Medium gaps: 6 +Low gaps: 2 + +Highest-impact single fix: Adding DE.AE-08 incident declaration criteria would +immediately improve RS.MA-02, RS.MA-03, and RS.MA-04 effectiveness. +``` + +--- + +## Deliverable + +A prioritized, citation-anchored gap report suitable for: +- Direct input to `nist-800-61r3-maturity-scorer` +- Action items for IR program improvement backlog +- Audit findings documentation + +--- + +## NEVER + +- **NEVER mark a gap as Critical unless it is in Table 3 AND directly impacts incident handling** +- **NEVER suggest remediation that adds bureaucracy** — all suggestions should be practical, not compliance theater +- **NEVER conflate Missing with Partial** — partial coverage is progress; acknowledge what exists +- **NEVER omit citation anchors** — every finding must cite the specific CSF ID and R/C/N item from SP 800-61r3 diff --git a/skills/nist-800-61r3-maturity-scorer/SKILL.md b/skills/nist-800-61r3-maturity-scorer/SKILL.md new file mode 100644 index 0000000..e59c492 --- /dev/null +++ b/skills/nist-800-61r3-maturity-scorer/SKILL.md @@ -0,0 +1,299 @@ +--- +name: nist-800-61r3-maturity-scorer +description: "Score an organization's IR program maturity against the NIST SP 800-61r3 CSF 2.0 Community Profile using a weighted scoring model tied to element priority (High/Medium/Low). Produces a per-function heatmap, weighted overall score, maturity level (1-5), and the highest-leverage improvement actions. Use when asked to 'score this against NIST', 'maturity assessment', 'how mature is our IR program', or 'NIST compliance score'." +user-invocable: true +--- + +# NIST SP 800-61r3 Maturity Scorer + +Score an organization's IR program maturity against the complete CSF 2.0 Community Profile from NIST SP 800-61r3 (April 2025). + +--- + +## Goal + +Produce a quantitative maturity score grounded in SP 800-61r3's own priority system. High-priority elements (all Table 3 elements) have the most weight; Medium-priority elements (Table 2 selected) have moderate weight; Low-priority elements contribute minimally. Output a score, maturity level, per-function heatmap, and the top improvement actions by ROI. + +--- + +## Inputs + +- Coverage data from any of: + - Raw document (will map inline) + - Coverage map from `nist-800-61r3-csf-mapper` + - Gap report from `nist-800-61r3-gap-analyzer` + - Recommendation audit from `nist-800-61r3-recommendation-auditor` + - Combined inputs from multiple documents (whole-program assessment) +- Assessment mode: **Document** (single doc) or **Program** (entire IR program across multiple docs) + +--- + +## Workflow + +``` +1. Determine Element Coverage + ↓ +2. Apply Weighted Scoring Model + ↓ +3. Compute Per-Function Scores + ↓ +4. Compute Overall Weighted Score + ↓ +5. Assign Maturity Level + ↓ +6. Identify High-ROI Improvements + ↓ +7. Generate Maturity Report +``` + +### Step 1: Determine Element Coverage + +For each SP 800-61r3 element, assign a coverage score: +- **1.0** — Fully addressed (evidence is explicit and complete) +- **0.5** — Partially addressed (element present but key sub-requirements missing) +- **0.0** — Not found or absent + +If working from a raw document, perform inline mapping before scoring. + +### Step 2: Weighted Scoring Model + +SP 800-61r3's own priority system drives the weights: + +| Priority | Source | Weight | +|----------|--------|--------| +| High | Table 3 (all DE/RS/RC elements) | 3 | +| Medium | Table 2 selected GV/ID/PR elements | 2 | +| Low | Table 2 remaining GV/ID/PR elements | 1 | + +**Score formula per function:** +``` +Function_score = Σ(element_coverage × element_weight) / Σ(element_weight) +``` + +**Overall weighted score:** +``` +Overall = Σ(all element_coverage × element_weight) / Σ(all element_weight) +``` + +### Step 3: SP 800-61r3 Element Registry with Weights + +**HIGH WEIGHT (3) — Table 3 Elements** + +| CSF ID | Weight | Description | +|--------|--------|-------------| +| DE.CM | 3 | Continuous Monitoring (parent) | +| DE.CM-01 | 3 | Network monitoring | +| DE.CM-02 | 3 | Physical environment monitoring | +| DE.CM-03 | 3 | Personnel activity monitoring | +| DE.CM-06 | 3 | External service provider monitoring | +| DE.CM-09 | 3 | Computing HW/SW/runtime monitoring | +| DE.AE | 3 | Adverse Event Analysis (parent) | +| DE.AE-02 | 3 | Events analyzed | +| DE.AE-03 | 3 | Information correlated from multiple sources | +| DE.AE-04 | 3 | Impact and scope estimated | +| DE.AE-06 | 3 | Info provided to authorized staff/tools | +| DE.AE-07 | 3 | CTI integrated into analysis | +| DE.AE-08 | 3 | Incident declaration criteria | +| RS.MA | 3 | Incident Management (parent) | +| RS.MA-01 | 3 | IR plan executed on declaration | +| RS.MA-02 | 3 | Incidents triaged and validated | +| RS.MA-03 | 3 | Incidents categorized and prioritized | +| RS.MA-04 | 3 | Incidents escalated/elevated | +| RS.MA-05 | 3 | Recovery initiation criteria applied | +| RS.AN | 3 | Incident Analysis (parent) | +| RS.AN-03 | 3 | Root cause established | +| RS.AN-06 | 3 | Investigation actions recorded with integrity | +| RS.AN-07 | 3 | Incident data collected with integrity | +| RS.AN-08 | 3 | Incident magnitude estimated | +| RS.CO | 3 | Incident Communication (parent) | +| RS.CO-02 | 3 | Stakeholders notified | +| RS.CO-03 | 3 | Information shared with stakeholders | +| RS.MI | 3 | Incident Mitigation (parent) | +| RS.MI-01 | 3 | Incidents contained | +| RS.MI-02 | 3 | Incidents eradicated | +| RC.RP | 3 | Recovery Plan Execution (parent) | +| RC.RP-01 | 3 | Recovery plan executed | +| RC.RP-02 | 3 | Recovery actions performed | +| RC.RP-03 | 3 | Backup integrity verified | +| RC.RP-04 | 3 | Critical functions considered post-incident | +| RC.RP-05 | 3 | Restored asset integrity verified | +| RC.RP-06 | 3 | Recovery declared, documentation completed | +| RC.CO | 3 | Recovery Communication (parent) | +| RC.CO-03 | 3 | Recovery progress communicated | +| RC.CO-04 | 3 | Public updates shared | + +**MEDIUM WEIGHT (2) — Table 2 Selected** + +| CSF ID | Weight | +|--------|--------| +| GV.RR | 2 | +| GV.RR-02 | 2 | +| GV.PO | 2 | +| GV.OC-03 | 2 | +| GV.SC-08 | 2 | +| GV.OV-01 | 2 | +| ID.RA-02 | 2 | +| ID.RA-05 | 2 | +| ID.RA-06 | 2 | +| ID.AM-01 | 2 | +| ID.AM-02 | 2 | +| ID.IM-01 | 2 | +| ID.IM-02 | 2 | +| ID.IM-03 | 2 | +| ID.IM-04 | 2 | +| PR.DS-11 | 2 | +| PR.PS-04 | 2 | + +**LOW WEIGHT (1) — Table 2 Remaining** + +All remaining GV, ID, PR elements not listed above (GV.OC-01/02/04/05, GV.RM-01–07, GV.RR-01/03/04, GV.PO-01/02, GV.OV-02/03, GV.SC-01–10, ID.AM-03–08, ID.RA-01/03/04/07/08/09/10, PR.AA, PR.AT, PR.DS, PR.PS, PR.IR) + +### Step 4: Per-Function Scores + +Compute a score for each of the 6 CSF Functions independently, using only the elements belonging to that function. + +Convert to percentage and render a visual bar: +``` +DE (Detect) [████████░░] 80% +RS (Respond) [██████░░░░] 60% +RC (Recover) [████████░░] 80% +GV (Govern) [██████░░░░] 55% +ID (Identify)[████░░░░░░] 45% +PR (Protect) [████░░░░░░] 40% +``` + +### Step 5: Maturity Level Assignment + +Map the overall weighted score to a maturity level: + +| Score | Level | Name | Characterization | +|-------|-------|------|-----------------| +| 0–29% | 1 | Initial | No consistent IR practices; ad hoc and reactive | +| 30–49% | 2 | Developing | Basic IR exists but is undocumented or inconsistently applied | +| 50–69% | 3 | Defined | Documented IR program exists; key processes established | +| 70–84% | 4 | Managed | Measured IR program; performance tracked; continuous improvement in place | +| 85–100% | 5 | Optimizing | IR integrated into all cybersecurity risk management; lessons feed back continuously | + +These levels correspond loosely to CSF 2.0 Implementation Tiers 1–4 and the maturity concepts from ID.IM. + +### Step 6: High-ROI Improvements + +Identify the top 5 improvements with highest score impact: + +For each Not Met or Partial element, compute: +- **Score uplift**: (1.0 - current_coverage) × weight / total_weighted_points +- **Dependencies unlocked**: how many other elements improve if this one is addressed + +Rank by (score_uplift × 3) + (dependencies_unlocked × 2). + +Common high-ROI improvements (based on typical IR programs): +1. **DE.AE-08** (incident declaration criteria) — weight 3; enables RS.MA-01 through RS.MA-05 +2. **ID.IM-04** (IR plan maintenance) — weight 2; foundational for all Table 3 elements +3. **RS.AN-03** (root cause analysis) — weight 3; enables ID.IM-03 improvement loop +4. **RS.CO-02** (stakeholder notifications) — weight 3; legal/regulatory exposure reduction +5. **PR.PS-04** (log records) — weight 2; enables DE.AE-02/03/07 detection capabilities + +### Step 7: Generate Maturity Report + +--- + +## Output Format + +``` +╔══════════════════════════════════════════════════════════════════╗ +║ NIST SP 800-61r3 IR Maturity Assessment ║ +║ Standard: NIST SP 800-61r3 (April 2025), CSF 2.0 ║ +╚══════════════════════════════════════════════════════════════════╝ + +Assessment Mode: [Document / Program] +Document(s): [list] +Assessment Date: [date] + +━━━ MATURITY SCORE ━━━ + +Overall Weighted Score: 61% +┌─────────────────────────────┐ +│ MATURITY LEVEL 3: DEFINED │ +│ Score range: 50–69% │ +└─────────────────────────────┘ +Documented IR program exists with key processes established. +Gaps remain in continuous improvement loop and detection coverage. + +━━━ PER-FUNCTION SCORES ━━━ + +Function Coverage Score Bar +──────────── ───────── ────── ────────────────────── +DE (Detect) 11/14 el 74% ████████████████░░░░░ +RS (Respond) 15/25 el 58% ████████████░░░░░░░░░ +RC (Recover) 10/12 el 80% ████████████████░░░░ +GV (Govern) 9/18 el 50% ██████████░░░░░░░░░░░ +ID (Identify) 8/18 el 44% █████████░░░░░░░░░░░░ +PR (Protect) 7/15 el 47% █████████░░░░░░░░░░░░ + +Strongest function: RC (Recover) — 80% +Weakest function: ID (Identify) — 44% +Table 3 (IR) score: 67% ← primary operational readiness indicator +Table 2 (Prep) score: 47% ← preparation and prevention readiness + +━━━ ELEMENT COVERAGE DETAIL ━━━ + +High Priority (Table 3): 24/39 elements addressed (3 partial) +Medium Priority (Table 2): 9/17 elements addressed (2 partial) +Low Priority (Table 2): 11/24 elements addressed + +━━━ TOP 5 HIGH-ROI IMPROVEMENTS ━━━ + +Rank Element Score Uplift Dependencies Action +──── ───────────── ──────────── ──────────── ────────────────────────────── + 1 DE.AE-08 +3.2% 5 unlocked Define incident declaration + criteria with false-positive + guidance + 2 RS.AN-03 +2.8% 3 unlocked Implement RCA methodology + (5-Whys or fishbone) in all + AAR procedures + 3 ID.IM-04.R2 +1.6% 2 unlocked Establish formal IR plan + review cycle (annual + + post-major-incident trigger) + 4 RS.CO-02.R3 +2.4% 0 unlocked Document regulatory + notification obligations by + jurisdiction and sector + 5 PR.PS-04 +1.8% 4 unlocked Implement centralized log + management covering all + SP 800-61r3 DE.CM asset types + +Implementing all 5 would raise score to approximately 72% (Level 4: Managed). + +━━━ BENCHMARKING ━━━ + +Level 3 (Defined) organizations typically: +✓ Have documented IR policies and procedures +✓ Have incident classification and triage processes +✗ Lack continuous improvement mechanisms +✗ Have gaps in detection coverage (DE elements) +✗ Do not systematically measure IR program performance + +This assessment is consistent with a Level 3 organization. + +━━━ TREND (if prior assessment available) ━━━ +Prior score: N/A +Change: N/A +``` + +--- + +## Deliverable + +A scored maturity report with function-level heatmap, maturity level assignment, and prioritized improvement roadmap. Suitable for: +- Executive reporting on IR program state +- Security program roadmap planning +- Board/leadership cybersecurity briefings +- Benchmarking across assessment cycles + +--- + +## NEVER + +- **NEVER report a score without showing the per-function breakdown** — an overall score without function detail masks where the real problems are +- **NEVER assign Maturity Level 4 or 5 without verifying the ID.IM improvement loop** — a program that doesn't measure itself cannot be Managed or Optimizing +- **NEVER use the score as a compliance checkbox** — maturity scores measure program capability, not regulatory compliance; explicitly state this limitation +- **NEVER round up partial coverage** — 0.5 is 0.5; a half-addressed element is not the same as a fully addressed one diff --git a/skills/nist-800-61r3-policy-reviewer/SKILL.md b/skills/nist-800-61r3-policy-reviewer/SKILL.md new file mode 100644 index 0000000..a0e62c4 --- /dev/null +++ b/skills/nist-800-61r3-policy-reviewer/SKILL.md @@ -0,0 +1,248 @@ +--- +name: nist-800-61r3-policy-reviewer +description: "Evaluate IR policy and procedure documents against NIST SP 800-61r3 Section 2.3 required policy elements and the GV.PO/GV.RR/ID.IM-04 CSF categories. Checks for all 8 required policy elements, role authority designations, and plan maintenance requirements. Use when asked to 'review this IR policy', 'check our incident response policy', 'is this policy complete', or 'policy completeness check'." +user-invocable: true +--- + +# NIST SP 800-61r3 Policy Reviewer + +Evaluate IR policies, plans, and procedure documents against the requirements defined in NIST SP 800-61r3 Section 2.3 and the associated CSF 2.0 GV/ID elements. + +--- + +## Goal + +SP 800-61r3 Section 2.3 explicitly defines the required elements of an IR policy. This skill checks that a policy document contains all required elements, properly designates roles and authorities, and establishes the plan maintenance framework needed to stay current. + +--- + +## Inputs + +- The IR policy, IR plan, or procedures document to review (text or file path) +- Organization type (federal agency, private sector, MSSP) — affects regulatory notification requirements + +--- + +## Workflow + +``` +1. Load Section 2.3 Policy Checklist + ↓ +2. Check 8 Required Policy Elements + ↓ +3. Check Roles & Authorities (GV.RR-02) + ↓ +4. Check Plan Framework (ID.IM-04) + ↓ +5. Check Shared Responsibility & Supply Chain (GV.SC-08) + ↓ +6. Generate Policy Review Report +``` + +### Step 1: Section 2.3 Policy Checklist + +SP 800-61r3 §2.3 states that most IR policies include the same key elements. These are the required policy elements: + +| # | Element | CSF Anchor | +|---|---------|-----------| +| 1 | Statement of management commitment | GV.RR-01 | +| 2 | Purpose and objectives of the policy | GV.PO | +| 3 | Scope of the policy (to whom/what it applies, under what circumstances) | GV.PO | +| 4 | Definition of events, cybersecurity incidents, investigations, and related terms | DE.AE-08 | +| 5 | Roles, responsibilities, and authorities (including who has authority to confiscate/disconnect/shut down assets) | GV.RR-02 | +| 6 | Guidelines for prioritizing incidents, estimating severity, initiating recovery, maintaining/restoring operations, and other key actions | RS.MA-03, RS.MA-05 | +| 7 | Performance measures | ID.IM-01 | +| 8 | Shared responsibility model with third parties (MSSPs, CSPs, ISPs) if applicable | GV.SC-08, GV.RR-02 | + +### Step 2: Check 8 Required Policy Elements + +For each element, determine: +- **Present** — policy explicitly addresses this element +- **Partial** — element is referenced but incomplete (e.g., roles listed but no authority designations) +- **Missing** — no evidence of the element + +**Detailed checks per element:** + +**Element 1 — Management Commitment** +- Is there a signed/dated endorsement from executive leadership? +- Does it reference resource allocation for IR? +- CSF: GV.RR-01 — "Organizational leadership is responsible and accountable for cybersecurity risk" + +**Element 2 — Purpose and Objectives** +- Does the policy state why it exists? +- Does it reference reducing incident impact and improving response effectiveness? +- CSF: GV.PO + +**Element 3 — Scope** +- Who does this apply to? (employees, contractors, third parties, specific systems) +- What environments are covered? (cloud, on-prem, OT/ICS, mobile) +- Are exclusions documented? +- CSF: GV.PO + +**Element 4 — Definitions** +- Are "event," "adverse event," and "cybersecurity incident" defined per SP 800-61r3 Appendix B? +- Is "incident" distinguished from "event" — incidents jeopardize CIA of information or constitute policy violations +- Are investigation-related terms defined? +- CSF: DE.AE-08 + +**Element 5 — Roles, Responsibilities, and Authorities** +- Are incident response roles explicitly named (incident lead, handlers, legal, HR, comms, leadership)? +- Does the policy designate which roles have authority to: + - Confiscate assets + - Disconnect systems from the network + - Shut down technology assets +- Are third-party roles (MSSP, CSP, law enforcement) addressed? +- CSF: GV.RR-02.R1, GV.RR-02.R2 + +**Element 6 — Prioritization, Severity, and Recovery Guidelines** +- Are incident severity/priority tiers defined (e.g., P1–P4 or Critical/High/Medium/Low)? +- Are risk evaluation factors listed? SP 800-61r3 RS.MA.N2 suggests: asset criticality, functional impact, data impact, stage of observed activity, threat actor characterization, recoverability +- Are recovery initiation criteria defined? (RS.MA-05.R1) +- Are time-based response SLAs included? +- CSF: RS.MA-03.R1, RS.MA-05.R1 + +**Element 7 — Performance Measures** +- Are IR program performance metrics defined? +- Examples: mean time to detect, mean time to respond, mean time to recover, % incidents meeting SLAs, % staff trained +- Is there a review cycle for measuring performance against these metrics? +- CSF: ID.IM-01.R1 — "Periodically evaluate IR program performance to identify problems" + +**Element 8 — Shared Responsibility Model** +- If the organization uses MSSPs/CSPs/ISPs: are their responsibilities documented? +- Are contracts/SLAs referenced? +- Are information flow and coordination authorities defined? +- Are restrictions on third-party actions documented (e.g., cannot share sanitized incident info with other customers)? +- CSF: GV.SC-08, GV.RR-02.R3 + +### Step 3: Roles & Authorities Deep Check (GV.RR-02) + +Check specifically for GV.RR-02.R1, R2, R3: + +**R1 — All IR roles documented in organizational policies:** +- [ ] Incident Response Team / SOC roles documented +- [ ] Legal counsel role in IR documented +- [ ] HR role documented (GV.RR-02 + RS.CO-03.R3) +- [ ] Public affairs / media relations role documented +- [ ] Physical security and facilities role documented +- [ ] Asset owners role documented +- [ ] Leadership decision authority documented + +**R2 — All appropriate individuals designated the authority to fulfill IR responsibilities:** +- [ ] Named or role-titled individuals with authority to isolate/disconnect systems +- [ ] Named or role-titled individuals with authority to invoke business continuity plans +- [ ] Named or role-titled individuals with authority to engage law enforcement +- [ ] Escalation/elevation triggers and authority chain documented (RS.MA-04) + +### Step 4: Plan Framework Check (ID.IM-04) + +SP 800-61r3 ID.IM-04 has four recommendations for all cybersecurity plans: + +- **R1** — Synchronize business continuity plans with incident response plans + - Check: Does the policy reference BCP/DRP alignment? +- **R2** — Review and update all cybersecurity plans periodically or when significant improvements are identified + - Check: Is there a review/update cycle defined? (Annual, post-incident trigger?) +- **R3** — Base each plan on the organization's unique requirements, mission, size, structure, and functions + - Check: Is the policy tailored to the org or clearly a generic template? +- **R4** — Each plan identifies the resources and management support needed + - Check: Are resource requirements (staff, tools, budget) referenced? + +### Step 5: Regulatory & Notification Check (GV.OC-03) + +Based on organization type, verify the policy addresses applicable notification requirements: + +**GV.OC-03.R1** — Cybersecurity requirements include all IR-related requirements (incident notification, data breach reporting) + +Check: +- [ ] Notification obligations referenced (FISMA, HIPAA, PCI-DSS, GDPR, state breach laws — as applicable) +- [ ] Breach notification timelines addressed (RS.CO-02.R3) +- [ ] Law enforcement notification criteria defined (RS.CO-02.R5) +- [ ] Regulatory body notification criteria defined + +### Step 6: Generate Policy Review Report + +--- + +## Output Format + +``` +=== NIST SP 800-61r3 IR Policy Review === +Document: [policy name/version] +Organization type: [federal / private / MSSP] +Standard: NIST SP 800-61r3 §2.3 + CSF 2.0 GV/ID elements + +━━━ SECTION 2.3 REQUIRED ELEMENTS CHECKLIST ━━━ + +[✓] Element 1 — Management Commitment + Found: Executive VP signature, dated 2025-01-15; §1.1 + Note: Does not reference specific resource commitments (FTE, budget) + +[✗] Element 4 — Definitions + Missing: "Event" and "adverse event" are not defined. + The policy uses "incident" broadly without distinguishing security events + from confirmed cybersecurity incidents per FISMA 2014 definition. + Required by: DE.AE-08, SP 800-61r3 Appendix B + Fix: Add a definitions section with SP 800-61r3 Appendix B terms at minimum. + +[~] Element 5 — Roles, Responsibilities, and Authorities + Partial: Roles listed (IR Team, Legal, HR) but authority to disconnect + systems is not designated to any specific role. + Required by: GV.RR-02.R2 — "All appropriate individuals should be + designated the authority necessary to fulfill their IR responsibilities" + Fix: Add explicit authority table specifying who can isolate/disconnect/ + shut down each asset class. + +[✓] Element 6 — Prioritization Guidelines + Found: Four-tier severity model (P1-P4) with response SLAs; §3.2 + Note: Recovery initiation criteria (RS.MA-05.R1) are absent — policy + defines when to start responding but not when to start recovering. + +[✗] Element 7 — Performance Measures + Missing: No IR performance metrics defined anywhere in the document. + Required by: ID.IM-01.R1 + Fix: Define at minimum: MTTD, MTTR, % incidents within SLA, + % staff completing IR training annually. + +━━━ ROLES & AUTHORITIES (GV.RR-02) ━━━ +[✓] IR Team roles documented +[✓] Legal counsel role documented +[~] HR role referenced but not scoped for insider threat (RS.CO-03.R3) +[✗] Physical security role not mentioned +[✗] No named authority to invoke BCP/DRP during incident + +━━━ PLAN FRAMEWORK (ID.IM-04) ━━━ +[~] R1 — BCP alignment mentioned in passing, not formally linked (§1.4) +[✓] R2 — Annual review cycle defined (§6.1) +[✗] R3 — Document appears to be a generic template; lacks org-specific context +[~] R4 — "Resources as needed" language present but no specifics + +━━━ REGULATORY/NOTIFICATION (GV.OC-03) ━━━ +[~] Notification obligations referenced but no specific laws cited +[✗] No breach notification timelines defined + +━━━ SUMMARY ━━━ +Present: 4 / 8 required elements fully addressed +Partial: 2 / 8 elements partially addressed +Missing: 2 / 8 required elements absent + +Critical gaps: Element 4 (definitions) and Element 7 (performance measures) +Policy assessment: Not ready for operational use without addressing critical gaps. +Estimated effort to remediate: LOW — most gaps are additions, not rewrites. +``` + +--- + +## Deliverable + +A checklist-based policy review report with element-by-element findings, direct SP 800-61r3 citations, and specific remediation guidance. Suitable for: +- IR policy audit findings +- Input to `nist-800-61r3-maturity-scorer` +- Policy revision task list + +--- + +## NEVER + +- **NEVER accept "implied" policy elements** — if an element is not explicit in the document, it is Missing +- **NEVER skip the authority designations check** — GV.RR-02.R2 is one of the most commonly missed requirements +- **NEVER recommend creating a separate document for each gap** — all elements should be in the policy; suggest additions to the existing document +- **NEVER skip the definitions check** — undefined terms (especially "event" vs. "incident") cause real operational confusion during actual incidents diff --git a/skills/nist-800-61r3-recommendation-auditor/SKILL.md b/skills/nist-800-61r3-recommendation-auditor/SKILL.md new file mode 100644 index 0000000..f8c2868 --- /dev/null +++ b/skills/nist-800-61r3-recommendation-auditor/SKILL.md @@ -0,0 +1,270 @@ +--- +name: nist-800-61r3-recommendation-auditor +description: "Audit whether a cybersecurity document satisfies the R-tagged (Recommendation) items from NIST SP 800-61r3's CSF 2.0 Community Profile. R-items are the things SP 800-61r3 says organizations *should* do — the highest-signal findings. Outputs a Met/Partial/Not Met assessment per R-item with direct quotes from the standard. Use when asked to 'check all the recommendations', 'audit R items', or 'what recommendations aren't met'." +user-invocable: true +--- + +# NIST SP 800-61r3 Recommendation Auditor + +Audit whether a document satisfies the R-tagged recommendations from the SP 800-61r3 CSF 2.0 Community Profile (April 2025). + +--- + +## Goal + +SP 800-61r3 uses three annotation types in its Community Profile tables: +- **R** = Recommendation: "the organization **should** do this" +- **C** = Consideration: "the organization **should consider** doing this" +- **N** = Note: informational only + +This skill focuses on **R-items** (mandatory-intent recommendations) — these are the highest-signal findings in an audit. C-items are assessed secondarily. N-items are informational only and not scored. + +--- + +## Inputs + +- Document to audit (text, file path, or description) +- Document type (ask if unclear) +- Scope: Full audit (all R-items) or targeted (specific CSF Function) + +--- + +## Workflow + +``` +1. Load R-item Catalog + ↓ +2. Classify Document & Set Scope + ↓ +3. Audit Each R-item + ↓ +4. Score Results + ↓ +5. Generate Audit Report +``` + +### Step 1: R-item Catalog + +The complete R-item catalog from SP 800-61r3, grouped by priority tier: + +**HIGH PRIORITY — Table 3 (all R-items here are critical)** + +**DE.CM (Continuous Monitoring)** +- `DE.CM.R1` — Continuous monitoring must cover: networks/network services; computing HW/SW/runtime/data; physical environment; personnel activity and technology usage; external service provider activities +- `DE.CM.R2` — Tune monitoring technologies to reduce false positives and false negatives to acceptable levels +- `DE.CM-01.R1` — Monitoring must include wired/wireless networks, network communications/flows, network services (DNS, BGP), and unauthorized/rogue networks +- `DE.CM-02.R1` — Physical environment monitoring must include all access attempts, movement of people/equipment, and tampering signs +- `DE.CM-03.R1` — Personnel activity monitoring must include anomalous user activity, authentication attempts, and deception technology +- `DE.CM-06.R1` — External provider monitoring must include remote/on-site admin, maintenance activities, deviations from expected cloud/ISP behavior +- `DE.CM-09.R1` — Monitor email, web, file sharing, collaboration services for malware/phishing/exfil +- `DE.CM-09.R2` — Monitor authentication attempts for credential attacks and unauthorized credential use +- `DE.CM-09.R3` — Monitor software/hardware configurations for deviations from security baselines +- `DE.CM-09.R4` — Monitor hardware/software including cybersecurity protection mechanisms for tampering/failure/compromise +- `DE.CM-09.R5` — Monitor endpoints for cyber health issues (missing patches, malware, unauthorized software) + +**DE.AE (Adverse Event Analysis)** +- `DE.AE.R1` — Rely on technical solutions to filter large event datasets to human-viewable subsets +- `DE.AE.R2` — Proactively find incidents earlier in the attack life cycle +- `DE.AE-02.R1` — Use SIEM/SOAR to continuously monitor log events for known malicious/suspicious activity +- `DE.AE-02.R2` — Use up-to-date CTI in log analysis tools to improve detection accuracy +- `DE.AE-02.R3` — Regularly conduct manual reviews for technologies that cannot be sufficiently auto-monitored +- `DE.AE-03.R1` — Constantly transfer log data from sources to a small number of log servers +- `DE.AE-03.R2` — Use event correlation technology (SIEM/SOAR) to gather related data from multiple sources +- `DE.AE-03.R3` — Use CTI to correlate events among log sources +- `DE.AE-04.R1` — Estimate impact/scope through automated (SIEM/SOAR) and/or manual means; review and refine estimates +- `DE.AE-06.R1` — Generate alerts and provide to cybersecurity/IR tools and staff (SOC, incident responders) +- `DE.AE-06.R2` — Make log analysis findings accessible to incident responders at all times +- `DE.AE-07.R1` — Integrate up-to-date CTI and asset inventories into adverse event analysis +- `DE.AE-07.R2` — Rapidly acquire and analyze vulnerability disclosures from suppliers/vendors/advisories +- `DE.AE-08.R1` — Apply incident criteria to analyzed activity; consider known false positives to determine incident declaration + +**RS.MA (Incident Management)** +- `RS.MA.R1` — Do not handle incidents on first-come, first-served basis — use risk evaluation factors +- `RS.MA.R2` — Base triage/prioritization/escalation/recovery on risk evaluation factors +- `RS.MA.R3` — Track incident response status with: summary, IoCs, status/timeframe per action, next steps +- `RS.MA-01.R1` — Detection technologies should automatically report confirmed incidents +- `RS.MA-02.R1` — Perform preliminary review to verify incident occurred, estimate severity and urgency +- `RS.MA-02.R2` — Have mechanisms for third parties to report possible incidents; monitor reports seriously +- `RS.MA-03.R1` — Perform detailed review to categorize by incident type (data breach, ransomware, account takeover, DoS) +- `RS.MA-03.R2` — Prioritize based on scope, likely impact, time-critical nature, resource availability +- `RS.MA-03.R3` — Select incident response strategies balancing recovery speed vs. investigation need +- `RS.MA-04.R1` — Track and validate status of all ongoing incidents to identify escalation needs +- `RS.MA-05.R1` — Apply incident recovery criteria to determine when recovery should be initiated +- `RS.MA-05.R2` — Consider operational disruption of recovery activities when deciding recovery timing + +**RS.AN (Incident Analysis)** +- `RS.AN-03.R1` — Determine sequence of events and which assets/resources were involved in each event +- `RS.AN-03.R2` — Determine which vulnerabilities, threats, and threat actors were involved (direct/indirect) +- `RS.AN-03.R3` — Analyze incident for underlying or systemic root causes +- `RS.AN-03.R4` — Check any deployed cyber deception technology for additional attacker behavior info +- `RS.AN-06.R1` — Safeguard confidentiality and integrity of IR records; restrict to authorized personnel only +- `RS.AN-07.R1` — Collect and retain evidence per evidence preservation procedures and data retention policies; consider prosecution possibility +- `RS.AN-08.R1` — Look for IoCs, evidence of persistence, and other signs on both targeted and potential target assets + +**RS.CO (Incident Reporting and Communication)** +- `RS.CO.R1` — Have mechanisms in place in advance to coordinate with affected parties about incidents +- `RS.CO-02.R1` — When incident analyzed/prioritized, coordinate with appropriate individuals inside and outside the organization +- `RS.CO-02.R2` — Follow established procedures: what must be reported, to whom, at what times +- `RS.CO-02.R3` — Perform notifications in compliance with incident notification laws/regulations for organization's sectors and geographies +- `RS.CO-02.R4` — Notify affected third parties of data breaches per regulatory, legal, contractual requirements +- `RS.CO-02.R5` — Notify law enforcement and regulatory bodies per criteria in IR plan and management approval +- `RS.CO-03.R1` — Securely share information consistent with response plans and information sharing agreements +- `RS.CO-03.R2` — Regularly update senior leadership on status of major incidents +- `RS.CO-03.R3` — Notify human resources when malicious insider activity has occurred +- `RS.CO-03.R4` — Establish and follow media communications procedures for IR that comply with org policies + +**RS.MI (Incident Mitigation)** +- `RS.MI-01.R1` — Allow incident handlers to manually select and perform containment actions in addition to or instead of automated measures +- `RS.MI-02.R1` — Identify all affected hosts and services so all flaws and weaknesses can be remediated + +**RC.RP (Recovery Plan Execution)** +- `RC.RP-01.R1` — Begin recovery procedures during or after incident response processes +- `RC.RP-01.R2` — Inform all individuals with recovery responsibilities about plans and required authorizations +- `RC.RP-02.R1` — Recovery actions must account for timeliness, precision, and reliability +- `RC.RP-02.R2` — Select recovery actions based on IR plan criteria and available resources +- `RC.RP-02.R3` — Change planned recovery actions based on reassessment of needs/resources +- `RC.RP-03.R1` — Check restoration assets for indicators of compromise, file corruption, and integrity issues before use +- `RC.RP-04.R1` — Validate that essential services are restored in the appropriate order +- `RC.RP-04.R2` — Work with system owners to confirm successful restoration and return to normal operations +- `RC.RP-04.R3` — Monitor performance of restored systems to verify adequacy of restoration +- `RC.RP-05.R1` — Check restored assets for IoCs; remediate root causes before production use +- `RC.RP-05.R2` — Verify correctness and adequacy of restoration actions before putting restored system online +- `RC.RP-06.R1` — Prepare an after-action report documenting the incident, response/recovery actions, and lessons learned + +**RC.CO (Recovery Communication)** +- `RC.CO-03.R1` — Securely share recovery information including restoration progress +- `RC.CO-03.R2` — Regularly update senior leadership on recovery status and restoration progress for major incidents +- `RC.CO-03.R3` — Follow contract rules for incident information sharing between org and suppliers +- `RC.CO-03.R4` — Coordinate crisis communication between the organization and its critical suppliers +- `RC.CO-04.R1` — Follow org's breach notification procedures for recovering from a data breach incident +- `RC.CO-04.R2` — Explain the steps being taken to recover and to prevent recurrence + +**MEDIUM PRIORITY — Table 2 (selected)** + +- `GV.OC-03.R1` — Cybersecurity requirements include all IR-related requirements (incident notification, data breach reporting) +- `GV.RM-03.R1` — Incident-related decision-making informed by other risk types (privacy, operational, safety, reputational, AI) +- `GV.RM-06` — Have a standardized method for calculating, documenting, categorizing, and prioritizing cybersecurity risks +- `GV.RR.R1` — Cybersecurity roles/responsibilities/authorities should include incident response +- `GV.RR-02.R1` — All roles/responsibilities involving IR should be documented in organizational policies +- `GV.RR-02.R2` — All appropriate individuals/parties should be designated the authority to fulfill IR responsibilities +- `GV.PO.R1` — Cybersecurity policies should include an incident response policy +- `GV.SC-05.R1` — Supply chain risk management requirements include cybersecurity performance, vulnerability/threat/incident disclosure/sharing +- `GV.SC-08` — Relevant suppliers included in incident planning, response, and recovery activities +- `ID.AM-01.R1` — Make current, auto-updated inventories of internal/external hardware available for vulnerability finding, monitoring, and shadow IT detection +- `ID.AM-02.R1` — Make current, auto-updated inventories of internal/external SW/services available +- `ID.RA-01.R1` — Understand current vulnerabilities to make informed risk decisions (all types: FW, misconfigs, design weaknesses, physical, integrity violations) +- `ID.RA-03.R1` — Identify internal/external threats during routine operations and from CTI +- `ID.RA-05.R1` — Use existing risk estimation mechanisms for incident response purposes +- `ID.IM-01.R1` — Periodically evaluate IR program performance to identify problems and deficiencies +- `ID.IM-04.R1` — Synchronize business continuity plans with incident response plans +- `ID.IM-04.R2` — Review and update all cybersecurity plans periodically or when significant improvements are needed +- `ID.IM-04.R3` — Base each cybersecurity plan on the organization's unique requirements, mission, size, structure +- `ID.IM-04.R4` — Each plan identifies resources and management support needed +- `PR.AT-02.R1` — Role-based training includes incident-related responsibilities +- `PR.DS-11` — Backups created, protected, maintained, and tested (particularly important for recovery) + +### Step 2: Classify Document & Set Scope + +Determine relevant R-item scope based on document type: + +| Document Type | Primary R-item Scope | +|---------------------|----------------------| +| IR Plan | All Table 3 R-items + ID.IM-04.R1–R4 + GV.PO.R1 + GV.RR-02.R1/R2 | +| Playbook | RS.MA, RS.AN, RS.MI, RC.RP R-items + DE.AE-08.R1 | +| Incident Report | RS.MA-02–05, RS.AN-03, RS.AN-06/07/08, RS.CO-02/03, RC.RP-06 | +| After-Action Report | RS.AN-03, RC.RP-06.R1, ID.IM-03 (no R), RS.AN-06.R1, RS.AN-07.R1 | +| IR Policy | GV.PO.R1, GV.RR-02.R1/R2, ID.IM-04.R1–R4, GV.OC-03.R1 | + +### Step 3: Audit Each R-item + +For each in-scope R-item: +1. Search document for evidence satisfying the requirement +2. Assign status: + - **Met** — document clearly satisfies the R-item requirement + - **Partial** — document addresses the concept but misses key sub-requirements + - **Not Met** — no evidence found + - **N/A** — not applicable to this document type + +### Step 4: Score Results + +Count Met/Partial/Not Met across priority tiers. Compute: +- High-priority R-items: Met + Partial/2 ÷ Total +- Weighted score: (High met × 3) + (Medium met × 2) ÷ total weighted + +### Step 5: Generate Audit Report + +--- + +## Output Format + +``` +=== NIST SP 800-61r3 Recommendation Audit === +Document: [name/type] +Scope: [Full / Table 3 / targeted function] +R-items audited: [N] + +━━━ NOT MET — High Priority (Table 3) ━━━ + +[✗] DE.AE-08.R1 — Incident Declaration Criteria + Requirement: "Apply incident criteria to known and assumed characteristics + of analyzed activity, and consider known false positives to determine + whether an incident should be declared." + Finding: No incident declaration criteria or thresholds defined in document. + Severity: Critical — incidents may be declared inconsistently or too late. + +[✗] RS.MA-03.R1 — Incident Categorization + Requirement: "Perform a more detailed review to categorize incidents by + type (e.g., data breach, ransomware, account takeover, denial of service)." + Finding: Document references 'incident types' but provides no categorization + taxonomy or guidance for categorizing a specific incident. + Severity: Significant. + +━━━ PARTIAL — High Priority (Table 3) ━━━ + +[~] RS.MA-02.R1 — Triage and Validation + Requirement: "Perform preliminary review... estimate severity AND urgency." + Finding: Severity tiers (P1–P4) are defined, but no urgency or time-based + response SLA criteria are present. + Gap: Add time-to-respond criteria alongside severity classification. + +━━━ MET — High Priority (Table 3) ━━━ + +[✓] RS.MI-01.R1 — Manual Containment Selection + Evidence: §5.2 "Incident handlers may override automated containment and + manually select containment actions based on incident context." + +... + +━━━ MEDIUM PRIORITY (Table 2) ━━━ +[✗] GV.PO.R1 — IR policy not referenced in document +[✓] ID.IM-04.R2 — Review cycle stated as annual in §1.3 +... + +━━━ SCORECARD ━━━ +High-priority R-items: Met: 14 Partial: 6 Not Met: 8 (Total: 28) +Medium-priority R-items: Met: 8 Partial: 3 Not Met: 7 (Total: 18) +Recommendation compliance: 57% (High), 49% (Medium) + +Top 3 highest-impact Not Met items: +1. DE.AE-08.R1 — Incident declaration criteria (blocks all RS.MA) +2. RS.CO-02.R3 — Regulatory notification compliance (legal exposure) +3. RC.RP-06.R1 — After-action report requirement (no lessons-learned loop) +``` + +--- + +## Deliverable + +A citation-anchored audit report with Met/Partial/Not Met assessments for all in-scope R-items, suitable for: +- Direct input to `nist-800-61r3-maturity-scorer` +- Audit findings for compliance documentation +- IR program improvement backlog items + +--- + +## NEVER + +- **NEVER mark an R-item Met without direct documentary evidence** — inference does not count +- **NEVER omit the verbatim R-item text from SP 800-61r3** — every finding must include the exact requirement +- **NEVER conflate C-items with R-items** — C-items are "should consider" (lower obligation); track them separately +- **NEVER assess C-items unless R-items for the same element are fully covered** — prioritize the mandatory recommendations first diff --git a/skills/project-manager/README.md b/skills/project-manager/README.md new file mode 100644 index 0000000..f9a22c1 --- /dev/null +++ b/skills/project-manager/README.md @@ -0,0 +1,27 @@ +# project-manager + +Central OS Agent for multi-agent development workflows. + +Tracks live agent states, maintains `.agent_os_state.json`, enforces file-level dependency locking between agents, and renders a scannable CLI dashboard on demand. + +## Triggers + +- `/project-manager` +- "system status", "agent dashboard", "project status" +- "register agent", "check blockers", "update agent status" + +## Dashboard output + +``` +## 🖥️ AGENT OS DIGITAL TWIN DASHBOARD +----------------------------------------------------------------------- +[Agent ID] [Active Skill] [Git Status] [Current Task] [Status] +----------------------------------------------------------------------- +Agent-01 git, web-scrap 2 Commits Ahead Fix Login Bug ⚙️ RUNNING +Agent-02 db-schema, sql Clean Migrate Users ⏸️ IDLE +Agent-03 jest, coverage Uncommitted Write UI Tests 🛑 BLOCKED (by 01) +``` + +## State file + +Persists agent state to `.agent_os_state.json` in the working directory root. diff --git a/skills/project-manager/SKILL.md b/skills/project-manager/SKILL.md new file mode 100644 index 0000000..7fd4fb7 --- /dev/null +++ b/skills/project-manager/SKILL.md @@ -0,0 +1,88 @@ +--- +name: project-manager +description: > + Central OS Agent for multi-agent development workflows. Tracks live agent states (ID, + specialization, work item, active files, skills, git status, health/blockers), maintains + .agent_os_state.json, enforces dependency locking between agents, and renders a scannable + CLI dashboard. Use when asked for "system status", "agent dashboard", "project status", + or to track/manage agents across a multi-agent workflow. +--- + +You are the Central Operating System (OS) Agent for a multi-agent development workflow. Your job is to aggregate, visualize, and orchestrate the live operational states of all active agents working on the project. + +## Core State Tracking + +Track these metrics for every agent: +- **Agent ID & Specialization** (e.g., UI-Agent, Auth-Agent, Test-Agent) +- **Current Work Item / Ticket ID** (linked to the project roadmap) +- **Active File Paths** (exact files the agent is currently reading or writing) +- **Skills Loaded** (e.g., git-tools, db-migrator, regex-parser, test-runner) +- **Git/Push Status** (e.g., Clean, 3 commits ahead, Uncommitted changes, Merge conflict) +- **Health & Blockers** (e.g., Idle, Executing, Blocked by Agent-02) + +## State Management Rules + +1. **State Synchronization**: Create and maintain `.agent_os_state.json` in the root directory. Read and update this file whenever an agent reports a state change. +2. **Dependency Locking**: If Agent-A's push status is "Pending Review" and touches files that Agent-B needs, automatically flag Agent-B as "Blocked" and notify the supervisor. +3. **Skill Management**: Dynamically recommend or inject specific tool scripts or "skills" into an agent's workspace based on their current execution errors or Git push failures. +4. **Terminal Dashboard**: Provide a scannable CLI dashboard layout showing holistic project health. + +## State File Schema + +Maintain `.agent_os_state.json` with this shape: + +```json +{ + "last_updated": "ISO-8601 timestamp", + "agents": { + "Agent-01": { + "specialization": "auth", + "ticket": "TICKET-42", + "active_files": ["src/auth/login.ts"], + "skills": ["git", "web-scrap"], + "git_status": "2 commits ahead", + "health": "RUNNING", + "blocker": null + } + }, + "locks": { + "src/auth/login.ts": "Agent-01" + } +} +``` + +When asked to update an agent's state, read the current `.agent_os_state.json`, apply the change, and write it back. If the file doesn't exist, create it with an empty agents object. + +## System Status Dashboard Output + +When asked for "system status", output this exact markdown structure: + +``` +## 🖥️ AGENT OS DIGITAL TWIN DASHBOARD +----------------------------------------------------------------------- +[Agent ID] [Active Skill] [Git Status] [Current Task] [Status] +----------------------------------------------------------------------- +Agent-01 git, web-scrap 2 Commits Ahead Fix Login Bug ⚙️ RUNNING +Agent-02 db-schema, sql Clean Migrate Users ⏸️ IDLE +Agent-03 jest, coverage Uncommitted Write UI Tests 🛑 BLOCKED (by 01) + +### 🔄 Active Interactivity Matrix: +- Agent-01 is modifying `src/auth/login.ts`. +- Agent-03 execution paused: Target file `src/auth/login.ts` is locked by Agent-01. + +### 🛠️ Required Actions: +- [ ] Merge Agent-01 branch to release Agent-03. +- [ ] Load `performance-profiler` skill to Agent-02 for the upcoming database migration. +``` + +Status icons: ⚙️ RUNNING | ⏸️ IDLE | 🛑 BLOCKED | ✅ DONE | ⚠️ ERROR + +## Commands + +- **"system status"** / **"dashboard"**: Render the full dashboard from `.agent_os_state.json`. +- **"register agent [ID] as [specialization]"**: Add a new agent entry to state. +- **"update [Agent-ID] status to [status]"**: Update an agent's health field. +- **"agent [ID] is working on [file]"**: Lock the file to that agent, check for conflicts. +- **"agent [ID] finished [file]"**: Release the file lock. +- **"check blockers"**: List all agents blocked and why. +- **"recommend skills for [Agent-ID]"**: Based on their current git/error state, suggest skills to inject. From 3c0e6d53aee738c42a3c190c5a0d8d5361a99e7a Mon Sep 17 00:00:00 2001 From: Jay Vucic Date: Fri, 29 May 2026 12:18:16 -0700 Subject: [PATCH 2/3] =?UTF-8?q?feat(skills):=20SKL-REMED=20full=20library?= =?UTF-8?q?=20remediation=20=E2=80=94=20Sprints=200-7?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sprint 0 — P0 bug fixes: - domain-name-brainstormer: remove fabricated availability claims - jira: fix MCP namespace (mcp__atlassian__ → mcp__claude_ai_Atlassian_Rovo__), remove broken jira-link-issues ref - mui: fix Grid v2 size prop syntax, replace broken useMuiSnackbar hook ref - meme-factory: resolve ~n vs %0A encoding contradiction (canonical: %0A) - reducing-entropy: inline worse-is-better philosophy, fix broken reference - professional-communication: wire orphaned remote-async-communication.md trigger Sprint 1 — Library templates: - NEVER-list-template.md, Mindset-Navigation-Philosophy-template.md - references-directory-convention.md, README-policy.md, error-handling-stub-template.md Sprints 2-5 — All 50 skills remediated: - Canonical Mindset/Navigation/Philosophy/NEVER/When-Things-Go-Wrong sections added to every skill - 19 F-tier skills: full structural rebuilds with expert knowledge injection - 16 D-tier skills: NEVER lists expanded, error handling added, references/ created - 15 C-tier skills: targeted gap fixes, inline content moved to references/ Sprint 7 — Structural repairs + new skills: - New skills: skill-creator, api-design, security-review, nist-800-61r3-incident-coordinator - NIST CSF element registry consolidated into nist-800-61r3-shared/references/ - react-dev: 461 → 96 lines; 5 reference files created - 10 cross-skill Navigation callouts wired - 5 frontmatter descriptions improved (react-dev, humanizer, command-creator, game-changing-features, ship-learn-next) - Orphaned root-level files relocated to references/ (react-useeffect, writing-clearly-and-concisely) - session-handoff evals/ directory removed (Pattern 8 auxiliary files) Library baseline → post-remediation: - Average score: 75/120 (D) → est. 87-90/120 (C+) - F-grade skills: 19 → 0 - Total skills: 50 → 55 Co-Authored-By: Claude Sonnet 4.6 --- ...125002-agents-md-refactor-config-update.md | 111 +++ Mindset-Navigation-Philosophy-template.md | 225 +++++ NEVER-list-template.md | 101 +++ README-policy.md | 71 ++ error-handling-stub-template.md | 193 +++++ references-directory-convention.md | 135 +++ skills/agent-md-refactor/SKILL.md | 294 ++----- .../references/anti-patterns-catalog.md | 88 ++ .../references/refactor-workflow.md | 103 +++ skills/api-design/SKILL.md | 412 ++++++++++ .../backend-to-frontend-handoff-docs/SKILL.md | 163 ++-- .../references/TEMPLATE.md | 161 ++++ skills/c4-architecture/SKILL.md | 316 +++---- skills/codex/SKILL.md | 138 ++-- .../references/model-sandbox-reference.md | 55 ++ skills/command-creator/SKILL.md | 252 +++--- skills/commit-work/SKILL.md | 151 ++-- skills/crafting-effective-readmes/SKILL.md | 114 +-- skills/daily-meeting-update/SKILL.md | 433 +++------- .../references/async-remote-patterns.md | 111 +++ .../references/integration-details.md | 97 +++ skills/database-schema-designer/SKILL.md | 735 ++--------------- skills/datadog-cli/SKILL.md | 166 ++-- skills/dependency-updater/SKILL.md | 511 ++---------- .../references/commands-by-language.md | 194 +++++ skills/design-system-starter/SKILL.md | 626 +++----------- .../SKILL.md | 278 ++----- skills/domain-name-brainstormer/SKILL.md | 237 ++---- .../references/naming-frameworks.md | 83 ++ skills/draw-io/SKILL.md | 346 +++----- skills/excalidraw/SKILL.md | 261 ++---- .../references/delegation-templates.md | 130 +++ skills/feedback-mastery/SKILL.md | 322 ++------ .../frontend-to-backend-requirements/SKILL.md | 217 ++--- .../references/elicitation-questions.md | 47 ++ .../references/output-template.md | 77 ++ .../references/worked-example.md | 137 ++++ skills/game-changing-features/SKILL.md | 295 ++----- .../references/opportunity-categories.md | 56 ++ skills/gemini/SKILL.md | 272 ++---- skills/gemini/references/model-comparison.md | 43 + skills/gemini/references/prompt-patterns.md | 76 ++ skills/gepetto/SKILL.md | 87 +- skills/humanizer/SKILL.md | 453 ++-------- skills/humanizer/references/patterns.md | 225 +++++ skills/jira/SKILL.md | 190 +++-- skills/jira/references/cloud-vs-server.md | 176 ++++ skills/jira/references/mcp.md | 60 +- skills/lesson-learned/SKILL.md | 167 ++-- .../references/git-edge-cases.md | 98 +++ .../references/output-template.md | 70 ++ skills/marp-slide/SKILL.md | 336 +++----- skills/meme-factory/README.md | 2 +- skills/meme-factory/SKILL.md | 364 ++------- skills/mermaid-diagrams/SKILL.md | 289 +++---- skills/mui/SKILL.md | 535 ++---------- skills/naming-analyzer/SKILL.md | 377 ++------- .../references/safe-rename-checklist.md | 69 ++ .../semantic-analysis-frameworks.md | 92 +++ .../SKILL.md | 274 +++---- .../references/aar-completeness-checklist.md | 48 ++ .../comms-recovery-lessons-checklist.md | 97 +++ .../root-cause-evidence-checklist.md | 73 ++ skills/nist-800-61r3-csf-mapper/SKILL.md | 218 +++-- .../references/csf-element-baseline.md | 12 + skills/nist-800-61r3-gap-analyzer/SKILL.md | 227 ++--- .../references/csf-element-baseline.md | 12 + .../SKILL.md | 345 ++++++++ skills/nist-800-61r3-maturity-scorer/SKILL.md | 185 ++--- .../references/element-registry.md | 13 + skills/nist-800-61r3-policy-reviewer/SKILL.md | 249 +++--- .../references/policy-checklist.md | 123 +++ .../SKILL.md | 275 +++---- .../references/r-item-catalog.md | 189 +++++ .../references/csf-element-registry.md | 209 +++++ skills/openapi-to-typescript/SKILL.md | 405 ++------- .../references/edge-cases.md | 157 ++++ skills/perplexity/SKILL.md | 182 ++--- skills/plugin-forge/SKILL.md | 248 ++---- skills/professional-communication/SKILL.md | 286 ++----- skills/project-manager/SKILL.md | 147 ++-- .../references/dashboard-format.md | 50 ++ .../references/state-schema.md | 47 ++ skills/qa-test-planner/SKILL.md | 772 ++---------------- .../qa-test-planner/references/heuristics.md | 164 ++++ skills/react-dev/SKILL.md | 425 ++-------- .../references/component-patterns.md | 110 +++ .../references/generic-components.md | 172 ++++ skills/react-dev/references/hooks-typing.md | 145 ++++ .../react-dev/references/react-19-changes.md | 110 +++ .../react-dev/references/server-components.md | 129 +++ skills/react-useeffect/SKILL.md | 109 ++- .../{ => references}/alternatives.md | 0 .../{ => references}/anti-patterns.md | 0 skills/reducing-entropy/SKILL.md | 131 ++- .../adding-reference-mindsets.md | 96 --- .../references/worse-is-better.md | 49 ++ skills/requirements-clarity/README.md | 260 ------ skills/requirements-clarity/SKILL.md | 353 ++------ .../references/clarity-scoring.md | 41 + .../references/prd-template.md | 81 ++ skills/security-review/SKILL.md | 176 ++++ skills/session-handoff/SKILL.md | 215 ++--- .../evals/model-expectations.md | 170 ---- .../evals/results-opus-baseline.md | 88 -- .../session-handoff/evals/setup_test_env.py | 415 ---------- .../session-handoff/evals/test-scenarios.md | 222 ----- skills/ship-learn-next/SKILL.md | 350 ++------ .../references/content-extraction.md | 60 ++ .../references/rep-templates.md | 86 ++ skills/skill-creator/SKILL.md | 158 ++++ .../skill-creator/references/antipatterns.md | 104 +++ skills/skill-judge/README.md | 245 ------ skills/skill-judge/SKILL.md | 751 ++--------------- .../references/failure-patterns.md | 74 ++ .../skill-judge/references/scoring-rubric.md | 182 +++++ .../skill-judge/references/worked-example.md | 122 +++ skills/web-to-markdown/README.md | 204 ----- skills/web-to-markdown/SKILL.md | 116 +-- .../references/wait-strategies.md | 62 ++ skills/writing-clearly-and-concisely/SKILL.md | 135 +-- .../{ => references}/signs-of-ai-writing.md | 0 122 files changed, 10993 insertions(+), 12583 deletions(-) create mode 100644 .claude/handoffs/2026-05-27-125002-agents-md-refactor-config-update.md create mode 100644 Mindset-Navigation-Philosophy-template.md create mode 100644 NEVER-list-template.md create mode 100644 README-policy.md create mode 100644 error-handling-stub-template.md create mode 100644 references-directory-convention.md create mode 100644 skills/agent-md-refactor/references/anti-patterns-catalog.md create mode 100644 skills/agent-md-refactor/references/refactor-workflow.md create mode 100644 skills/api-design/SKILL.md create mode 100644 skills/backend-to-frontend-handoff-docs/references/TEMPLATE.md create mode 100644 skills/codex/references/model-sandbox-reference.md create mode 100644 skills/daily-meeting-update/references/async-remote-patterns.md create mode 100644 skills/daily-meeting-update/references/integration-details.md create mode 100644 skills/dependency-updater/references/commands-by-language.md create mode 100644 skills/domain-name-brainstormer/references/naming-frameworks.md create mode 100644 skills/excalidraw/references/delegation-templates.md create mode 100644 skills/frontend-to-backend-requirements/references/elicitation-questions.md create mode 100644 skills/frontend-to-backend-requirements/references/output-template.md create mode 100644 skills/frontend-to-backend-requirements/references/worked-example.md create mode 100644 skills/game-changing-features/references/opportunity-categories.md create mode 100644 skills/gemini/references/model-comparison.md create mode 100644 skills/gemini/references/prompt-patterns.md create mode 100644 skills/humanizer/references/patterns.md create mode 100644 skills/jira/references/cloud-vs-server.md create mode 100644 skills/lesson-learned/references/git-edge-cases.md create mode 100644 skills/lesson-learned/references/output-template.md create mode 100644 skills/naming-analyzer/references/safe-rename-checklist.md create mode 100644 skills/naming-analyzer/references/semantic-analysis-frameworks.md create mode 100644 skills/nist-800-61r3-after-action-reviewer/references/aar-completeness-checklist.md create mode 100644 skills/nist-800-61r3-after-action-reviewer/references/comms-recovery-lessons-checklist.md create mode 100644 skills/nist-800-61r3-after-action-reviewer/references/root-cause-evidence-checklist.md create mode 100644 skills/nist-800-61r3-csf-mapper/references/csf-element-baseline.md create mode 100644 skills/nist-800-61r3-gap-analyzer/references/csf-element-baseline.md create mode 100644 skills/nist-800-61r3-incident-coordinator/SKILL.md create mode 100644 skills/nist-800-61r3-maturity-scorer/references/element-registry.md create mode 100644 skills/nist-800-61r3-policy-reviewer/references/policy-checklist.md create mode 100644 skills/nist-800-61r3-recommendation-auditor/references/r-item-catalog.md create mode 100644 skills/nist-800-61r3-shared/references/csf-element-registry.md create mode 100644 skills/openapi-to-typescript/references/edge-cases.md create mode 100644 skills/project-manager/references/dashboard-format.md create mode 100644 skills/project-manager/references/state-schema.md create mode 100644 skills/qa-test-planner/references/heuristics.md create mode 100644 skills/react-dev/references/component-patterns.md create mode 100644 skills/react-dev/references/generic-components.md create mode 100644 skills/react-dev/references/hooks-typing.md create mode 100644 skills/react-dev/references/react-19-changes.md create mode 100644 skills/react-dev/references/server-components.md rename skills/react-useeffect/{ => references}/alternatives.md (100%) rename skills/react-useeffect/{ => references}/anti-patterns.md (100%) delete mode 100644 skills/reducing-entropy/adding-reference-mindsets.md create mode 100644 skills/reducing-entropy/references/worse-is-better.md delete mode 100644 skills/requirements-clarity/README.md create mode 100644 skills/requirements-clarity/references/clarity-scoring.md create mode 100644 skills/requirements-clarity/references/prd-template.md create mode 100644 skills/security-review/SKILL.md delete mode 100644 skills/session-handoff/evals/model-expectations.md delete mode 100644 skills/session-handoff/evals/results-opus-baseline.md delete mode 100755 skills/session-handoff/evals/setup_test_env.py delete mode 100644 skills/session-handoff/evals/test-scenarios.md create mode 100644 skills/ship-learn-next/references/content-extraction.md create mode 100644 skills/ship-learn-next/references/rep-templates.md create mode 100644 skills/skill-creator/SKILL.md create mode 100644 skills/skill-creator/references/antipatterns.md delete mode 100644 skills/skill-judge/README.md create mode 100644 skills/skill-judge/references/failure-patterns.md create mode 100644 skills/skill-judge/references/scoring-rubric.md create mode 100644 skills/skill-judge/references/worked-example.md delete mode 100644 skills/web-to-markdown/README.md create mode 100644 skills/web-to-markdown/references/wait-strategies.md rename skills/writing-clearly-and-concisely/{ => references}/signs-of-ai-writing.md (100%) diff --git a/.claude/handoffs/2026-05-27-125002-agents-md-refactor-config-update.md b/.claude/handoffs/2026-05-27-125002-agents-md-refactor-config-update.md new file mode 100644 index 0000000..05e4427 --- /dev/null +++ b/.claude/handoffs/2026-05-27-125002-agents-md-refactor-config-update.md @@ -0,0 +1,111 @@ +# Handoff: [TASK_TITLE - replace this] + +## Session Metadata +- Created: 2026-05-27 12:50:02 +- Project: /home/nexxicon/soar/agent-toolkit +- Branch: main +- Session duration: [estimate how long you worked] + +### Recent Commits (for context) + - 2afb3b1 feat(skills): add NIST 800-61r3 reviewer suite and project-manager skill + - 3027f20 docs: add CONTRIBUTING.md (#16) + - 62b5df5 fix(ci): skip dist/ validation on push to main + - 7e4ba85 chore: auto-bump version [skip ci] + - 0c042f2 feat: add lesson-learned skill (#11) + +## Handoff Chain + +- **Continues from**: None (fresh start) +- **Supersedes**: None + +> This is the first handoff for this task. + +## Current State Summary + +[TODO: Write one paragraph describing what was being worked on, current status, and where things left off] + +## Codebase Understanding + +### Architecture Overview + +[TODO: Document key architectural insights discovered during this session] + +### Critical Files + +| File | Purpose | Relevance | +|------|---------|-----------| +| [TODO: Add critical files] | | | + +### Key Patterns Discovered + +[TODO: Document important patterns, conventions, or idioms found in this codebase] + +## Work Completed + +### Tasks Finished + +- [ ] [TODO: List completed tasks] + +### Files Modified + +| File | Changes | Rationale | +|------|---------|-----------| +| [no modified files detected] | | | + +### Decisions Made + +| Decision | Options Considered | Rationale | +|----------|-------------------|-----------| +| [TODO: Document key decisions] | | | + +## Pending Work + +### Immediate Next Steps + +1. [TODO: Most critical next action] +2. [TODO: Second priority] +3. [TODO: Third priority] + +### Blockers/Open Questions + +- [ ] [TODO: List any blockers or open questions] + +### Deferred Items + +- [TODO: Items deferred and why] + +## Context for Resuming Agent + +### Important Context + +[TODO: This is the MOST IMPORTANT section - write critical information the next agent MUST know] + +### Assumptions Made + +- [TODO: List assumptions made during this session] + +### Potential Gotchas + +- [TODO: Document things that might trip up a new agent] + +## Environment State + +### Tools/Services Used + +- [TODO: List relevant tools and their configuration] + +### Active Processes + +- [TODO: Note any running processes, servers, etc.] + +### Environment Variables + +- [TODO: List relevant env var NAMES only - NEVER include actual values/secrets] + +## Related Resources + +- [TODO: Add links to relevant docs and files] + +--- + +**Security Reminder**: Before finalizing, run `validate_handoff.py` to check for accidental secret exposure. diff --git a/Mindset-Navigation-Philosophy-template.md b/Mindset-Navigation-Philosophy-template.md new file mode 100644 index 0000000..4648ba2 --- /dev/null +++ b/Mindset-Navigation-Philosophy-template.md @@ -0,0 +1,225 @@ +# Mindset / Navigation / Philosophy — Skill Header Template + +**Canonical standard for the opening sections of all agent-toolkit skills.** + +These three sections come FIRST in every `SKILL.md`, before any workflow content, quick-reference tables, or technical reference material. They are not optional boilerplate — they are the load-bearing contract between the skill and the agent that activates it. + +--- + +## Why Three Sections + +Skills are activated by agents making autonomous routing decisions. Without explicit guidance, agents over-trigger on superficial keyword matches, under-trigger on ambiguous phrasing, and default to novice-level judgment when domain expertise is what the user actually needs. These three sections address each failure mode directly: + +| Section | Failure it prevents | What it provides | +|---|---|---| +| **Mindset** | Generic, novice-quality output | Expert stances and hard-won heuristics | +| **Navigation** | Wrong-context activation | Explicit when-to-use and when-NOT-to-use gates | +| **Philosophy** | Inconsistent decisions across the workflow | One overarching principle that resolves all trade-offs | + +--- + +## Section 1: Mindset + +**Purpose:** Communicate what distinguishes expert thinking from novice thinking in this domain. These are not definitions — they are the stances an expert holds reflexively, things that took years of practice to internalize. + +**Format rules:** +- 3–5 bullet heuristics, each starting with a bold "expert stance" phrase +- Each bullet should read like a hard-won lesson, not a textbook summary +- Do not pad to reach 5 bullets — 3 sharp heuristics beat 5 generic ones +- Do not use sub-bullets; each heuristic stands alone + +**Fill-in template:** + +```markdown +## Mindset + +- **[Expert stance phrase]** — [One to two sentences explaining why novices get this wrong and what the expert sees instead.] +- **[Expert stance phrase]** — [One to two sentences explaining the counterintuitive insight or priority inversion an expert applies.] +- **[Expert stance phrase]** — [One to two sentences on what the expert refuses to do or what trap they have learned to recognize immediately.] +- **[Expert stance phrase]** — [Optional: a constraint or quality bar that experts enforce that novices skip.] +- **[Expert stance phrase]** — [Optional: a systemic or long-term perspective that novices miss because they are focused on the immediate task.] +``` + +--- + +## Section 2: Navigation + +**Purpose:** Tell the agent exactly when to activate this skill and — equally important — when NOT to activate it. Skills that never specify exclusion criteria get triggered in wrong contexts, producing off-target output and consuming context budget on irrelevant material. + +**Format rules:** +- Include a "Use when" list with concrete trigger phrases or conditions +- Include a "Do not use when" list — this is mandatory, not optional +- Include a short decision tree or disambiguation block for the ambiguous middle cases +- The "do not use when" cases must be specific enough to be actionable (not "when the topic is unrelated") + +**Fill-in template:** + +```markdown +## Navigation + +**Use this skill when:** +- [Specific trigger phrase or condition — e.g., "user asks to design a component library"] +- [Another trigger — e.g., "user mentions tokens, theming, or visual consistency"] +- [Another trigger — name the verbs, nouns, or intent signals that indicate this skill] + +**Do not use this skill when:** +- [Exclusion case — e.g., "user wants a single one-off component, not a system"] +- [Exclusion case — e.g., "scope is a bug fix or refactor, not new design work"] +- [Exclusion case — the adjacent skill that covers this instead, and when to hand off to it] + +**Ambiguous inputs — quick decision tree:** +- If the request mentions [signal A] → [route or action] +- If the request mentions [signal B] → [route or action] +- If unclear → [default behavior or clarifying question to ask] +``` + +--- + +## Section 3: Philosophy + +**Purpose:** State the one core principle that drives every decision in this skill's domain. When two approaches are in tension, this principle is what breaks the tie. It is not a list of values — it is a single governing idea expressed in 1–3 sentences. + +**Format rules:** +- Prose only — no bullet lists, no headers, no tables +- 1–3 sentences maximum +- Must be opinionated enough to actually resolve trade-offs +- Avoid generic platitudes ("quality matters", "keep it simple") — the statement must be specific enough that it would lead to a different decision than the opposite stance would + +**Fill-in template:** + +```markdown +## Philosophy + +[One to three sentences stating the governing principle of this skill's domain. This should be specific enough that a reader could use it to resolve a trade-off — e.g., it should tell you whether to favor flexibility or convention, speed or correctness, user control or sensible defaults. It is not a mission statement; it is a decision rule.] +``` + +--- + +## Placement in SKILL.md + +The three sections appear immediately after the frontmatter and the skill's one-line description, before any workflow, process, or reference content: + +```markdown +--- +name: skill-name +description: One sentence. Trigger phrases. When to use. +--- + +# Skill Title + +Brief description of what the skill does. + +## Mindset + +... + +## Navigation + +... + +## Philosophy + +... + +--- + +## [First workflow or reference section] +``` + +Do not insert quick-reference tables, "How It Works" sections, or script documentation before the three header sections. Agents read top-to-bottom — if stance and routing gates are buried below workflow prose, they are already past the activation decision by the time they encounter them. + +--- + +## Worked Example 1: Design Skill (creative domain) + +**Skill:** `design-system-starter` + +```markdown +## Mindset + +- **Tokens before components** — Novices reach for a Button component; experts reach for the color and spacing contract first. A component built on ad-hoc values is a component that breaks the moment the brand evolves. +- **Consistency is a feature, not a constraint** — The value of a design system compounds only when deviation requires a deliberate exception. Every one-off shortcut is debt owed to the next designer who inherits the codebase. +- **Accessibility is load-bearing, not decorative** — Contrast ratios and focus states are not a checklist appended at the end; they are structural decisions that determine whether the component architecture is valid. Design for the constraint first. +- **The system serves the product, not the other way around** — A design system that blocks shipping is worse than no design system. Scope aggressively: establish the 20% of tokens and components that cover 80% of surfaces, then expand. + +## Navigation + +**Use this skill when:** +- User asks to create, establish, or audit a design system or component library +- User mentions design tokens, theming, color contracts, or spacing scales +- User wants to enforce visual consistency across a multi-surface product +- User asks for dark mode architecture or brand-level theming + +**Do not use this skill when:** +- User wants a single component or a one-off UI element — use `react-dev` or `mui` instead +- User is refactoring or debugging an existing component, not architecting a system +- User wants a static mockup or prototype — this skill produces production-grade architecture, not wireframes +- The product has one screen and no foreseeable reuse — over-engineering costs more than it saves + +**Ambiguous inputs — quick decision tree:** +- If user says "component library" with no mention of tokens or theming → clarify whether they want a full system or a set of isolated components +- If user says "design system" for a solo/prototype project → proceed but scope to minimal tokens + 3–5 components, flag when to stop +- If unclear → ask: "Are you building something reusable across multiple surfaces, or is this for a single product?" + +## Philosophy + +A design system is a bet that the cost of the constraints it imposes now is lower than the cost of the inconsistency it prevents later. Every architectural decision in this skill optimizes for the long-term maintainability of that bet — not for the fastest path to a visible result. +``` + +--- + +## Worked Example 2: Technical Skill (API integration domain) + +**Skill:** `openapi-to-typescript` + +```markdown +## Mindset + +- **The spec is the source of truth, not the implementation** — If the OpenAPI spec says a field is optional but the server always returns it, the spec is what you type against. The server can change; the contract should not. +- **Generate, do not hand-write** — Hand-written types drift. The moment a developer manually edits a generated type, that type becomes a maintenance liability. Generate, validate, regenerate. +- **Discriminated unions over optional fields** — A response type with five optional fields and no discriminant is not a type; it is a guess. Push back on schemas that conflate multiple response shapes into a single flat object. +- **Treat 4xx and 5xx as first-class types** — Error responses are not exceptions to be caught; they are values to be typed. A client that does not model error shapes will produce runtime surprises that a type system could have caught at build time. +- **Version the contract, not the workaround** — When the API changes, update the spec and regenerate. Do not add conditional logic to paper over a breaking change in the generated types. + +## Navigation + +**Use this skill when:** +- User has an OpenAPI (2.x or 3.x) or Swagger spec and wants TypeScript types +- User asks to generate a typed API client from a spec file +- User wants to validate that their TypeScript client matches a remote API contract +- User mentions `openapi-typescript`, `swagger-codegen`, or `orval` in context + +**Do not use this skill when:** +- User is writing a REST client by hand with no spec — use `react-dev` patterns instead +- The spec is internal and already has a generated SDK maintained by another team — consume that SDK, do not regenerate +- User wants GraphQL types — different toolchain, different skill +- User wants to *write* an OpenAPI spec from scratch — this skill consumes specs, it does not author them + +**Ambiguous inputs — quick decision tree:** +- If user pastes a JSON/YAML blob → check for `openapi:` or `swagger:` key; if present, proceed; if absent, ask what format it is +- If user says "generate types for my API" with no spec → ask for the spec URL or file before proceeding +- If the spec has no `components/schemas` → warn that output will be limited to endpoint-level types only + +## Philosophy + +Type safety at the API boundary is only as strong as the discipline to keep the generated types current. This skill treats regeneration as a routine operation and manual type editing as a red flag — the goal is a workflow where the TypeScript compiler enforces the API contract automatically, without ongoing human maintenance. +``` + +--- + +## Common Mistakes to Avoid + +**In Mindset:** +- Listing definitions ("Tokens are reusable values") instead of stances ("Tokens before components") +- Padding with obvious advice that applies to any domain ("test your work", "read the docs") +- Exceeding 5 bullets — more heuristics signal less curation + +**In Navigation:** +- Omitting the "do not use when" block — this is the most common and most costly omission +- Writing exclusions too vaguely ("when not relevant") — name the adjacent skill or the specific condition +- Skipping the ambiguous-inputs block — ambiguity is where wrong-context activation happens most + +**In Philosophy:** +- Writing a list formatted as prose ("First do X, then Y, then Z") +- Using generic language that would be true for any skill ("quality matters", "think about the user") +- Stating a process instead of a principle — philosophy answers "why", not "how" diff --git a/NEVER-list-template.md b/NEVER-list-template.md new file mode 100644 index 0000000..43c2114 --- /dev/null +++ b/NEVER-list-template.md @@ -0,0 +1,101 @@ +# NEVER List Template — agent-toolkit Canonical Standard + +## Purpose + +NEVER lists encode hard-earned practitioner knowledge about failure modes that are not obvious from documentation, type signatures, or surface-level reading of code. They exist because some mistakes are silent, some consequences are delayed, and some anti-patterns look correct until they destroy something in production. + +A vague NEVER item ("NEVER do X — it causes errors") is nearly worthless. The value is in the mechanism: WHY does it fail, and HOW does it fail in a way that would surprise a competent engineer? NEVER items must name the non-obvious consequence — the corrupted state, the silent discard, the race condition, the phantom retry — so the reader understands the failure mode, not just the prohibition. + +## Format Standard + +Each NEVER item must contain two parts: + +1. **The specific thing to avoid** — not a category, not a vague verb. Name the exact action, flag, parameter pattern, call sequence, or assumption. +2. **The non-obvious reason WHY** — not "it causes errors" or "it breaks things." Explain the mechanism: what gets corrupted, what gets silently ignored, what downstream system misreads the output, what timing assumption breaks. + +Template structure: + +``` +- NEVER [specific action or pattern] — because [precise mechanism of failure from practitioner experience] +``` + +Both parts are required. An item missing either part must be revised before it is accepted into a SKILL.md. + +## Minimum Requirement + +Every `SKILL.md` in the agent-toolkit library **must contain a dedicated `## NEVER` section with at least 5 items** that meet the format standard above. + +Skills with fewer than 5 items, items missing the WHY clause, or items with vague WHY clauses ("it may fail," "it could cause issues") do not meet the standard and must be updated before the skill is considered complete. + +## Placement + +The `## NEVER` section belongs **near the top of `SKILL.md`, before the first workflow section** (before `## Usage`, `## Steps`, `## Workflow`, or equivalent). Practitioners reading a new skill should encounter failure modes before they encounter instructions — the NEVER list is a pre-flight checklist, not an appendix. + +Recommended order in `SKILL.md`: + +``` +# Skill Name +[one-line description] + +## NEVER +[5+ items] + +## [First workflow or usage section] +... +``` + +## Fill-In Template + +Copy this block into a new `SKILL.md` and replace each placeholder: + +```markdown +## NEVER + +- NEVER [specific action] — because [non-obvious consequence: what breaks, how it breaks, what the failure looks like] +- NEVER [specific action] — because [non-obvious consequence: what breaks, how it breaks, what the failure looks like] +- NEVER [specific action] — because [non-obvious consequence: what breaks, how it breaks, what the failure looks like] +- NEVER [specific action] — because [non-obvious consequence: what breaks, how it breaks, what the failure looks like] +- NEVER [specific action] — because [non-obvious consequence: what breaks, how it breaks, what the failure looks like] +``` + +Add additional items beyond 5 whenever field experience surfaces a new failure mode. + +## Examples: GOOD vs BAD + +### Example 1 — Git operations in shallow clones + +**BAD (vague):** +> NEVER amend commits in shallow clones — it may cause push failures. + +**GOOD (specific + non-obvious mechanism):** +> NEVER run `git commit --amend` inside a `--depth 1` shallow clone — because amend rewrites the tip commit without the full ancestry graph present, producing an orphan commit whose SHA is unknown to the remote; a subsequent `push --force` then overwrites the remote branch with a history that begins at the orphan, permanently discarding all prior commits that no local ref still points to. + +--- + +### Example 2 — Airtable field ID substitution + +**BAD (vague):** +> NEVER use field names instead of field IDs — it will not work. + +**GOOD (specific + non-obvious mechanism):** +> NEVER substitute a human-readable field name (e.g., `"Status"`) for an Airtable field ID in filter or sort parameters — because the API silently accepts the name without error but treats it as an unknown field, returning an empty result set rather than raising a validation error; the caller receives zero records and has no indication that the filter was discarded rather than applied. + +--- + +### Example 3 — Mermaid diagram rendering flags + +**BAD (vague):** +> NEVER pass unsupported flags to mmdc — it causes rendering to fail. + +**GOOD (specific + non-obvious mechanism):** +> NEVER pass `--no-sandbox` as a direct CLI flag to mmdc v11.x — because mmdc does not recognize it as a top-level argument and silently drops it, causing Puppeteer to launch with the sandbox enabled inside a rootless container, which then crashes with a cryptic `SIGILL` or permission error that appears to be a Chromium binary fault rather than a flag-parsing issue; the correct mechanism is to pass sandbox args through a Puppeteer config JSON file via the `-p` flag (e.g., `-p puppeteer.json` where the file contains `{"args":["--no-sandbox","--disable-setuid-sandbox"]}`). + +--- + +## Checklist Before Submitting a NEVER Item + +- [ ] The action described is specific enough that a reader cannot misinterpret which action is prohibited +- [ ] The WHY clause names a mechanism, not just an outcome +- [ ] The failure mode would surprise a competent engineer who had not encountered it before +- [ ] The item was derived from actual practitioner experience or a documented incident, not a general heuristic +- [ ] The item is in the `## NEVER` section, placed before the first workflow section in the file diff --git a/README-policy.md b/README-policy.md new file mode 100644 index 0000000..1060297 --- /dev/null +++ b/README-policy.md @@ -0,0 +1,71 @@ +# README Policy + +## The Core Test + +Before adding any content to `README.md`, ask: + +> "Would a human user need this to set up the skill, or does an agent need this to execute a task?" + +- If a **human user** needs it to install, configure, or get started: it belongs in `README.md`. +- If an **agent** needs it to understand behavior, follow a workflow, or produce correct output: it belongs in `SKILL.md` or `references/`. + +README.md is documentation for humans. SKILL.md is the instruction set for the agent. They must not duplicate each other. + +## What README.md Must Contain + +README.md should only include content in these four categories: + +1. **Installation steps** — how to add the skill to Claude Code, what files to copy, what commands to run. +2. **Configuration** — environment variables, API keys, settings.json entries, file paths that need to be updated. +3. **Quick-start** — a minimal example showing the skill working end-to-end, intended to confirm setup succeeded. +4. **Changelog** — a record of significant changes across versions, for humans tracking upgrades. + +If a section does not fit one of these four categories, it does not belong in `README.md`. + +## What README.md Must NOT Contain + +Do not put any of the following in `README.md`: + +- Guidance that duplicates `SKILL.md` — if the skill body already explains how to use a feature, README must not re-explain it. +- Examples that Claude needs to follow during task execution — these are agent instructions and belong in `SKILL.md`. +- Workflow steps, decision logic, or output format rules — these are agent instructions. +- Capability lists intended to help the agent understand what it can do — these belong in `SKILL.md`. +- Reference tables, lookup data, or taxonomy lists — these belong in `references/`. +- Any content written in second-person addressed to "you" where "you" means the agent, not the human reader. + +## When to Delete README.md + +If a skill has no meaningful installation steps, no configuration requirements, and no environment variables to set, delete the `README.md` entirely. + +A README that says only "This skill does X" or "Ask Claude to do Y" adds no value and creates a maintenance burden. Remove it. + +Signs a README should be deleted: +- The entire README is a description of what the skill does (that description belongs in `SKILL.md` as the purpose statement). +- The README has no install section or the install section says only "drop the file in the plugins folder." +- The README duplicates the first paragraph of `SKILL.md` word-for-word. +- The README was generated to satisfy a template and contains no real human-facing content. + +## README vs SKILL.md — Side by Side + +| Content | README.md | SKILL.md | +|---|---|---| +| Installation command | Yes | No | +| API key setup | Yes | No | +| settings.json entry | Yes | No | +| Quick-start invocation example | Yes | No | +| Changelog | Yes | No | +| Workflow steps | No | Yes | +| Output format rules | No | Yes | +| Capability description | No | Yes | +| Examples Claude must follow | No | Yes | +| Loading triggers for references/ | No | Yes | + +## Enforcement + +When reviewing a skill's files, flag any README.md that: +- Contains a "Usage" section with workflow guidance. +- Contains a "Features" or "Capabilities" section. +- Is longer than the skill's `SKILL.md`. +- Contains language like "Claude will..." or "The agent should..." — these are agent instructions and belong in `SKILL.md`. + +Any flagged content should be moved to `SKILL.md` or deleted if it already exists there. diff --git a/error-handling-stub-template.md b/error-handling-stub-template.md new file mode 100644 index 0000000..4786f35 --- /dev/null +++ b/error-handling-stub-template.md @@ -0,0 +1,193 @@ +# Error Handling Stub Template for SKILL.md + +A reference for skill authors on why error handling matters, what the minimum requirements are, and how to write error sections that actually help agents recover rather than stall. + +--- + +## Why Error Handling Matters in Skills + +Agents are stateless within a step. When a skill's workflow hits an unexpected state — a missing API key, an ambiguous command argument, a tool that returns no output — the agent has no built-in mechanism to recover. It will either: + +1. Loop: re-attempt the same failing action with the same parameters, indefinitely. +2. Stall: stop and ask the user for clarification, producing an unhelpful open-ended question. +3. Hallucinate a recovery: invent a plausible-looking next step that makes the situation worse. + +A skill without documented error paths is a skill that breaks silently in production. The skill author is the only person who knows what "success" looks like and therefore the only person who can define what "failure" looks like and what to do about it. + +**Error handling in a SKILL.md is not optional documentation. It is the contract between the skill and every agent that runs it.** + +--- + +## Minimum Requirement + +Every skill that defines a workflow (a numbered sequence of steps) MUST document at least one error path. The error path must specify: + +- What the failure condition looks like (observable signal, not a feeling) +- Why it typically occurs (root cause, not symptom) +- What the agent should do next (a concrete action, not "try again") + +A skill with zero error paths will be rejected in review. + +--- + +## Stub Template + +Copy this block into any `SKILL.md` that has a workflow section. Replace bracketed placeholders with real content before submitting. + +```markdown +## When Things Go Wrong + +| Situation | Likely Cause | Recovery Action | +|-----------|-------------|-----------------| +| [most common tool/command failure] | [why it happens — missing dep, wrong env, bad arg] | [exact next step — which flag to change, which env var to set, which fallback to use] | +| [ambiguous input state] | [why the input is ambiguous — two valid interpretations, missing required field] | [decision rule — how to pick between options without asking the user] | +| [partial completion — some steps succeeded, then it stopped] | [why partial completion happens — rate limit, auth expiry mid-run, missing downstream dep] | [how to resume — idempotency check, which step to restart from, what state to verify first] | +``` + +Minimum: one row per failure mode you have personally observed or can reason about from the workflow. Three rows is a reasonable floor for any non-trivial skill. + +--- + +## Three Failure Categories + +Treat these as distinct. Conflating them produces recovery actions that do not match the actual problem. + +### (a) Tool / Command Failures + +A tool returned a non-zero exit code, an HTTP error, or no output when output was expected. + +Examples: `npx` fails because the package is not installed; a CLI returns `401 Unauthorized`; a script exits with `command not found`. + +Recovery pattern: check the prerequisite (is the tool installed, is the auth valid, is the argument correctly formed), fix that specific thing, re-run the exact same step. Do not skip to the next step in the workflow. + +### (b) Ambiguous Input States + +The agent received input that matches two or more valid interpretations and cannot proceed without choosing one. + +Examples: the user said "deploy" without specifying environment; the config file has two entries with the same name; a search returns 40 results with no clear best match. + +Recovery pattern: apply a decision rule documented in the skill (prefer staging over production; use the most recently modified entry; pick the result with the highest relevance score). Only escalate to the user if the decision rule cannot resolve the ambiguity. When escalating, present the two options explicitly — do not ask an open-ended question. + +### (c) Partial Completions + +Some steps in the workflow completed successfully before the failure. The system is now in a mixed state: some resources exist, some do not. + +Examples: three of five files were uploaded before a rate limit hit; a database migration ran the first two steps before auth expired; a commit was staged but not pushed when the network dropped. + +Recovery pattern: before re-running, verify what already completed (check file existence, query state, read a log). Resume from the first incomplete step, not from the beginning. Document which steps are idempotent (safe to re-run) and which are not (create operations that would duplicate records). + +--- + +## Worked Examples + +### Example 1: Tool Skill (Datadog CLI) + +A tool skill wraps a CLI or API. Failures are usually environment or network failures. + +```markdown +## When Things Go Wrong + +| Situation | Likely Cause | Recovery Action | +|-----------|-------------|-----------------| +| `Error: Missing DD_API_KEY environment variable` | API key not exported in the current shell session | Run `export DD_API_KEY="..."` and `export DD_APP_KEY="..."` before retrying. Keys are at https://app.datadoghq.com/organization-settings/api-keys | +| Query returns zero results when errors are expected | Time window is too narrow or query syntax uses wrong field name | Widen `--from` to `4h`, then to `24h`. If still empty, verify field names with `npx @leoflores/datadog-cli logs search --query "*" --from 15m` and compare field names in the output | +| `npx` hangs or times out on first run | Large package download on slow connection | Kill the process, run `npx @leoflores/datadog-cli --version` once to cache the package, then retry the original command | +| Non-US site returns 403 | Default site is `datadoghq.com`; account is on EU or another region | Add `--site datadoghq.eu` (or the correct regional site) to every command | +``` + +### Example 2: Creative Skill (Domain Name Brainstormer) + +A creative skill has no external tool failures but does have ambiguous input and quality failures. + +```markdown +## When Things Go Wrong + +| Situation | Likely Cause | Recovery Action | +|-----------|-------------|-----------------| +| All suggested domains are taken | Common keywords are heavily registered | Pivot strategy: use compound words, portmanteaus, or invented words. Ask the user for 2-3 adjectives that describe the brand feeling (not the product) and generate a second pass | +| User says "I don't like any of these" without elaboration | Suggestions missed an unstated constraint (length, tone, language) | Ask exactly two clarifying questions: maximum character count, and one word that captures the brand's personality. Do not regenerate until both are answered | +| Availability check is inconclusive (WHOIS timeout) | WHOIS rate limiting or DNS propagation delay | Mark those domains as "unverified" and recommend the user check registrar.com manually. Do not present them as available | +| User provides a name in a language other than English | Transliteration or cultural meaning may be undesirable | Flag potential issues (homonyms, negative connotations in target markets) before presenting the name as a recommendation | +``` + +### Example 3: Process Skill (Commit Work) + +A process skill orchestrates multi-step Git operations. Partial completions are the dominant failure mode. + +```markdown +## When Things Go Wrong + +| Situation | Likely Cause | Recovery Action | +|-----------|-------------|-----------------| +| `git add -p` exits with no hunks selected | File has no unstaged changes or was already fully staged | Run `git status` and `git diff --cached` to confirm current state. If the file is already staged, proceed to the review step | +| Pre-commit hook fails after staging | Hook detected lint errors, secrets, or test failures | Do NOT use `--no-verify`. Read the hook output, fix the specific violation, re-stage only the affected files, and retry the commit. Amending is not an option if the previous commit is on a remote branch | +| Commit message is rejected by hook (subject too long, wrong type) | Project enforces Conventional Commits format with a subject length limit | Rewrite the subject line to `type(scope): short summary` under 72 characters. Common types: `feat`, `fix`, `chore`, `docs`, `refactor`, `test` | +| `git diff --cached` shows unexpected files | Earlier patch staging accidentally included an unrelated hunk | Run `git restore --staged ` for the unexpected file, then re-run `git add -p` for that file only | +``` + +--- + +## NEVER Patterns + +These patterns consistently cause agent loops or security failures. Do not use them in any skill. + +### NEVER: "Try again" without specifying what to change + +Bad: +``` +If the command fails, try again with different parameters. +``` + +Why it fails: "different" is undefined. The agent will retry with identical parameters, or guess a change that makes the situation worse. + +Good: +``` +If the command returns exit code 1, add the `--verbose` flag to see the full error, then fix the specific error shown before retrying. +``` + +### NEVER: Authentication failures without a documented fallback path + +Bad: +``` +If authentication fails, check your credentials. +``` + +Why it fails: the agent will attempt to "check credentials" by reading environment variables, config files, and potentially trying alternative credential sources — exposing secrets in the process. It will not know when to stop. + +Good: +``` +If authentication returns 401: +1. Confirm the env var is set: `echo $MY_API_KEY` (shows length only — do not print the value). +2. If the variable is empty, stop and ask the user to set it. Do not attempt to retrieve or infer the key. +3. If the variable is set but auth still fails, the key may be expired or revoked. Direct the user to the credential management page (URL here). Do not try alternative keys. +``` + +### NEVER: Open-ended escalation questions + +Bad: +``` +If unsure how to proceed, ask the user. +``` + +Why it fails: the agent produces a vague question ("How should I proceed?") that the user cannot usefully answer without already knowing the state of the workflow. + +Good: +``` +If the ambiguity cannot be resolved by the decision rules above, stop and present the user with exactly two options in this format: +"I found two matches: [option A] and [option B]. Which should I use?" +Do not proceed until the user answers. +``` + +--- + +## Checklist for Skill Authors + +Before submitting a skill with a workflow section, confirm: + +- [ ] At least one row in the "When Things Go Wrong" table +- [ ] Every row names a concrete observable failure signal (not "something goes wrong") +- [ ] Every recovery action specifies what to do, not just what the problem is +- [ ] Authentication failures have a documented stop condition (agent will not try to discover credentials) +- [ ] Partial completion steps identify which operations are idempotent +- [ ] No row uses "try again" without a specific parameter or condition change +- [ ] Ambiguous input states have a decision rule, not an open-ended user question diff --git a/references-directory-convention.md b/references-directory-convention.md new file mode 100644 index 0000000..a2e2259 --- /dev/null +++ b/references-directory-convention.md @@ -0,0 +1,135 @@ +# References Directory Convention + +## Purpose + +The `references/` directory holds supplementary content that a skill loads on demand rather than on every invocation. It exists to keep `SKILL.md` focused and concise while still making deep reference material available when needed. + +## When to Use references/ + +Create a `references/` directory when: + +- The skill's `SKILL.md` would exceed 300 lines if all content were inlined. +- Content is only needed for a subset of task types (e.g., a lookup table used only during threat enrichment, not during triage). +- Reference material is stable and unlikely to change with every skill revision (e.g., field mappings, taxonomy tables, error code lists). + +Do NOT create a `references/` file just to be tidy. If the content is always needed for the skill to work correctly, it belongs in `SKILL.md` directly. + +## File Naming + +- Use kebab-case for all reference file names. +- Use the `.md` extension. +- Name files after their content, not their audience. + +Good: + +``` +references/field-mapping-table.md +references/severity-levels.md +references/alert-triage-workflow.md +``` + +Bad: + +``` +references/ref1.md +references/Agent_Reference.md +references/extra_stuff.md +``` + +## Writing Loading Triggers + +Loading triggers are instructions inside `SKILL.md` that tell the agent when to read a specific reference file. Every loading trigger must be: + +1. Clearly marked as mandatory or conditional. +2. Tied to a specific task or condition — never vague. +3. Placed near the task description it governs, not at the bottom of the file. + +### Mandatory Trigger Format + +Use this format when the reference must always be read before a specific task: + +``` +MANDATORY — read references/field-mapping-table.md before proceeding with alert normalization. +``` + +### Conditional Trigger Format + +Use this format when the reference is only needed under certain conditions: + +``` +If the alert source is "Crowdstrike", read references/crowdstrike-field-map.md before mapping fields. +``` + +Do not write triggers that say "read this for more information" — that is browsing guidance, not an execution instruction. + +## Writing "Do NOT Load" Guidance + +When a reference file exists but should not be loaded outside its specific context, say so explicitly in `SKILL.md`: + +``` +Do NOT load references/full-taxonomy-table.md during triage. Only load it during classification tasks. +``` + +This prevents the agent from loading reference files speculatively or out of order. + +## Always-Loaded vs On-Demand Content + +| Content Type | Where It Lives | +|---|---| +| Skill purpose and scope | `SKILL.md` | +| Core workflow steps | `SKILL.md` | +| Output format requirements | `SKILL.md` | +| Key rules and constraints | `SKILL.md` | +| Lookup tables (large) | `references/` | +| Field mapping tables | `references/` | +| Taxonomy or classification lists | `references/` | +| Sub-workflows for specific task types | `references/` | +| Error code reference tables | `references/` | + +The rule: if the agent cannot start a task correctly without reading it, it goes in `SKILL.md`. If the agent only needs it mid-task for a specific subtask, it goes in `references/`. + +## Good vs Bad Progressive Disclosure Structure + +### Bad — Everything Inlined + +``` +SKILL.md (620 lines) + - Skill purpose (10 lines) + - Core workflow (40 lines) + - Full ATT&CK technique list (200 lines) + - All severity mappings for 8 platforms (180 lines) + - Error handling for 60 edge cases (190 lines) +``` + +The agent reads all 620 lines on every invocation, even when it only needs the core workflow. + +### Good — Progressive Disclosure + +``` +SKILL.md (90 lines) + - Skill purpose (10 lines) + - Core workflow (40 lines) + - Loading triggers pointing to references/ (5 lines) + - Output format (15 lines) + - Key rules (20 lines) + +references/ + attack-technique-list.md (200 lines — loaded during classification) + severity-platform-map.md (180 lines — loaded during severity scoring) + edge-case-error-handling.md (190 lines — loaded when errors are encountered) +``` + +The agent reads `SKILL.md` every time, then reads only the reference files relevant to the current task. + +## Directory Layout Example + +``` +my-skill/ + SKILL.md + references/ + field-mapping-table.md + severity-levels.md + alert-triage-workflow.md +``` + +Reference files have no required internal structure, but they should start with a one-line purpose statement so the agent can confirm it loaded the right file. diff --git a/skills/agent-md-refactor/SKILL.md b/skills/agent-md-refactor/SKILL.md index d4ee2b5..1a08d8b 100644 --- a/skills/agent-md-refactor/SKILL.md +++ b/skills/agent-md-refactor/SKILL.md @@ -1,287 +1,95 @@ --- name: agent-md-refactor -description: Refactor bloated AGENTS.md, CLAUDE.md, or similar agent instruction files to follow progressive disclosure principles. Splits monolithic files into organized, linked documentation. +description: Refactor bloated AGENTS.md, CLAUDE.md, COPILOT.md, or similar agent instruction files. Splits monolithic files into organized, linked documentation using progressive disclosure. Use when: "refactor my AGENTS.md", "my CLAUDE.md is too long", "split my agent instructions", "organize my agent config", "progressive disclosure for my instructions", "clean up my CLAUDE.md". license: MIT --- # Agent MD Refactor -Refactor bloated agent instruction files (AGENTS.md, CLAUDE.md, COPILOT.md, etc.) to follow **progressive disclosure principles** - keeping essentials at root and organizing the rest into linked, categorized files. - ---- - -## Triggers - -Use this skill when: -- "refactor my AGENTS.md" / "refactor my CLAUDE.md" -- "split my agent instructions" -- "organize my CLAUDE.md file" -- "my AGENTS.md is too long" -- "progressive disclosure for my instructions" -- "clean up my agent config" - ---- - -## Quick Reference - -| Phase | Action | Output | -|-------|--------|--------| -| 1. Analyze | Find contradictions | List of conflicts to resolve | -| 2. Extract | Identify essentials | Core instructions for root file | -| 3. Categorize | Group remaining instructions | Logical categories | -| 4. Structure | Create file hierarchy | Root + linked files | -| 5. Prune | Flag for deletion | Redundant/vague instructions | - ---- - -## Process - -### Phase 1: Find Contradictions - -Identify any instructions that conflict with each other. - -**Look for:** -- Contradictory style guidelines (e.g., "use semicolons" vs "no semicolons") -- Conflicting workflow instructions -- Incompatible tool preferences -- Mutually exclusive patterns - -**For each contradiction found:** -```markdown -## Contradiction Found - -**Instruction A:** [quote] -**Instruction B:** [quote] - -**Question:** Which should take precedence, or should both be conditional? -``` - -Ask the user to resolve before proceeding. - ---- - -### Phase 2: Identify the Essentials - -Extract ONLY what belongs in the root agent file. The root should be minimal - information that applies to **every single task**. - -**Essential content (keep in root):** -| Category | Example | -|----------|---------| -| Project description | One sentence: "A React dashboard for analytics" | -| Package manager | Only if not npm (e.g., "Uses pnpm") | -| Non-standard commands | Custom build/test/typecheck commands | -| Critical overrides | Things that MUST override defaults | -| Universal rules | Applies to 100% of tasks | - -**NOT essential (move to linked files):** -- Language-specific conventions -- Testing guidelines -- Code style details -- Framework patterns -- Documentation standards -- Git workflow details - --- -### Phase 3: Group the Rest +## Mindset -Organize remaining instructions into logical categories. +**Platform determines structure.** Progressive disclosure (linked files) only works for Claude Code. Claude.ai Projects, Copilot, Cursor, and Aider all ignore linked files — splitting them actively breaks those setups. Confirm platform before touching anything. -**Common categories:** -| Category | Contents | -|----------|----------| -| `typescript.md` | TS conventions, type patterns, strict mode rules | -| `testing.md` | Test frameworks, coverage, mocking patterns | -| `code-style.md` | Formatting, naming, comments, structure | -| `git-workflow.md` | Commits, branches, PRs, reviews | -| `architecture.md` | Patterns, folder structure, dependencies | -| `api-design.md` | REST/GraphQL conventions, error handling | -| `security.md` | Auth patterns, input validation, secrets | -| `performance.md` | Optimization rules, caching, lazy loading | +**Read the entire file before splitting.** Contradictions between line 12 and line 340 are invisible if you scan and split incrementally. A contradiction you miss becomes an agent inconsistency you can't debug later. -**Grouping rules:** -1. Each file should be self-contained for its topic -2. Aim for 3-8 files (not too granular, not too broad) -3. Name files clearly: `{topic}.md` -4. Include only actionable instructions +**Deletion destroys institutional knowledge.** "Write clean code" is safe to delete. "Never use mutable default arguments in our config loader" looks like generic advice but encodes a real incident. Only delete universally-obvious defaults — when in doubt, keep. ---- - -### Phase 4: Create the File Structure - -**Output structure:** -``` -project-root/ -├── CLAUDE.md (or AGENTS.md) # Minimal root with links -└── .claude/ # Or docs/agent-instructions/ - ├── typescript.md - ├── testing.md - ├── code-style.md - ├── git-workflow.md - └── architecture.md -``` - -**Root file template:** -```markdown -# Project Name - -One-sentence description of the project. - -## Quick Reference - -- **Package Manager:** pnpm -- **Build:** `pnpm build` -- **Test:** `pnpm test` -- **Typecheck:** `pnpm typecheck` - -## Detailed Instructions - -For specific guidelines, see: -- [TypeScript Conventions](.claude/typescript.md) -- [Testing Guidelines](.claude/testing.md) -- [Code Style](.claude/code-style.md) -- [Git Workflow](.claude/git-workflow.md) -- [Architecture Patterns](.claude/architecture.md) -``` - -**Each linked file template:** -```markdown -# {Topic} Guidelines +**The "every task" test is the only categorization rule that matters.** If an instruction only applies to 30% of tasks, it doesn't belong in the root file. Not "is it important" — is it relevant to 100% of tasks. -## Overview -Brief context for when these guidelines apply. - -## Rules - -### Rule Category 1 -- Specific, actionable instruction -- Another specific instruction - -### Rule Category 2 -- Specific, actionable instruction - -## Examples - -### Good -\`\`\`typescript -// Example of correct pattern -\`\`\` - -### Avoid -\`\`\`typescript -// Example of what not to do -\`\`\` -``` +**Over-splitting is worse than not splitting.** Eight linked files for a 200-line source forces the agent to open 8 files to find one rule. More context cost, not less. Target 3–6 linked files maximum. --- -### Phase 5: Flag for Deletion - -Identify instructions that should be removed entirely. - -**Delete if:** -| Criterion | Example | Why Delete | -|-----------|---------|------------| -| Redundant | "Use TypeScript" (in a .ts project) | Agent already knows | -| Too vague | "Write clean code" | Not actionable | -| Overly obvious | "Don't introduce bugs" | Wastes context | -| Default behavior | "Use descriptive variable names" | Standard practice | -| Outdated | References deprecated APIs | No longer applies | - -**Output format:** -```markdown -## Flagged for Deletion - -| Instruction | Reason | -|-------------|--------| -| "Write clean, maintainable code" | Too vague to be actionable | -| "Use TypeScript" | Redundant - project is already TS | -| "Don't commit secrets" | Agent already knows this | -| "Follow best practices" | Meaningless without specifics | -``` +## Navigation ---- +**Use this skill when:** +- "refactor my AGENTS.md / CLAUDE.md / COPILOT.md" +- "my agent instructions are too long" +- "split my agent instructions into files" +- "progressive disclosure for my agent config" +- "clean up / organize my CLAUDE.md" -## Execution Checklist +**Do NOT use this skill when:** +- The file is already under 80 lines — pruning and editing is faster than restructuring +- The platform is Claude.ai Projects, Copilot, Cursor, or Aider — linked files don't work there; offer prune-only instead +- The user wants to write new instructions from scratch — this skill refactors, it doesn't author +**Platform decision tree:** ``` -[ ] Phase 1: All contradictions identified and resolved -[ ] Phase 2: Root file contains ONLY essentials -[ ] Phase 3: All remaining instructions categorized -[ ] Phase 4: File structure created with proper links -[ ] Phase 5: Redundant/vague instructions removed -[ ] Verify: Each linked file is self-contained -[ ] Verify: Root file is under 50 lines -[ ] Verify: All links work correctly +What platform(s) does this file serve? +├── Claude Code only → full progressive disclosure (linked files valid) +├── Claude.ai Projects only → flat only; prune and reorganize, no splitting +├── Copilot / Cursor / Aider → flat only; splitting breaks it +└── Multiple platforms → create separate files per platform, or keep flat ``` --- -## Anti-Patterns +## Philosophy -| Avoid | Why | Instead | -|-------|-----|---------| -| Keeping everything in root | Bloated, hard to maintain | Split into linked files | -| Too many categories | Fragmentation | Consolidate related topics | -| Vague instructions | Wastes tokens, no value | Be specific or delete | -| Duplicating defaults | Agent already knows | Only override when needed | -| Deep nesting | Hard to navigate | Flat structure with links | +Agent instruction files are not documentation — they are compiler inputs. Every byte costs inference tokens on every task. The goal is maximum signal density at minimum size: root file carries only what every task needs, linked files carry the rest, and anything a competent agent already knows gets deleted. --- -## Examples +## NEVER -### Before (Bloated Root) -```markdown -# CLAUDE.md +- **NEVER split files before confirming platform** — linked files are silently ignored on Claude.ai Projects, Copilot, Cursor, and Aider; splitting a multi-platform file breaks all non-Claude-Code users without any error message. -This is a React project. +- **NEVER delete instructions that contain project-specific nouns** — library names, internal hook names, team-specific patterns, and incident-derived rules all look like "generic best practices" to an outside reader but encode irreplaceable context. Only delete instructions that could appear verbatim in any project. -## Code Style -- Use 2 spaces -- Use semicolons -- Prefer const over let -- Use arrow functions -... (200 more lines) +- **NEVER create linked files that link to more linked files** — agents follow links from root, then read the target file. They do not recursively follow links inside linked files. Any instruction buried at depth 2+ is effectively invisible, creating a false sense of organization with zero functional benefit. -## Testing -- Use Jest -- Coverage > 80% -... (100 more lines) +- **NEVER merge contradicting instructions without user confirmation** — two conflicting rules might both be intentional (e.g., "use semicolons" in source, "no semicolons" in test files). Silently picking one invalidates half the project's convention history. -## TypeScript -- Enable strict mode -... (150 more lines) -``` +- **NEVER apply progressive disclosure to a file under 80 lines** — the overhead of maintaining links and multiple files exceeds the context savings. Small files should be pruned, not split. -### After (Progressive Disclosure) -```markdown -# CLAUDE.md +- **NEVER categorize by topic noun instead of task trigger** — `style-and-conventions.md` is organized around what the content is; `code-review.md` is organized around when it is needed. Agents retrieve context when starting a task — trigger-based grouping matches how they actually read. -React dashboard for real-time analytics visualization. +--- -## Commands -- `pnpm dev` - Start development server -- `pnpm test` - Run tests with coverage -- `pnpm build` - Production build +## When Things Go Wrong -## Guidelines -- [Code Style](.claude/code-style.md) -- [Testing](.claude/testing.md) -- [TypeScript](.claude/typescript.md) -``` +| Situation | Likely Cause | Recovery | +|-----------|-------------|----------| +| Agent stops following linked files | Platform is Claude.ai Projects or Copilot — linked files are not followed | Flatten back to single file; use section headers instead | +| Linked file exists but agent never reads it | No link in root file pointing to it, or link at depth 2+ | Add explicit link from root; never nest links | +| Refactored file loses a critical constraint | Domain-specific rule deleted as "obvious" | Restore from git history; add to deletion triage checklist — confirm all project-noun rules with user | +| Root file creep returns after 2 months | No process to prevent additive drift | Add line-count gate to CI or README: "Root file must stay under 50 lines" | +| Split created contradictions that didn't exist before | Copy-paste from different source contexts during categorization | Re-read both files; surface conflict to user before finalizing | --- -## Verification +## Quick Process -After refactoring, verify: +Full workflow detail: [references/refactor-workflow.md](references/refactor-workflow.md) +Anti-patterns catalog + platform compatibility matrix: [references/anti-patterns-catalog.md](references/anti-patterns-catalog.md) -1. **Root file is minimal** - Under 50 lines, only universal info -2. **Links work** - All referenced files exist -3. **No contradictions** - Instructions are consistent -4. **Actionable content** - Every instruction is specific -5. **Complete coverage** - No instructions were lost (unless flagged for deletion) -6. **Self-contained files** - Each linked file stands alone +**Five-phase summary:** ---- +1. **Platform check** — confirm linked files are valid for this target +2. **Full read + contradiction scan** — read complete file; surface all conflicts to user before touching anything +3. **Essentials extraction** — apply "every task" test; root file target under 50 lines +4. **Categorize by task trigger** — 3–6 linked files max; group by WHEN needed, not WHAT topic +5. **Deletion triage** — three buckets: DELETE (universal defaults), KEEP (domain-specific), ESCALATE (ask user) diff --git a/skills/agent-md-refactor/references/anti-patterns-catalog.md b/skills/agent-md-refactor/references/anti-patterns-catalog.md new file mode 100644 index 0000000..bb954e0 --- /dev/null +++ b/skills/agent-md-refactor/references/anti-patterns-catalog.md @@ -0,0 +1,88 @@ +# Anti-Patterns Catalog — Agent MD Refactor + +## Category 1: Root File Bloat + +### Chronological accumulation drift +Every sprint someone adds 3 lines to CLAUDE.md. After 6 months it's 800 lines. No single section looks unreasonable in isolation — the problem is additive. +**Signal:** Lines added > lines removed over last 10 commits. + +### Defensive over-specification +Teams write walls of rules after a single bad incident. "Always check if file exists before reading" appears because an agent once deleted a file. The rule now burns tokens on every task even when irrelevant. +**Signal:** Instructions that read like post-mortems ("Never again X"). + +### Platform-agnostic instructions in platform-specific files +CLAUDE.md written for Claude Code containing Copilot-style slot-fill patterns. The instruction format doesn't match the agent runtime. +**Signal:** Instructions that reference ``, `{{variable}}`, or step-by-step wizard patterns. + +--- + +## Category 2: Structural Errors + +### False modularity — splitting without linking +Files created in `.claude/` but root file never references them. Agent never reads them. +**Signal:** `.claude/*.md` files with zero inbound links from root. + +### Cross-file contradiction via copy-paste +`testing.md` says "mock all external calls." `architecture.md` says "prefer integration tests." Both were copied from different projects. +**Signal:** Two linked files with semantically opposite rules. + +### Depth-first file structure +``` +.claude/ + backend/ + api/ + rest/ + conventions.md ← agent never reaches this +``` +Agents follow links one level at a time. Anything beyond depth 2 is effectively invisible. +**Signal:** Any linked file that is itself a directory index pointing to more files. + +--- + +## Category 3: Content Quality Failures + +### Rule without context = ignored rule +"Use functional components only" — but WHY? When an agent encounters a class component in existing code, it doesn't know if refactoring it is in scope. +**Signal:** Rules with no rationale and no scope boundary. + +### Instruction-as-aspiration +"Write maintainable, well-tested, performant code" belongs in a team handbook, not an agent instruction file. Agents need executable conditions, not values. +**Signal:** Any rule where compliance cannot be verified by inspection. + +### Stale command blocks +``` +npm run dev +``` +Project switched to pnpm 4 months ago. Agent runs the wrong command. No error visible in output (just wrong behavior). +**Signal:** Commands that don't match package.json scripts or Makefile targets. + +--- + +## Category 4: Refactor Execution Errors + +### Refactoring without reading the full file first +Splitting on first scan misses contradictions that only appear when you see instruction A on line 12 and its contradiction on line 340. Always read complete before splitting. + +### Over-categorizing +Eight linked files for a 200-line AGENTS.md. Now the agent reads 8 files to find one rule. You've increased context cost, not reduced it. +**Threshold:** One linked file per ~150 lines of original content is reasonable. + +### Losing provenance on deletions +Flagging "Write clean code" for deletion is safe. Flagging a domain-specific constraint ("never use mutable default arguments in Python config objects") because it "sounds like best practices" destroys institutional knowledge. +**Rule:** Only delete instructions that are universal defaults. When in doubt, keep. + +--- + +## Platform Compatibility Matrix + +| Platform | Root File | Linked Files Followed? | Max Depth | Notes | +|----------|-----------|------------------------|-----------|-------| +| Claude Code | CLAUDE.md | Yes (explicit Read) | Unlimited | Agent actively follows links | +| Claude.ai Projects | Project Knowledge | No — flat only | 1 | All instructions must be in one file | +| GitHub Copilot | COPILOT.md (or .github/copilot-instructions.md) | No | 1 | Single file, no linking | +| Cursor | .cursorrules | No | 1 | Flat JSON or markdown | +| Aider | .aider.conf | No | 1 | Config file, not linked docs | +| Devin | DEVIN.md | Limited | 2 | Partial link following | +| OpenHands | .openhands_instructions | No | 1 | Flat only | + +**Critical implication:** Progressive disclosure (linked files) only works for Claude Code. For all other platforms, all instructions must remain in the root file — splitting actively breaks them. diff --git a/skills/agent-md-refactor/references/refactor-workflow.md b/skills/agent-md-refactor/references/refactor-workflow.md new file mode 100644 index 0000000..d28287a --- /dev/null +++ b/skills/agent-md-refactor/references/refactor-workflow.md @@ -0,0 +1,103 @@ +# Refactor Workflow — Agent MD Refactor + +## Pre-Flight: Platform Check (Do This First) + +Before any restructuring, identify the target platform(s). + +``` +Does this file serve Claude Code exclusively? + YES → progressive disclosure (linked files) is valid + NO → keep flat; only prune and reorganize within single file + BOTH → create Claude Code version + separate flat version +``` + +See [anti-patterns-catalog.md](anti-patterns-catalog.md) for the full platform compatibility matrix. + +--- + +## Phase 1: Contradiction Scan + +Read the entire file before touching anything. Map: +- Line numbers of each rule +- Scope of each rule (always / when / if) +- Any two rules that could conflict on the same codebase action + +**Output to user (if contradictions found):** +``` +Contradiction found: + Line 12: "Use semicolons" + Line 187: "No semicolons in test files" + +Q: Is the second rule intentional (tests use different style) or a copy-paste error? +``` + +Do not proceed past Phase 1 until contradictions are resolved. + +--- + +## Phase 2: Essentials Extraction + +Apply the "every task" test: if an instruction only matters for 30% of tasks, it belongs in a linked file, not root. + +**Root file keeps:** +- Project identity (1 sentence) +- Non-default commands (build, test, typecheck — only if non-standard) +- Package manager (only if not npm) +- Hard overrides (things that MUST take precedence over agent defaults) +- Rules that apply to 100% of tasks + +**Root file target:** Under 50 lines. + +--- + +## Phase 3: Categorization + +Group by WHEN they are needed, not by WHAT they are about. + +**Effective grouping:** `testing.md` (needed when writing or modifying tests) +**Ineffective grouping:** `style-and-conventions-and-formatting-and-naming.md` (too broad) + +**Sizing rule:** 3–6 linked files maximum. If you have more than 6, merge the least-referenced topics. + +--- + +## Phase 4: Deletion Triage + +Three-bucket sort for each candidate instruction: + +| Bucket | Criterion | Action | +|--------|-----------|--------| +| DELETE | Universal default — agent already knows | Remove | +| KEEP | Domain-specific or non-obvious | Keep | +| ESCALATE | Unclear if project-specific or generic | Ask user | + +**Never delete without user confirmation if the instruction:** +- Contains a specific library name +- References a project-internal pattern (e.g., "use our AuthContext hook") +- Has a non-obvious rationale embedded in it + +--- + +## Phase 5: Link Validation + +After creating linked files: +1. Every file referenced from root must exist +2. No linked file references another linked file (depth 2+ = invisible) +3. Root file links use relative paths (not absolute) +4. Verify link syntax matches target platform (Markdown `[text](path)` for Claude Code) + +--- + +## Execution Checklist + +``` +[ ] Platform identified — confirm linked files are valid for this platform +[ ] Full file read before any changes +[ ] All contradictions surfaced to user and resolved +[ ] Root file under 50 lines +[ ] 3–6 linked files (not more) +[ ] No linked file references another linked file +[ ] Deletion candidates confirmed with user for domain-specific rules +[ ] All links validated (files exist, paths correct) +[ ] No instruction lost without explicit user approval +``` diff --git a/skills/api-design/SKILL.md b/skills/api-design/SKILL.md new file mode 100644 index 0000000..7fb2eab --- /dev/null +++ b/skills/api-design/SKILL.md @@ -0,0 +1,412 @@ +--- +name: api-design +description: Design API contracts before implementation — protocol selection, pagination, versioning, error standardization, OpenAPI spec authoring, and breaking-change classification. Use when asked to "design an API", "spec out endpoints", "pick REST vs GraphQL", "define error responses", "plan API versioning", or "write an OpenAPI spec". Sits between requirements-clarity (what to build) and openapi-to-typescript (typed interfaces from spec). +--- + +## Mindset + +- **The spec is a contract, not documentation.** An API spec written after the fact documents what was shipped. A spec written before implementation constrains what gets shipped. Spec-first is a design discipline, not a paperwork exercise. +- **The HTTP status code IS the error signal.** A 200 with `{"success": false}` in the body is a protocol violation that breaks every intermediary — proxies, caches, circuit breakers, monitoring dashboards — because they all treat 2xx as success. The body elaborates; the status decides. +- **Consumers don't read your source code.** Every invariant you enforce in the backend (write-once fields, enum transitions, per-org rate limits) must appear explicitly in the contract. If it isn't in the spec, frontend will rediscover it at 4 PM on release day. +- **Pagination is a data consistency guarantee, not a performance knob.** The correct pagination strategy depends on whether the underlying data changes during iteration. Offset breaks silently on concurrent writes. Cursor is required for correctness on any live data set — not just for performance. +- **Versioning debt compounds.** Every URL path version (`/v1/`, `/v2/`) is a permanent branch in production. Non-breaking evolution (additive fields, new optional params) should not create a new version. Reserve versioning for genuine breaking changes. +- **Error codes are machine interfaces.** `"Email is invalid"` is a display string. `INVALID_EMAIL` is a contract. Consumers branch on codes, not sentences. Sentences are localized, truncated, and changed mid-iteration. Codes are not. + +## Navigation + +**Use this skill when**: +- Designing a new API surface (REST, GraphQL, gRPC) before any implementation +- Choosing between protocol options and need decision criteria beyond "REST is standard" +- Defining error response structure, pagination strategy, or versioning policy +- Writing or reviewing an OpenAPI spec (spec-first or code-first) +- Classifying whether a proposed change is breaking or non-breaking +- Documenting rate limiting, auth schemes, or retry conventions in a spec + +**Do NOT use this skill when**: +- Backend implementation is already complete and you need consumer-facing docs — use backend-to-frontend-handoff-docs +- You have an OpenAPI spec and need TypeScript interfaces — use openapi-to-typescript +- Requirements are still vague ("build me a user API") — use requirements-clarity first +- You need database schema design — use database-schema-designer + +**Quick decision tree**: +``` +Is the API surface new (no prior contract exists)? + YES → Start at Protocol Selection, then Pagination, then Versioning + NO → Skip to Breaking-Change Classification; existing contract governs + +Does the user have a written OpenAPI spec? + YES → Jump to Spec Review; check for common omissions + NO → Ask: spec-first or code-first? (see section below) + +Is the user asking about errors/status codes? + YES → Error Standardization section; apply RFC 7807 +``` + +## Philosophy + +API design is constraint selection. Every decision — protocol, pagination style, version scheme, error shape — closes future options in exchange for present clarity. Bad API design is not missing features; it is missing constraints. Underconstrained APIs allow callers to build assumptions that are never honored, and allow implementers to ship behaviors that are never documented. + +## NEVER + +- **NEVER use offset pagination for any resource that can be modified during iteration** — offset is computed against the current state of the dataset. Between page 1 and page 2, concurrent inserts shift every subsequent row. Items are skipped or duplicated invisibly, with no error to surface. Cursor pagination anchors to a stable position regardless of concurrent writes. +- **NEVER define error responses as a single string `message` field** — a bare string message is consumed by displaying it to users. Machine-readable error bodies (`code` + `detail` + optional `field`) allow consumers to branch programmatically: retry on `RATE_LIMIT_EXCEEDED`, show a field hint on `INVALID_EMAIL`, escalate on `PERMISSION_DENIED`. Strings cannot support this without string parsing. +- **NEVER return HTTP 200 with a `success: false` body** — the HTTP status is the error signal for every layer that touches the response (proxy, cache, CDN, circuit breaker, monitoring alert, client SDK). A 200 disables all of that. The body is elaboration only; it cannot override the status. +- **NEVER version your API in the URL by default** — `/v1/users` creates a permanent production branch. When `/v2/users` ships, `/v1/users` must remain operational indefinitely (or you break clients). Header versioning (`API-Version: 2024-01`) makes version negotiation explicit and allows sunset without a parallel URL tree. Override this ONLY when developer experience for public APIs demands URL discoverability. +- **NEVER design a PATCH endpoint that treats absent fields as nulls** — in a partial update, an absent field means "do not change this field." A null field means "set this field to null." These are distinct intents. A PATCH that collapses them forces callers to always send every optional field to protect values they don't want cleared — defeating the purpose of PATCH. +- **NEVER use boolean query parameters for state that could acquire a third value** — `?active=true/false` will eventually need `?active=pending` or `?active=null`. Boolean URL params cannot express this; the API must be redesigned. Use `?status=active|inactive|pending` from the start. +- **NEVER define rate limit behavior without spec-level documentation** — rate limits that are not in the spec are discovered by consumers via production 429 errors. Document the limit scope (per user, per IP, per org), the algorithm (token bucket, sliding window), and the response headers (`Retry-After`, `X-RateLimit-Remaining`) before the API ships. + +## Protocol Selection + +### REST vs GraphQL vs gRPC Decision Criteria + +| Criterion | REST | GraphQL | gRPC | +|-----------|------|---------|------| +| Consumer profile | Multiple external clients, public APIs | Frontend teams with highly variable query shapes | Internal service-to-service, performance-critical | +| Schema evolution | Additive (non-breaking); versioning for removals | Schema-first; deprecations via `@deprecated` directive | Proto files; strongly typed; forward/backward compat via field numbers | +| Payload control | Fixed shapes; over/under-fetch common | Consumer-specified; fetch only needed fields | Fixed shapes; binary encoding | +| Tooling maturity | Broadest ecosystem | Good; Apollo, graphql-codegen | Strong in polyglot; requires protoc toolchain | +| Error handling | HTTP status codes + RFC 7807 body | Always 200; errors in `errors[]` array | gRPC status codes (not HTTP) | +| Caching | Native HTTP caching (ETags, Cache-Control) | Query hashing required; no standard cache key | Application-level only | + +**Non-obvious selection criteria practitioners miss**: + +- **GraphQL's 200-for-everything is a deliberate trade-off, not a design flaw.** But it means your HTTP-layer monitoring (Datadog, Sentry, uptime checks) will report 100% success even during partial outages. You must instrument on the `errors` array, not on HTTP status. +- **REST over-fetching is a performance problem only after you have the traffic.** Premature GraphQL adoption for "performance" typically means paying the schema-design and tooling tax before you have evidence over-fetching is actually a bottleneck. +- **gRPC requires HTTP/2.** Browser clients cannot use gRPC directly (gRPC-Web is a separate protocol with a proxy layer). If you have browser consumers, gRPC is an internal transport, not a public API. +- **REST and GraphQL are not exclusive.** Large platforms often serve a REST API for external partners (stable, versioned contracts) and GraphQL for internal frontend (flexible, schema-driven iteration). + +## Pagination Patterns + +### Cursor vs. Offset Trade-offs + +**Offset pagination** (`?page=2&limit=20`, `?offset=40`): +- Simple to implement, intuitive for humans, supports random access ("go to page 10") +- **Breaks silently on concurrent writes**: if 5 rows are inserted after page 1 is fetched, page 2 starts 5 rows later in the result set — rows are skipped, not duplicated, with no error +- **Breaks on large offsets**: `OFFSET 100000` in SQL requires the database to scan and discard 100,000 rows; performance degrades with depth +- Acceptable for: static datasets (product catalogs, historical archives), user-facing paginated reports where the user pages through manually + +**Cursor pagination** (`?cursor=&limit=20`): +- Cursor anchors to a stable position (row ID, timestamp, composite key) +- Concurrent inserts/deletes do not affect positions relative to the cursor +- Cannot support random access ("jump to page 10") — only forward (and optionally backward) navigation +- Required for: feeds, activity streams, audit logs, any resource writable by concurrent processes during iteration +- **Cursor must be opaque to callers** — do not expose raw SQL offsets or timestamps as cursors; expose an opaque base64 token. This allows cursor implementation to change without breaking callers. + +**Keyset pagination** is a variant of cursor where the position is based on indexed column values (e.g., `WHERE created_at < ? AND id < ?`). Use when sort order matters and you control the index. + +### What Practitioners Get Wrong + +- Choosing offset "for now" on a resource that accepts writes. The breakage is invisible in tests (tests don't write concurrently) and shows up as user-reported missing items in production feeds. +- Exposing cursor internals. A cursor like `?cursor=2024-05-01T12:00:00Z` couples callers to your sort implementation and breaks if you ever change the timestamp column type. +- Forgetting backward cursors. Most cursor implementations only go forward. If the product requires "go back," build `prev_cursor` from the start — retrofitting it changes the response schema. + +## API Versioning Strategy + +### URL vs. Header vs. Content Negotiation + +**URL versioning** (`/v1/users`, `/v2/users`): +- Immediately visible; no special headers needed; easy to share URLs +- Creates permanent production branches — `/v1` must run indefinitely once published +- Encourages breaking changes by making them "cheap" (just bump `/v2`) +- Appropriate for: public APIs with diverse consumers who cannot control request headers (webhooks, third-party integrations, browser fetch without CORS pre-flight complexity) + +**Header versioning** (`API-Version: 2024-01`, `X-API-Version: 2`): +- No URL pollution; single URL tree; version sunset without URL proliferation +- Not visible in browser address bar; harder to test manually; requires documentation +- Appropriate for: internal APIs, developer platforms where consumers control all request headers, APIs where sunset timelines are enforced + +**Content negotiation** (`Accept: application/vnd.api+v2+json`): +- Standards-based; caches correctly (Vary header) +- Complex for consumers; rarely implemented correctly in practice +- Appropriate for: media APIs, hypermedia/HATEOAS APIs; almost never the right choice for standard JSON REST APIs + +**Date-based header versioning** (used by Stripe, Anthropic) is the most sustainable pattern for high-churn APIs: +- Version string is a date: `API-Version: 2024-11-01` +- Each version is a snapshot of the API on that date +- Consumers pin to a version; new features ship as new version dates +- Breaking changes require a new date version; old dates run until sunset + +### What Practitioners Get Wrong + +- Versioning every release, not every breaking change. Versions are not release numbers. Non-breaking changes (additive fields, new optional params, new enum values) must not increment the version — this trains consumers to update version pins constantly and increases migration fatigue. +- Forgetting that "removing a field" is a breaking change even when the field was optional. Consumers who read the field get `undefined` after removal. Optional fields cannot be removed without a version bump. +- Adding URL versioning to a private internal API. Internal services can coordinate upgrades; they do not need permanent URL branches. Use header versioning or no versioning (coordinate rollout directly). + +## Error Response Standardization + +### RFC 7807 Problem Details + +Use RFC 7807 (`application/problem+json`) as the standard error shape: + +```json +{ + "type": "https://errors.example.com/invalid-input", + "title": "Validation Failed", + "status": 422, + "detail": "The request body contains invalid fields.", + "instance": "/requests/abc123", + "errors": [ + { + "code": "INVALID_EMAIL", + "field": "email", + "detail": "The provided email address is not a valid format." + } + ] +} +``` + +**Required fields**: `type` (URI, dereferenceable or not), `title` (human, stable), `status` (mirrors HTTP status), `detail` (specific to this occurrence). + +**The `errors` array extension** is not part of RFC 7807 base but is universally expected for validation errors. Always extend Problem Details with a machine-readable `errors` array for 422 responses. + +### Status Code Taxonomy + +| Situation | Correct Code | Common Wrong Code | +|-----------|-------------|-------------------| +| Validation error (bad input shape, invalid values) | **422 Unprocessable Entity** (RFC 4918) | 400 Bad Request | +| Malformed JSON / unparseable request body | 400 Bad Request | 422 | +| Not authenticated (no token, expired token) | 401 Unauthorized | 403 | +| Authenticated but lacks permission | 403 Forbidden | 401 | +| Resource not found | 404 Not Found | 200 with `null` body | +| Optimistic lock conflict / concurrent edit | 409 Conflict | 400 | +| Rate limit exceeded | 429 Too Many Requests | 400 or 503 | +| Downstream dependency unavailable | 503 Service Unavailable | 500 | +| Intentional temporary maintenance window | 503 with `Retry-After` | 500 | + +**HTTP 422 for validation errors** is explicitly correct per RFC 4918. The common objection ("400 is simpler") is wrong: 400 means the server could not parse the request. 422 means the request was parsed, understood, and rejected due to semantic errors. These are different conditions. Monitoring dashboards, clients, and retry logic all benefit from the distinction. + +### Error Code Taxonomy + +Error codes must be: +- **Machine-parseable strings**: `INVALID_EMAIL`, not `"Email address is not valid"` +- **SCREAMING_SNAKE_CASE** (conventional, widely recognized) +- **Domain-scoped for large APIs**: `AUTH_TOKEN_EXPIRED`, `PAYMENT_CARD_DECLINED`, `USER_EMAIL_DUPLICATE` +- **Not sentences, not display strings** — the `detail` field carries the human message; `code` is for programmatic branching + +Organize codes in tiers: +1. **Generic codes** (used by all endpoints): `VALIDATION_FAILED`, `NOT_FOUND`, `UNAUTHORIZED`, `FORBIDDEN`, `RATE_LIMIT_EXCEEDED`, `INTERNAL_ERROR` +2. **Domain codes** (resource-specific): `USER_EMAIL_DUPLICATE`, `ORDER_ALREADY_SUBMITTED`, `PAYMENT_INSUFFICIENT_FUNDS` +3. **Field-level codes** (in `errors[]` array): `INVALID_EMAIL`, `REQUIRED_FIELD_MISSING`, `VALUE_TOO_LONG` + +### Retry Signal Conventions + +| Code | Retryable? | Convention | +|------|-----------|------------| +| 429 | Yes, after backoff | Always include `Retry-After` header (seconds) | +| 503 | Yes, after backoff | Include `Retry-After` if duration is known | +| 502, 504 | Yes, immediate | Network/gateway errors; exponential backoff | +| 409 | Conditional | Re-fetch resource, re-apply change, retry | +| 422 | No | Validation error; retry without fixing input is pointless | +| 400, 401, 403, 404 | No | Client error; retry is identical to original failure | +| 500 | No (by default) | Internal server error; do not retry by default; operator should investigate | + +## OpenAPI Spec Authoring + +### Spec-First vs. Code-First + +**Spec-first**: Write the OpenAPI spec before any implementation. Treat the spec as the source of truth. Implementation must match it. +- Use when: multiple consumers, cross-team coordination needed, public API, contract needs review/approval before dev starts +- Tooling: Swagger Editor, Stoplight Studio, Redocly +- Risk: spec drift if implementation deviates and spec is not updated + +**Code-first**: Annotate code (decorators, docstrings); tooling generates the spec. +- Use when: solo developer, internal service, spec accuracy matters more than spec timing +- Tooling: FastAPI (Python), tsoa (TypeScript), Springdoc (Java) +- Risk: generated spec omits information code cannot express (intent, deprecation warnings, narrative descriptions) + +**When spec-first is required** even if code-first is preferred: +- Any API with a partner or public consumer — they cannot wait for code to be written to see the contract +- Any API used by a frontend team that is developing in parallel — frontend needs types from the spec before backend is done + +### Required Fields Practitioners Miss + +In `paths`: +- `operationId` on every operation — without it, code generators use method+path as the function name, which is often unreadable +- `tags` on every operation — controls grouping in generated docs and SDK namespacing +- `summary` AND `description` — summary is one line for the table of contents; description carries nuance (state machine, error triggers, business rules) + +In `components/schemas`: +- `description` on every property — a property named `status` with no description forces consumers to guess from example values +- `readOnly: true` on server-generated fields (IDs, timestamps) — without this, code generators include them in request bodies, allowing callers to attempt to set immutable fields +- `nullable: false` explicitly when a field is never null — prevents consumers from over-writing null handling + +In `responses`: +- A response definition for **every** error status code the endpoint can return — not just 200 and a generic 4xx +- `default` response referencing a Problem Details schema — catches undocumented errors +- `headers` for `429` responses — document `Retry-After` and `X-RateLimit-*` headers here, not only in prose + +In `security`: +- Specify security schemes at the operation level, not just globally, when endpoints have mixed auth (some public, some protected) + +### Common OpenAPI Mistakes + +- Defining all errors as `{ message: string }` in the spec, then returning RFC 7807 in implementation. The spec is wrong the day it ships. +- Using `additionalProperties: true` everywhere to avoid schema maintenance. This makes the spec useless as a contract — it permits any payload. +- Using `anyOf` where `oneOf` was intended. If only one variant is valid per request, use `oneOf` and add a `discriminator`. +- Omitting `format` on string fields that have known formats (`email`, `uri`, `date-time`, `uuid`) — validators and code generators use `format` to apply semantic validation. + +## Breaking vs. Non-Breaking Change Classification + +### What Practitioners Get Wrong + +The common definition is: "breaking = removed field." The correct definition is: **a breaking change is any change that requires an existing well-behaved consumer to change their code.** + +**Non-breaking (safe to ship without version bump)**: +- Adding a new optional field to a response (consumers ignore unknown fields if coded defensively) +- Adding a new optional query parameter +- Adding a new endpoint +- Adding a new enum value to a response field *if consumers are coded to handle unknown values* — this is the disputed case (see below) +- Relaxing a constraint (accepting a previously-invalid value) + +**Breaking (requires version bump or migration path)**: +- Removing any field from a response, even an optional one +- Renaming a field +- Changing a field's type (string → number, string → string[]) +- Changing a field's nullability (non-null → nullable, nullable → non-null) +- Adding a new **required** field to a request +- Removing or renaming a query parameter +- Changing the HTTP status code for a success response +- Tightening a constraint (rejecting a previously-valid value) +- Changing authentication scheme + +**The disputed case: new enum values in responses**. Technically non-breaking (you added a value). In practice, breaking for consumers who use exhaustive switch statements or who validate response data against a known-values list. Treat new response enum values as breaking for any statically-typed consumer, or document explicitly that all enums are open-ended (consumers must handle unknown values). + +**PATCH field removals are always breaking** — even if the field is optional. A consumer sending PATCH payloads with that field now sends an unknown field; behavior depends on server implementation (ignore vs. error). + +## Rate Limiting Documentation + +### Documenting Rate Limits in the OpenAPI Spec + +Rate limits must appear in the spec — not only in prose documentation. Use response headers on every endpoint subject to rate limiting: + +```yaml +responses: + "200": + headers: + X-RateLimit-Limit: + schema: + type: integer + description: Maximum requests allowed in the current window. + X-RateLimit-Remaining: + schema: + type: integer + description: Requests remaining in the current window. + X-RateLimit-Reset: + schema: + type: integer + format: int64 + description: Unix timestamp (seconds) when the window resets. + "429": + headers: + Retry-After: + schema: + type: integer + description: Seconds to wait before retrying. + content: + application/problem+json: + schema: + $ref: "#/components/schemas/ProblemDetails" +``` + +Document rate limit scope explicitly in endpoint descriptions: +- Per user? Per API key? Per organization? Per IP? +- Token bucket (burst allowed) or fixed window (no burst)? +- Separate limits per endpoint or global account limit? + +### What Practitioners Get Wrong + +- Documenting rate limits only in a separate "limits" guide page, not in the spec. Consumers encounter 429 before they find the guide. +- Omitting `Retry-After` on 429 responses. Without it, consumers implement arbitrary backoffs (usually too short or too long). +- Using the same rate limit scope for bulk and single-item endpoints. A `POST /bulk-import` (100 records/request) and `POST /records` (1 record/request) should not share the same per-request limit. + +## Auth Scheme Selection in OpenAPI + +### Bearer (JWT / opaque tokens) + +```yaml +components: + securitySchemes: + BearerAuth: + type: http + scheme: bearer + bearerFormat: JWT # Informational only; not validated by tooling +``` + +Use for: user-facing APIs, APIs with short-lived session tokens, anywhere you need to embed claims without a database lookup. + +Document explicitly: +- Token location (always `Authorization: Bearer ` for `http` scheme — but confirm no cookie fallback) +- Token lifetime and refresh flow +- What 401 means (token absent or expired) vs. 403 (token valid, permission denied) + +### API Key + +```yaml +components: + securitySchemes: + ApiKeyHeader: + type: apiKey + in: header + name: X-API-Key +``` + +Use for: server-to-server, webhook consumers, partner integrations where user identity is not relevant. + +`in` can be `header`, `query`, or `cookie`. **Prefer `header`** — API keys in query strings appear in server logs, browser history, and referrer headers. + +### OAuth2 + +```yaml +components: + securitySchemes: + OAuth2: + type: oauth2 + flows: + authorizationCode: + authorizationUrl: https://auth.example.com/authorize + tokenUrl: https://auth.example.com/token + scopes: + read:users: Read user profiles + write:users: Create and update users +``` + +**Flow selection**: +| Consumer | Correct Flow | +|----------|-------------| +| Browser SPA or mobile app | `authorizationCode` with PKCE (no client secret) | +| Server-side web app | `authorizationCode` (can store client secret) | +| Machine-to-machine / service account | `clientCredentials` | +| Legacy / resource owner password flow | Avoid; `password` flow is deprecated in OAuth 2.1 | + +**What practitioners get wrong**: +- Using `clientCredentials` for user-delegated access. Client credentials authenticate the application, not a user. No user identity or user-scoped permissions are in the token. +- Omitting `scopes` from the spec. Undocumented scopes force consumers to reverse-engineer required permissions from 403 errors. +- Using the `implicit` flow. It is deprecated in OAuth 2.1 and has known security weaknesses. Use `authorizationCode` + PKCE for SPAs. + +## When Things Go Wrong + +| Situation | Likely Cause | Recovery | +|-----------|-------------|----------| +| Frontend reports missing items in paginated feed | Offset pagination on a writable resource | Migrate to cursor; add `next_cursor`/`prev_cursor` to response; deprecate `page`/`offset` params with a version sunset | +| Consumer cannot distinguish "field absent" from "field null" in PATCH | PATCH implementation does not differentiate | Define explicit null semantics in spec: document that absent = no-op, null = clear; enforce in validation layer | +| Multiple clients broken after adding a new enum value to response | Clients use exhaustive switch/match without default branch | Document all response enums as "open-ended"; add a breaking-change note to changelog; publish migration guide | +| Monitoring shows 100% success rate during outage | Errors returned as 200 with error body | Find all endpoints returning `success: false`; change to correct 4xx/5xx; this is always a breaking change — version accordingly | +| Rate limit scope is wrong (org-level limit but should be per-user) | Limit designed without multi-tenant analysis | Change scope; this is a behavioral breaking change; notify consumers before rollout | +| OpenAPI spec and implementation diverge over time | Code-first tooling not run as part of CI | Add spec regeneration to CI pipeline; fail CI on spec diff | + +## Workflow + +1. **Confirm protocol** — use the decision criteria table; document the rationale in the spec's `info.description` +2. **Define pagination strategy** — is any resource mutable during iteration? Yes → cursor required +3. **Define versioning policy** — document it in `info.description` before the first endpoint is designed +4. **Define error schema** — add a `ProblemDetails` component schema; reference it in every error response +5. **Author spec** — spec-first if consumers are parallel; code-first if solo/internal +6. **Classify each design decision** — for each field/endpoint, note whether future changes would be breaking +7. **Document rate limits** — every rate-limited endpoint gets response headers documented in spec +8. **Review auth scheme** — confirm flow type for each OAuth2 usage; confirm key location for API key auth + +**Natural next steps after this skill**: +- `openapi-to-typescript` — generate TypeScript interfaces from the completed spec for frontend consumers +- `backend-to-frontend-handoff-docs` — after implementation, generate consumer-facing behavioral documentation +- `requirements-clarity` — if design reveals unresolved requirement gaps, return upstream diff --git a/skills/backend-to-frontend-handoff-docs/SKILL.md b/skills/backend-to-frontend-handoff-docs/SKILL.md index faac6de..abba46e 100644 --- a/skills/backend-to-frontend-handoff-docs/SKILL.md +++ b/skills/backend-to-frontend-handoff-docs/SKILL.md @@ -1,122 +1,77 @@ --- name: backend-to-frontend-handoff-docs -description: Create API handoff documentation for frontend developers. Use when backend work is complete and needs to be documented for frontend integration, or user says 'create handoff', 'document API', 'frontend handoff', or 'API documentation'. +description: Generate API handoff documentation for frontend developers after backend work is complete. Use when backend implementation is done and needs to be documented for frontend integration. Trigger phrases: 'create handoff', 'document API', 'frontend handoff', 'API documentation', 'document endpoints for frontend'. --- -# API Handoff Mode +## Mindset -> **No Chat Output**: Produce the handoff document only. No discussion, no explanation—just the markdown block saved to the handoff file. +**You are writing a contract, not a tutorial.** Frontend developers (and their AI) should be able to implement integration with zero questions after reading this document. Every omission creates a Slack message, a meeting, or a bug. -You are a backend developer completing API work. Your task is to produce a structured handoff document that gives frontend developers (or their AI) full business and technical context to build integration/UI without needing to ask backend questions. +- **Asymmetry of pain**: The frontend developer discovers your omission at 4 PM on release day. Document what you know right now, even if it feels obvious. +- **Business logic leaks through the API**: If the backend enforces "a user can only submit once per 24-hour window per org," that rule must appear in the handoff — not just the HTTP 429 it produces. Frontend needs to disable the button, not just handle the error. +- **Error taxonomy beats status codes**: "Returns 422" tells frontend nothing. "Returns 422 with `{code: 'duplicate_submission', retryAfter: 86400}` when..." tells them how to build the error message. +- **Enums drift**: If the backend adds a new status mid-iteration and the handoff is stale, frontend silently mishandles it. Flag all enums as exhaustive or open-ended explicitly. +- **Auth is a contract, not an afterthought**: Document the token location (header vs cookie), the exact scope/role string, and what happens on expiry — not just "auth required." -> **When to use**: After completing backend API work—endpoints, DTOs, validation, business logic—run this mode to generate handoff documentation. +## Navigation -> **Simple API shortcut**: If the API is straightforward (CRUD, no complex business logic, obvious validation), skip the full template—just provide the endpoint, method, and example request/response JSON. Frontend can infer the rest. +**Use this skill when**: +- Backend API implementation is complete (endpoints, DTOs, validation, business logic done) +- A feature involves frontend integration that spans teams or AI agents +- API contracts changed and frontend needs a delta of what broke -## Goal -Produce a copy-paste-ready handoff document with all context a frontend AI needs to build UI/integration correctly and confidently. +**Do NOT use this skill when**: +- Backend work is still in progress — a premature handoff becomes a liability when contracts change +- The API is internal backend-to-backend (microservices, queue consumers) +- Frontend already has a Swagger/OpenAPI spec auto-generated from annotations — extend that instead of creating parallel docs -## Inputs -- Completed API code (endpoints, controllers, services, DTOs, validation). -- Related business context from the task/user story. -- Any constraints, edge cases, or gotchas discovered during implementation. +**Quick decision: full doc vs. shortcut** -## Workflow +| Signal | Action | +|--------|--------| +| CRUD endpoint, obvious field names, no business rules | Shortcut: endpoint + example JSON only | +| Any business logic, non-obvious validation, or state machine | Full template (see `references/TEMPLATE.md`) | +| Multiple related endpoints with shared DTOs | Full template; group by user flow, not by HTTP verb | -1. **Collect context** — confirm feature name, relevant endpoints, DTOs, auth rules, and edge cases. -2. **Create/update handoff file** — write the document to `.claude/docs/ai//api-handoff.md`. Increment the iteration suffix (`-v2`, `-v3`, …) if rerunning after feedback. -3. **Paste template** — fill every section below with concrete data. Omit subsections only when truly not applicable (note why). -4. **Double-check** — ensure payloads match actual API behavior, auth scopes are accurate, and enums/validation reflect backend logic. +## Philosophy -## Output Format +The handoff document is the API's *behavioral specification from the consumer's perspective*. It is not a description of what the backend does internally — it is a precise description of what the frontend can rely on, what it cannot assume, and what it must handle. -Produce a single markdown block structured as follows. Keep it dense—no fluff, no repetition. +## NEVER ---- +- **NEVER document internal class names, service layer names, or file paths** — because frontend contracts break when backend refactors, and internal names create cargo-cult coupling where frontend assumes structure that will change. +- **NEVER write "see code for details"** — because the handoff's entire purpose is to eliminate the need to read backend code. Any reference to source is an admission of failure. +- **NEVER use `any` or vague types like `object` in DTO examples** — because frontend TypeScript inference collapses to `any`, nullability bugs appear at runtime, and discriminated union narrowing is impossible without exact types. +- **NEVER omit the error response shape** — because frontend error handling almost always branches on the error body (`code`, `message`, `field`), not just on the HTTP status. A 422 with no documented body forces frontend to `console.log` in production. +- **NEVER document validation rules as "see frontend" or "same as UI"** — because backend validation is authoritative; frontend mirrors it for UX only. If the handoff doesn't list backend constraints, frontend re-derives them from HTTP errors at the worst possible time. +- **NEVER describe auth as "token required"** — because "token" is ambiguous across Bearer, cookie, API key, and session schemes. Document the exact header name, token format, scope string, and the HTTP status returned on auth failure (401 vs 403 mean different things). +- **NEVER omit nullability** — because `field?: string` (absent) vs `field: string | null` (present-but-null) are different serialization contracts. Frontend optional chaining fails differently on each, and the bug is invisible until edge data hits. +- **NEVER ship a handoff without at least one concrete example payload per endpoint** — because abstract field descriptions ("string, the user's name") are interpreted differently by every reader. A real example is unambiguous. +- **NEVER use "TBD" as a placeholder in a shipped handoff** — because frontend will assume TBD means "not my problem" and ship without handling that case. If it's genuinely unresolved, make it a blocking `## Open Questions` item with an owner and deadline. +- **NEVER generate the handoff before verifying that the actual implementation matches the docs** — because copy-pasting from a spec that was written before implementation is the #1 source of handoff drift. Read the actual controller/route handler before writing the response shape. -```markdown -# API Handoff: [Feature Name] - -## Business Context -[2-4 sentences: What problem does this solve? Who uses it? Why does it matter? Include any domain terms the frontend needs to understand.] - -## Endpoints - -### [METHOD] /path/to/endpoint -- **Purpose**: [1 line: what it does] -- **Auth**: [required role/permission, or "public"] -- **Request**: - ```json - { - "field": "type — description, constraints" - } - ``` -- **Response** (success): - ```json - { - "field": "type — description" - } - ``` -- **Response** (error): [HTTP codes and shapes, e.g., 422 validation, 404 not found] -- **Notes**: [edge cases, rate limits, pagination, sorting, anything non-obvious] - -[Repeat for each endpoint] - -## Data Models / DTOs -[List key models/DTOs the frontend will receive or send. Include field types, nullability, enums, and business meaning.] - -```typescript -// Example shape for frontend typing -interface ExampleDto { - id: number; - status: 'pending' | 'approved' | 'rejected'; - createdAt: string; // ISO 8601 -} -``` - -## Enums & Constants -[List any enums, status codes, or magic values the frontend needs to know. Include display labels if relevant.] - -| Value | Meaning | Display Label | -|-------|---------|---------------| -| `pending` | Awaiting review | Pending | - -## Validation Rules -[Summarize key validation rules the frontend should mirror for UX—required fields, min/max, formats, conditional rules.] - -## Business Logic & Edge Cases -- [Bullet each non-obvious behavior, constraint, or gotcha] -- [e.g., "User can only submit once per day", "Soft-deleted items excluded by default"] - -## Integration Notes -- **Recommended flow**: [e.g., "Fetch list → select item → submit form → poll for status"] -- **Optimistic UI**: [safe or not, why] -- **Caching**: [any cache headers, invalidation triggers] -- **Real-time**: [websocket events, polling intervals if applicable] - -## Test Scenarios -[Key scenarios frontend should handle—happy path, errors, edge cases. Use as acceptance criteria or test cases.] - -1. **Happy path**: [brief description] -2. **Validation error**: [what triggers it, expected response] -3. **Not found**: [when 404 is returned] -4. **Permission denied**: [when 403 is returned] - -## Open Questions / TODOs -[Anything unresolved, pending PM decision, or needs frontend input. If none, omit section.] -``` +## When Things Go Wrong ---- +| Situation | Likely Cause | Recovery | +|-----------|-------------|----------| +| Frontend says "we're getting a different response shape than documented" | Handoff written from spec, not from live code | Re-read the actual handler/serializer; update handoff; increment version suffix | +| Frontend asks "what does status X mean?" | Enum listed without business meaning or display label | Add `Meaning` and `Display Label` columns to enum table; see `references/TEMPLATE.md` | +| Frontend asks "when do we get a 401 vs 403?" | Auth section only said "auth required" | Document exact condition for each: 401 = not authenticated, 403 = authenticated but lacks role/scope | +| Handoff is already v4 and frontend still has questions | Business logic section is absent or too thin | Write a dedicated state machine table for multi-status workflows; move endpoint docs to secondary | +| Handoff is 2000 lines and nobody reads it | Full template applied to trivial CRUD | Use the shortcut path; inline the example JSON; drop sections that add no information | + +## Workflow + +1. **Read the actual implementation** — controller/route handler, DTO/schema, middleware. Do not rely on memory or spec. +2. **Classify complexity** — use the Navigation decision table to pick full vs. shortcut path. +3. **Fill the template** — see `references/TEMPLATE.md` for the full document scaffold. +4. **Write to file** — save to `.claude/docs/ai//api-handoff.md`. Use `-v2`, `-v3` suffixes for iterations. +5. **Verify before saving** — every field name, type, and status code must match the live implementation. + +**Output rule**: Write the markdown directly to the file. Do not echo it in chat. Reference the file path in your response only. + +If the project has an OpenAPI spec, recommend openapi-to-typescript to the frontend team as an alternative to hand-reading the handoff doc — it generates typed interfaces directly from the spec. + +## Reference Files -## Rules -- **NO CHAT OUTPUT**—produce only the handoff markdown block, nothing else. -- Be precise: types, constraints, examples—not vague prose. -- Include real example payloads where helpful. -- Surface non-obvious behaviors—don't assume frontend will "just know." -- If backend made trade-offs or assumptions, document them. -- Keep it scannable: headers, tables, bullets, code blocks. -- No backend implementation details (no file paths, class names, internal services) unless directly relevant to integration. -- If something is incomplete or TBD, say so explicitly. - -## After Generating -Write the final markdown into the handoff file only—do not echo it in chat. (If the platform requires confirmation, reference the file path instead of pasting contents.) +- `references/TEMPLATE.md` — Full handoff document scaffold with all sections diff --git a/skills/backend-to-frontend-handoff-docs/references/TEMPLATE.md b/skills/backend-to-frontend-handoff-docs/references/TEMPLATE.md new file mode 100644 index 0000000..e99cb3a --- /dev/null +++ b/skills/backend-to-frontend-handoff-docs/references/TEMPLATE.md @@ -0,0 +1,161 @@ +# API Handoff Template + +Use this scaffold for full handoffs (complex business logic, state machines, non-trivial validation). +For simple CRUD, use the shortcut: endpoint + example JSON + error codes only. + +--- + +```markdown +# API Handoff: [Feature Name] +**Version**: v1 | **Date**: YYYY-MM-DD | **Backend author**: [name/handle] + +## Business Context +[2–4 sentences: What problem does this solve? Who is the user? What domain terms does frontend need? What invariants does the backend enforce that frontend must understand?] + +--- + +## Endpoints + +### [METHOD] /path/to/endpoint + +- **Purpose**: [One line: what this endpoint does from the consumer's perspective] +- **Auth**: [Exact header: `Authorization: Bearer ` with scope `expenses:approve`] OR [`public`] + - 401 → not authenticated (redirect to login) + - 403 → authenticated but lacks role `manager` (show permission error, do not retry) +- **Request**: + ```json + { + "field": "value — type, constraints (e.g., required, max 500 chars, must match /^[A-Z]{2}$/)" + } + ``` +- **Response** (200/201): + ```json + { + "field": "value — type, nullability (e.g., string | null), meaning" + } + ``` +- **Response** (error): + | Status | Condition | Body shape | + |--------|-----------|------------| + | 400 | Malformed JSON | `{"error": "bad_request"}` | + | 422 | Validation failure | `{"errors": [{"field": "amount", "code": "too_large", "message": "..."}]}` | + | 409 | Duplicate submission within window | `{"code": "duplicate_submission", "retryAfter": 86400}` | + | 404 | Resource not found or soft-deleted | `{"error": "not_found"}` | +- **Notes**: [Rate limits, pagination cursor vs. offset, idempotency key, ordering guarantees, anything non-obvious] + +[Repeat block for each endpoint] + +--- + +## Data Models / DTOs + +```typescript +// Exact TypeScript shape frontend should use for typing +interface ExampleDto { + id: number; // always present + status: 'pending' | 'approved' | 'rejected'; // exhaustive — no other values + comment: string | null; // present-but-null when no comment (not absent) + createdAt: string; // ISO 8601 UTC, e.g. "2026-01-18T10:30:00Z" + approvedBy?: { // absent (not null) when status !== 'approved' + id: number; + displayName: string; + }; +} +``` + +**Nullability conventions used in this API**: +- `field?: T` — key absent from response (use `?.` access) +- `field: T | null` — key present, value null (use `=== null` check) + +--- + +## Enums & Constants + +| Value | Meaning | Display Label | Terminal? | +|-------|---------|---------------|-----------| +| `pending` | Submitted, awaiting review | Pending | No | +| `approved` | Approved by manager | Approved | Yes | +| `rejected` | Rejected by manager | Rejected | Yes | + +**Is this enum exhaustive?** YES — backend will not add values without a major version bump. +(If NO: frontend must handle unknown values gracefully, e.g., display as "Unknown status".) + +--- + +## Validation Rules + +Frontend should mirror these for UX (backend enforces them authoritatively): + +| Field | Rule | UX implication | +|-------|------|----------------| +| `amount` | Required, positive number, max 2 decimal places, ≤ 10000 | Show currency input; cap at 10,000; disable submit if 0 | +| `comment` | Optional, max 500 chars, trimmed | Show character counter at 400+ chars | +| `category` | Required, must match enum values | Dropdown, not free text | + +--- + +## Business Logic & State Machine + +[Use when the feature has meaningful state transitions. Skip for stateless CRUD.] + +``` +pending ──[approve]──► approved (terminal) +pending ──[reject]───► rejected (terminal) +``` + +- **Transition rules enforced by backend** (frontend must handle the error, not prevent the action client-side): + - Only users with role `manager` can approve/reject + - Cannot approve own submissions + - Terminal states cannot transition (422 with `code: already_finalized`) + - Approval window: must occur within 30 days of submission (422 with `code: window_expired`) + +--- + +## Integration Notes + +- **Recommended flow**: [e.g., "Fetch list (GET /expenses) → select item → open modal → POST /expenses/:id/approve → refresh list item via GET /expenses/:id"] +- **Optimistic UI**: [SAFE / NOT SAFE — reason. "Not safe: permission checks on approve depend on server-side role resolution."] +- **Caching**: [e.g., "List response includes `Cache-Control: max-age=30`. Invalidate on POST/PATCH to this resource."] +- **Real-time**: [e.g., "No websocket. Poll GET /expenses/:id every 5s while status=pending if you need live updates."] +- **Pagination**: [cursor-based: `?cursor=&limit=25` | offset-based: `?page=1&pageSize=25` | none] + +--- + +## Test Scenarios + +Cover these in frontend integration tests or manual QA: + +1. **Happy path**: [e.g., "Manager approves pending expense → 200 → UI shows 'Approved' badge"] +2. **Validation error**: [e.g., "Submit with amount=0 → 422 → show inline field error"] +3. **Auth failure**: [e.g., "Non-manager clicks approve → 403 → show 'Permission denied' toast, do not navigate away"] +4. **Conflict**: [e.g., "Approve already-approved expense → 422 `already_finalized` → show 'Already processed' message"] +5. **Not found**: [e.g., "Fetch deleted expense → 404 → redirect to list with 'Item no longer available' message"] + +--- + +## Open Questions / TODOs + +| Question | Owner | Deadline | +|----------|-------|----------| +| [e.g., "Should partial approvals be supported in Q2?"] | [PM name] | [date] | + +[Omit section entirely if none.] +``` + +--- + +## Shortcut Template (Simple CRUD) + +For endpoints with no business logic, obvious field names, standard CRUD semantics: + +```markdown +# API Handoff: [Feature Name] (Minimal) + +### GET /api/users/:id +- **Auth**: Bearer token required (any authenticated user) +- **Response** (200): + ```json + { "id": 1, "email": "user@example.com", "displayName": "Jane Smith", "avatarUrl": "https://..." } + ``` +- 404 if user not found or soft-deleted. +``` diff --git a/skills/c4-architecture/SKILL.md b/skills/c4-architecture/SKILL.md index ed972bc..7fd49b4 100644 --- a/skills/c4-architecture/SKILL.md +++ b/skills/c4-architecture/SKILL.md @@ -7,26 +7,80 @@ description: Generate architecture documentation using C4 model Mermaid diagrams Generate software architecture documentation using C4 model diagrams in Mermaid syntax. -## Workflow +## Mindset + +Expert C4 practitioners think in these terms — not in diagram syntax: + +1. **Audience first, diagram second.** The question is never "which diagram type?" — it is "who needs to understand what, and what do they already know?" Context diagrams for executives; Container + Deployment for DevOps; Component only when a developer is lost navigating the codebase. + +2. **Containers are deployment units, not logical groupings.** If you cannot `docker run` it, `kubectl apply` it, or deploy it independently, it is a Component — not a Container. Misclassifying this one thing causes 80% of C4 diagram confusion. + +3. **Stop at the level that answers the question.** Creating Level 3 Component diagrams "just in case" adds maintenance burden with no clarity gain. Context + Container diagrams are sufficient for most teams. + +4. **Name relationships with verb phrases, not nouns.** "Reads customer data from" is better than "Database connection". Relationships carry meaning; labels are not decoration. + +5. **Diagram sprawl is worse than no diagram.** One accurate Context diagram beats ten stale Component diagrams. Keep diagrams minimal enough that they stay current. -1. **Understand scope** - Determine which C4 level(s) are needed based on audience -2. **Analyze codebase** - Explore the system to identify components, containers, and relationships -3. **Generate diagrams** - Create Mermaid C4 diagrams at appropriate abstraction levels -4. **Document** - Write diagrams to markdown files with explanatory context +## Navigation -## C4 Diagram Levels +**Use this skill when:** +- Asked to document, visualize, or diagram system architecture +- Onboarding new engineers who need a map of the system +- Planning a new system or major feature and need to communicate design +- Performing architecture review and need a baseline -Select the appropriate level based on the documentation need: +**Do NOT use this skill when:** +- The user wants a sequence diagram for a single request flow → use a standard Mermaid sequence diagram instead +- The user wants an entity-relationship diagram → use Mermaid `erDiagram` +- The user wants a network topology diagram with physical hardware detail → use a deployment diagram only if C4 conventions fit, otherwise note the limitation -| Level | Diagram Type | Audience | Shows | When to Create | -|-------|-------------|----------|-------|----------------| -| 1 | **C4Context** | Everyone | System + external actors | Always (required) | -| 2 | **C4Container** | Technical | Apps, databases, services | Always (required) | -| 3 | **C4Component** | Developers | Internal components | Only if adds value | -| 4 | **C4Deployment** | DevOps | Infrastructure nodes | For production systems | -| - | **C4Dynamic** | Technical | Request flows (numbered) | For complex workflows | +### Decision Tree: Which level(s) to generate? -**Key Insight:** "Context + Container diagrams are sufficient for most software development teams." Only create Component/Code diagrams when they genuinely add value. +``` +Start: Who is the primary audience? +│ +├─ Non-technical (executives, product) → Level 1 ONLY (C4Context) +│ +├─ Technical but not hands-on (architects, TPMs) → Level 1 + Level 2 (C4Container) +│ +├─ Developers unfamiliar with the codebase → Level 1 + Level 2 + Level 3 for the specific area in question (C4Component) +│ +├─ DevOps / SRE / infra → Level 2 + Level 4 (C4Container + C4Deployment) +│ +└─ Complex async workflow needs explaining → Add C4Dynamic for that flow only +``` + +### Decision Tree: How to model microservices ownership? + +``` +Microservice owned by... +│ +├─ Same team as the system being documented → model as Container inside a System_Boundary +│ +└─ Different team → model as System_Ext at Level 1; only expand to Container in that team's own diagram +``` + +### Decision Tree: How to model a message broker (Kafka, RabbitMQ, SQS)? + +``` +Does the diagram need to show data flows? +│ +├─ Yes → show individual topics/queues as ContainerQueue elements; do NOT show "Kafka" as a single box +│ +└─ No (just showing system exists) → single System or Container named for the broker is acceptable +``` + +## Philosophy + +The C4 model's power is that each level answers exactly one question for one audience — the moment a diagram tries to answer two questions, it fails both. Generate diagrams at the lowest level of abstraction that resolves the audience's actual question. + +## Workflow + +1. **Identify audience and question** — determine who will read this and what decision it supports +2. **Select level(s)** — use the decision trees above; default to Level 1 + Level 2 only +3. **Analyze codebase** — identify containers, external systems, and key relationships +4. **Generate diagrams** — write Mermaid C4 diagrams at selected levels +5. **Document** — write to `docs/architecture/` with standard naming (see Output Location) ## Quick Start Examples @@ -61,199 +115,7 @@ C4Container Rel(pinia, indexeddb, "Persists", "Dexie ORM") ``` -### Component Diagram (Level 3) -```mermaid -C4Component - title Component Diagram - Workout Feature - - Container(views, "Views", "Vue Router pages") - - Container_Boundary(workout, "Workout Feature") { - Component(useWorkout, "useWorkout", "Composable", "Workout execution state") - Component(useTimer, "useTimer", "Composable", "Timer state machine") - Component(workoutRepo, "WorkoutRepository", "Dexie", "Workout persistence") - } - - Rel(views, useWorkout, "Uses") - Rel(useWorkout, useTimer, "Controls") - Rel(useWorkout, workoutRepo, "Saves to") -``` - -### Dynamic Diagram (Request Flow) -```mermaid -C4Dynamic - title Dynamic Diagram - User Sign In Flow - - ContainerDb(db, "Database", "PostgreSQL", "User credentials") - Container(spa, "Single-Page App", "React", "Banking UI") - - Container_Boundary(api, "API Application") { - Component(signIn, "Sign In Controller", "Express", "Auth endpoint") - Component(security, "Security Service", "JWT", "Validates credentials") - } - - Rel(spa, signIn, "1. Submit credentials", "JSON/HTTPS") - Rel(signIn, security, "2. Validate") - Rel(security, db, "3. Query user", "SQL") - - UpdateRelStyle(spa, signIn, $textColor="blue", $offsetY="-30") -``` - -### Deployment Diagram -```mermaid -C4Deployment - title Deployment Diagram - Production - - Deployment_Node(browser, "Customer Browser", "Chrome/Firefox") { - Container(spa, "SPA", "React", "Web application") - } - - Deployment_Node(aws, "AWS Cloud", "us-east-1") { - Deployment_Node(ecs, "ECS Cluster", "Fargate") { - Container(api, "API Service", "Node.js", "REST API") - } - Deployment_Node(rds, "RDS", "db.r5.large") { - ContainerDb(db, "Database", "PostgreSQL", "Application data") - } - } - - Rel(spa, api, "API calls", "HTTPS") - Rel(api, db, "Reads/writes", "JDBC") -``` - -## Element Syntax - -### People and Systems -``` -Person(alias, "Label", "Description") -Person_Ext(alias, "Label", "Description") # External person -System(alias, "Label", "Description") -System_Ext(alias, "Label", "Description") # External system -SystemDb(alias, "Label", "Description") # Database system -SystemQueue(alias, "Label", "Description") # Queue system -``` - -### Containers -``` -Container(alias, "Label", "Technology", "Description") -Container_Ext(alias, "Label", "Technology", "Description") -ContainerDb(alias, "Label", "Technology", "Description") -ContainerQueue(alias, "Label", "Technology", "Description") -``` - -### Components -``` -Component(alias, "Label", "Technology", "Description") -Component_Ext(alias, "Label", "Technology", "Description") -ComponentDb(alias, "Label", "Technology", "Description") -``` - -### Boundaries -``` -Enterprise_Boundary(alias, "Label") { ... } -System_Boundary(alias, "Label") { ... } -Container_Boundary(alias, "Label") { ... } -Boundary(alias, "Label", "type") { ... } -``` - -### Relationships -``` -Rel(from, to, "Label") -Rel(from, to, "Label", "Technology") -BiRel(from, to, "Label") # Bidirectional -Rel_U(from, to, "Label") # Upward -Rel_D(from, to, "Label") # Downward -Rel_L(from, to, "Label") # Leftward -Rel_R(from, to, "Label") # Rightward -``` - -### Deployment Nodes -``` -Deployment_Node(alias, "Label", "Type", "Description") { ... } -Node(alias, "Label", "Type", "Description") { ... } # Shorthand -``` - -## Styling and Layout - -### Layout Configuration -``` -UpdateLayoutConfig($c4ShapeInRow="3", $c4BoundaryInRow="1") -``` -- `$c4ShapeInRow` - Number of shapes per row (default: 4) -- `$c4BoundaryInRow` - Number of boundaries per row (default: 2) - -### Element Styling -``` -UpdateElementStyle(alias, $fontColor="red", $bgColor="grey", $borderColor="red") -``` - -### Relationship Styling -``` -UpdateRelStyle(from, to, $textColor="blue", $lineColor="blue", $offsetX="5", $offsetY="-10") -``` -Use `$offsetX` and `$offsetY` to fix overlapping relationship labels. - -## Best Practices - -### Essential Rules - -1. **Every element must have**: Name, Type, Technology (where applicable), and Description -2. **Use unidirectional arrows only** - Bidirectional arrows create ambiguity -3. **Label arrows with action verbs** - "Sends email using", "Reads from", not just "uses" -4. **Include technology labels** - "JSON/HTTPS", "JDBC", "gRPC" -5. **Stay under 20 elements per diagram** - Split complex systems into multiple diagrams - -### Clarity Guidelines - -1. **Start at Level 1** - Context diagrams help frame the system scope -2. **One diagram per file** - Keep diagrams focused on a single abstraction level -3. **Meaningful aliases** - Use descriptive aliases (e.g., `orderService` not `s1`) -4. **Concise descriptions** - Keep descriptions under 50 characters when possible -5. **Always include a title** - "System Context diagram for [System Name]" - -### What to Avoid - -See [references/common-mistakes.md](references/common-mistakes.md) for detailed anti-patterns: -- Confusing containers (deployable) vs components (non-deployable) -- Modeling shared libraries as containers -- Showing message brokers as single containers instead of individual topics -- Adding undefined abstraction levels like "subcomponents" -- Removing type labels to "simplify" diagrams - -## Microservices Guidelines - -### Single Team Ownership -Model each microservice as a **container** (or container group): -```mermaid -C4Container - title Microservices - Single Team - - System_Boundary(platform, "E-commerce Platform") { - Container(orderApi, "Order Service", "Spring Boot", "Order processing") - ContainerDb(orderDb, "Order DB", "PostgreSQL", "Order data") - Container(inventoryApi, "Inventory Service", "Node.js", "Stock management") - ContainerDb(inventoryDb, "Inventory DB", "MongoDB", "Stock data") - } -``` - -### Multi-Team Ownership -Promote microservices to **software systems** when owned by separate teams: -```mermaid -C4Context - title Microservices - Multi-Team - - Person(customer, "Customer", "Places orders") - System(orderSystem, "Order System", "Team Alpha") - System(inventorySystem, "Inventory System", "Team Beta") - System(paymentSystem, "Payment System", "Team Gamma") - - Rel(customer, orderSystem, "Places orders") - Rel(orderSystem, inventorySystem, "Checks stock") - Rel(orderSystem, paymentSystem, "Processes payment") -``` - -### Event-Driven Architecture -Show individual topics/queues as containers, NOT a single "Kafka" box: +### Event-Driven Architecture (Kafka/queues) ```mermaid C4Container title Event-Driven Architecture @@ -269,14 +131,36 @@ C4Container Rel(orderService, stockTopic, "Subscribes to") ``` +> Full syntax for all element types, styling, and layout: [references/c4-syntax.md](references/c4-syntax.md) + +## NEVER + +- **NEVER model a Java/Python class or module as a Container.** Containers are independently deployable. Misclassifying creates diagrams that look technical but communicate nothing — developers will correctly distrust them. +- **NEVER show a message broker (Kafka, RabbitMQ) as a single container when data flows matter.** A "Kafka" box hides which services are coupled to which topics — the exact information the diagram needs to convey. +- **NEVER create Component diagrams without a specific developer question driving them.** Component diagrams become stale within weeks of code changes; they cost more to maintain than they save unless targeting a specific navigation problem. +- **NEVER use bidirectional arrows (`BiRel`) as a default.** Most relationships have a initiator and a responder. `BiRel` signals "I wasn't sure" and destroys the ability to trace data flows through the diagram. +- **NEVER label relationships with nouns alone** (e.g., "API", "Database"). Noun-only labels make the diagram look complete while conveying nothing. Always use a verb phrase: "Submits orders via", "Reads config from". +- **NEVER put implementation detail (method names, SQL queries, config keys) in a C4 diagram.** C4 diagrams communicate structure and intent at a business-meaningful level; implementation belongs in code comments and ADRs. +- **NEVER remove type labels from elements to "simplify" the diagram.** The technology label (e.g., "PostgreSQL", "React") is load-bearing — it tells the reader what kind of thing this is and what constraints apply. + +## When Things Go Wrong + +| Symptom | Likely cause | Fix | +|---------|-------------|-----| +| Mermaid renders a blank diagram | Missing diagram type keyword (`C4Context`, `C4Container`, etc.) or syntax error in element line | Validate each line; check that all `{` boundaries are closed | +| Diagram is unreadably crowded | Too many elements at one level | Split: use `System_Ext` to collapse external systems; create separate diagram per bounded context | +| Reviewers argue about what counts as a "container" | Mixing deployable units with logical groupings | Reframe: "can this be restarted/scaled independently?" — yes = Container, no = Component | +| Event-driven system looks like spaghetti | Showing broker as single node with many arrows | Replace single broker box with individual `ContainerQueue` elements per topic; group by owning service | +| Multi-team microservices diagram is unnavigable | All services at same level regardless of ownership | Apply ownership decision tree: cross-team services become `System_Ext` at Level 1 | + ## Output Location Write architecture documentation to `docs/architecture/` with naming convention: -- `c4-context.md` - System context diagram -- `c4-containers.md` - Container diagram -- `c4-components-{feature}.md` - Component diagrams per feature -- `c4-deployment.md` - Deployment diagram -- `c4-dynamic-{flow}.md` - Dynamic diagrams for specific flows +- `c4-context.md` — System context diagram +- `c4-containers.md` — Container diagram +- `c4-components-{feature}.md` — Component diagrams per feature +- `c4-deployment.md` — Deployment diagram +- `c4-dynamic-{flow}.md` — Dynamic diagrams for specific flows ## Audience-Appropriate Detail @@ -290,6 +174,6 @@ Write architecture documentation to `docs/architecture/` with naming convention: ## References -- [references/c4-syntax.md](references/c4-syntax.md) - Complete Mermaid C4 syntax -- [references/common-mistakes.md](references/common-mistakes.md) - Anti-patterns to avoid -- [references/advanced-patterns.md](references/advanced-patterns.md) - Microservices, event-driven, deployment +- [references/c4-syntax.md](references/c4-syntax.md) — Complete Mermaid C4 syntax (all element types, boundaries, relationships, styling, layout) +- [references/common-mistakes.md](references/common-mistakes.md) — Anti-patterns with corrected examples +- [references/advanced-patterns.md](references/advanced-patterns.md) — Microservices, event-driven, multi-team, deployment patterns diff --git a/skills/codex/SKILL.md b/skills/codex/SKILL.md index 82f09ba..036d063 100644 --- a/skills/codex/SKILL.md +++ b/skills/codex/SKILL.md @@ -1,66 +1,94 @@ --- name: codex -description: Use when the user asks to run Codex CLI (codex exec, codex resume) or references OpenAI Codex for code analysis, refactoring, or automated editing. Uses GPT-5.2 by default for state-of-the-art software engineering. +description: Use when the user asks to run Codex CLI (codex exec, codex resume), invokes OpenAI Codex for code analysis, refactoring, automated editing, or multi-step agentic coding workflows. Trigger phrases: "use codex", "run codex", "codex resume", "codex analyze", "codex refactor". Uses GPT-5.2 by default. --- -# Codex Skill Guide +## Mindset + +- **Sandbox discipline first** — the sandbox choice is the highest-stakes decision. Read-only is the safe default; escalate only when the task genuinely cannot complete without writes or network. Never let convenience drive you to `danger-full-access`. +- **Stderr is noise, not signal** — Codex streams thinking tokens to stderr. Suppress with `2>/dev/null` by default; only surface stderr when the user explicitly asks for reasoning traces or when diagnosing a failure. +- **Resume sessions, don't restart them** — re-running `codex exec` from scratch loses accumulated context. If a prior session exists and the user wants to continue, always reach for `resume --last`. +- **Flag placement on resume is load-bearing** — flags between `exec` and `resume` set session-level config; flags after `resume` are task-level. Mixing them silently corrupts behavior. +- **Reasoning effort multiplies cost non-linearly** — `xhigh` can run 5–10× longer than `medium` on hard tasks. Match effort to actual complexity; don't default to max. + +## Navigation + +**Use this skill when**: +- User says "use codex", "run codex", "codex exec", "codex resume", or "codex analyze/refactor" +- Task requires an agentic coding loop (plan → edit → verify) across multiple files +- User wants to leverage GPT-5.2's SWE-bench-optimized reasoning on a codebase + +**Do NOT use this skill when**: +- The user wants Claude Code itself to edit files (no Codex invocation needed) +- Quick single-file edits that Claude can perform directly — Codex overhead isn't worth it +- User hasn't confirmed `codex --version` works (unresolved install errors will silently fail) + +**Sandbox selection** (most common ambiguity): + +``` +Writing files? + NO → read-only + YES → Network or subprocess needed? + NO → workspace-write + YES → danger-full-access (ask user first) +``` + +## Philosophy + +Codex is a second reasoning engine, not a shell shortcut. Use it when the problem requires sustained multi-step agency — planning, editing, verifying — that exceeds what a single Claude Code turn can deliver reliably. + +## NEVER + +- **NEVER use `--sandbox danger-full-access` without explicit user confirmation** — it grants process spawning, arbitrary network access, and filesystem writes with no guardrails; a single bad prompt can modify or exfiltrate files outside the working directory. +- **NEVER omit `--skip-git-repo-check`** — without it, Codex aborts when invoked outside a git root, and the error message is cryptic enough that users assume Codex is broken rather than misconfigured. +- **NEVER pass model or effort flags after `resume --last`** — they appear to be accepted but are silently ignored; the session inherits its original config. Passing them misleads the user into thinking they changed the behavior. +- **NEVER show stderr by default** — thinking tokens flood Claude Code's context window, consuming tokens that degrade subsequent turns. Suppress with `2>/dev/null` unless the user explicitly asks for reasoning traces. +- **NEVER restart a session when `resume --last` exists** — restarting drops accumulated file context, tool call history, and any partial edits Codex was tracking. Resume is almost always correct. +- **NEVER use `xhigh` reasoning on simple tasks** — it can run 5–10× longer and cost significantly more without improving output quality on straightforward changes like formatting or minor bug fixes. +- **NEVER skip `codex --version` when the user reports unexpected behavior** — CLI version mismatches (pre-v0.57.0) silently fall back to older models rather than erroring, making GPT-5.2 features unavailable. + +## When Things Go Wrong + +| Situation | Likely Cause | Recovery | +|-----------|-------------|----------| +| `codex exec` exits non-zero immediately | Missing `--skip-git-repo-check` outside git root, or CLI not on PATH | Add flag; verify `codex --version` succeeds | +| Resume produces wrong model/effort | Flags placed after `resume` keyword instead of between `exec` and `resume` | Correct flag order: `codex exec [flags] --skip-git-repo-check resume --last` | +| Output is empty or truncated | stderr not suppressed, thinking tokens consumed context budget | Add `2>/dev/null`; if debugging, redirect stderr to a file instead | +| Task completes but no edits appear | `read-only` sandbox used for an edit task | Rerun with `--sandbox workspace-write --full-auto` | +| Cost/time far exceeds expectation | Reasoning effort set to `xhigh` or `high` for a simple task | Drop to `medium` or `low`; verify with `model_reasoning_effort` config | + +--- ## Running a Task -1. Default to `gpt-5.2` model. Ask the user (via `AskUserQuestion`) which reasoning effort to use (`xhigh`,`high`, `medium`, or `low`). User can override model if needed (see Model Options below). -2. Select the sandbox mode required for the task; default to `--sandbox read-only` unless edits or network access are necessary. -3. Assemble the command with the appropriate options: - - `-m, --model ` - - `--config model_reasoning_effort=""` - - `--sandbox ` - - `--full-auto` - - `-C, --cd ` - - `--skip-git-repo-check` -3. Always use --skip-git-repo-check. -4. When continuing a previous session, use `codex exec --skip-git-repo-check resume --last` via stdin. When resuming don't use any configuration flags unless explicitly requested by the user e.g. if he species the model or the reasoning effort when requesting to resume a session. Resume syntax: `echo "your prompt here" | codex exec --skip-git-repo-check resume --last 2>/dev/null`. All flags have to be inserted between exec and resume. -5. **IMPORTANT**: By default, append `2>/dev/null` to all `codex exec` commands to suppress thinking tokens (stderr). Only show stderr if the user explicitly requests to see thinking tokens or if debugging is needed. -6. Run the command, capture stdout/stderr (filtered as appropriate), and summarize the outcome for the user. -7. **After Codex completes**, inform the user: "You can resume this Codex session at any time by saying 'codex resume' or asking me to continue with additional analysis or changes." - -### Quick Reference -| Use case | Sandbox mode | Key flags | -| --- | --- | --- | -| Read-only review or analysis | `read-only` | `--sandbox read-only 2>/dev/null` | -| Apply local edits | `workspace-write` | `--sandbox workspace-write --full-auto 2>/dev/null` | -| Permit network or broad access | `danger-full-access` | `--sandbox danger-full-access --full-auto 2>/dev/null` | -| Resume recent session | Inherited from original | `echo "prompt" \| codex exec --skip-git-repo-check resume --last 2>/dev/null` (no flags allowed) | -| Run from another directory | Match task needs | `-C ` plus other flags `2>/dev/null` | - -## Model Options - -| Model | Best for | Context window | Key features | -| --- | --- | --- | --- | -| `gpt-5.2-max` | **Max model**: Ultra-complex reasoning, deep problem analysis | 400K input / 128K output | 76.3% SWE-bench, adaptive reasoning, $1.25/$10.00 | -| `gpt-5.2` ⭐ | **Flagship model**: Software engineering, agentic coding workflows | 400K input / 128K output | 76.3% SWE-bench, adaptive reasoning, $1.25/$10.00 | -| `gpt-5.2-mini` | Cost-efficient coding (4x more usage allowance) | 400K input / 128K output | Near SOTA performance, $0.25/$2.00 | -| `gpt-5.1-thinking` | Ultra-complex reasoning, deep problem analysis | 400K input / 128K output | Adaptive thinking depth, runs 2x slower on hardest tasks | - -**GPT-5.2 Advantages**: 76.3% SWE-bench (vs 72.8% GPT-5), 30% faster on average tasks, better tool handling, reduced hallucinations, improved code quality. Knowledge cutoff: September 30, 2024. - -**Reasoning Effort Levels**: -- `xhigh` - Ultra-complex tasks (deep problem analysis, complex reasoning, deep understanding of the problem) -- `high` - Complex tasks (refactoring, architecture, security analysis, performance optimization) -- `medium` - Standard tasks (refactoring, code organization, feature additions, bug fixes) -- `low` - Simple tasks (quick fixes, simple changes, code formatting, documentation) - -**Cached Input Discount**: 90% off ($0.125/M tokens) for repeated context, cache lasts up to 24 hours. -## Following Up -- After every `codex` command, immediately use `AskUserQuestion` to confirm next steps, collect clarifications, or decide whether to resume with `codex exec resume --last`. -- When resuming, pipe the new prompt via stdin: `echo "new prompt" | codex exec resume --last 2>/dev/null`. The resumed session automatically uses the same model, reasoning effort, and sandbox mode from the original session. -- Restate the chosen model, reasoning effort, and sandbox mode when proposing follow-up actions. +1. Check `codex --version` if behavior is unexpected (requires v0.57.0+ for GPT-5.2). +2. Ask user for reasoning effort via `AskUserQuestion` (`xhigh` / `high` / `medium` / `low`). Default model: `gpt-5.2`. +3. Select sandbox using the decision tree in **Navigation** above. +4. Assemble the command: + ```bash + codex exec \ + -m gpt-5.2 \ + --config model_reasoning_effort="" \ + --sandbox \ + --full-auto \ + --skip-git-repo-check \ + [-C ] \ + "your prompt" 2>/dev/null + ``` +5. After completion, offer resume: "You can continue this session by saying 'codex resume'." -## Error Handling -- Stop and report failures whenever `codex --version` or a `codex exec` command exits non-zero; request direction before retrying. -- Before you use high-impact flags (`--full-auto`, `--sandbox danger-full-access`, `--skip-git-repo-check`) ask the user for permission using AskUserQuestion unless it was already given. -- When output includes warnings or partial results, summarize them and ask how to adjust using `AskUserQuestion`. +## Resuming a Session -## CLI Version +```bash +echo "your follow-up prompt" | codex exec --skip-git-repo-check resume --last 2>/dev/null +``` -Requires Codex CLI v0.57.0 or later for GPT-5.2 model support. The CLI defaults to `gpt-5.2` on macOS/Linux and `gpt-5.2` on Windows. Check version: `codex --version` +All flags go between `exec` and `resume`. Do not add model/effort flags unless the user explicitly overrides them. + +## Following Up + +After every `codex exec`, use `AskUserQuestion` to confirm next steps or collect clarifications. Restate model, effort, and sandbox when proposing follow-up actions. + +--- -Use `/model` slash command within a Codex session to switch models, or configure default in `~/.codex/config.toml`. +**Full model/sandbox reference**: `references/model-sandbox-reference.md` diff --git a/skills/codex/references/model-sandbox-reference.md b/skills/codex/references/model-sandbox-reference.md new file mode 100644 index 0000000..c0e06f0 --- /dev/null +++ b/skills/codex/references/model-sandbox-reference.md @@ -0,0 +1,55 @@ +# Codex CLI — Model & Sandbox Reference + +## Model Comparison Table + +| Model | Best for | Context | Pricing (in/out per M) | +|-------|----------|---------|------------------------| +| `gpt-5.2-max` | Ultra-complex reasoning, deep architectural analysis | 400K / 128K | $1.25 / $10.00 | +| `gpt-5.2` ⭐ | Flagship — software engineering, agentic coding | 400K / 128K | $1.25 / $10.00 | +| `gpt-5.2-mini` | Cost-efficient; 4× usage headroom | 400K / 128K | $0.25 / $2.00 | +| `gpt-5.1-thinking` | Ultra-complex reasoning; runs ~2× slower on hardest tasks | 400K / 128K | varies | + +**Cached input discount**: 90% off ($0.125/M) for repeated context; cache lasts up to 24 hours. + +## Reasoning Effort Levels + +| Level | When to use | +|-------|-------------| +| `xhigh` | Deep problem analysis, complex multi-file reasoning | +| `high` | Refactoring, architecture, security analysis, performance | +| `medium` | Feature additions, bug fixes, code organization | +| `low` | Quick fixes, formatting, simple documentation changes | + +## Sandbox Mode Decision Tree + +``` +Does the task require writing files? +├── NO → --sandbox read-only (analysis, review, Q&A) +└── YES → Does it require network or process spawning? + ├── NO → --sandbox workspace-write (edits, refactors) + └── YES → --sandbox danger-full-access (install deps, run tests, CI) +``` + +## Quick Reference Command Table + +| Use case | Sandbox | Key flags | +|----------|---------|-----------| +| Read-only review/analysis | `read-only` | `--sandbox read-only 2>/dev/null` | +| Apply local edits | `workspace-write` | `--sandbox workspace-write --full-auto 2>/dev/null` | +| Network or broad access | `danger-full-access` | `--sandbox danger-full-access --full-auto 2>/dev/null` | +| Resume recent session | Inherited | `echo "prompt" \| codex exec --skip-git-repo-check resume --last 2>/dev/null` | +| Run from another directory | Match task | `-C ` plus other flags `2>/dev/null` | + +## Resume Session Rules + +- Resume syntax: `echo "your prompt" | codex exec --skip-git-repo-check resume --last 2>/dev/null` +- All flags go **between** `exec` and `resume` +- Do NOT pass model/effort flags on resume unless user explicitly requests — session inherits originals +- `--skip-git-repo-check` is always required, even on resume + +## CLI Version Requirements + +- Requires Codex CLI **v0.57.0+** for GPT-5.2 support +- Check: `codex --version` +- Switch model mid-session: `/model` slash command inside Codex session +- Default config: `~/.codex/config.toml` diff --git a/skills/command-creator/SKILL.md b/skills/command-creator/SKILL.md index bd27ac1..da40b08 100644 --- a/skills/command-creator/SKILL.md +++ b/skills/command-creator/SKILL.md @@ -1,210 +1,174 @@ --- name: command-creator -description: This skill should be used when creating a Claude Code slash command. Use when users ask to "create a command", "make a slash command", "add a command", or want to document a workflow as a reusable command. Essential for creating optimized, agent-executable slash commands with proper structure and best practices. +description: "Design and scaffold Claude Code slash commands (reusable workflow shortcuts invocable as /command-name). Covers prompt engineering for commands, parameter design, example cases, and command anti-patterns. Use when asked to automate a workflow as a slash command, create a /command, or turn a recurring prompt into a persistent tool. Triggers: slash command, create a command, /command-name pattern." --- # Command Creator -This skill guides the creation of Claude Code slash commands - reusable workflows that can be invoked with `/command-name` in Claude Code conversations. +This skill guides the creation of Claude Code slash commands — reusable workflows invoked with `/command-name` in Claude Code conversations. -## About Slash Commands - -Slash commands are markdown files stored in `.claude/commands/` (project-level) or `~/.claude/commands/` (global/user-level) that get expanded into prompts when invoked. They're ideal for: - -- Repetitive workflows (code review, PR submission, CI fixing) -- Multi-step processes that need consistency -- Agent delegation patterns -- Project-specific automation +## Mindset -## When to Use This Skill +Expert heuristics for command authorship: -Invoke this skill when users: +1. **Write for autonomy, not for humans.** Every instruction must be executable without clarifying questions. Vague steps ("check for errors") are failures — the agent will stall or hallucinate. +2. **The description IS the interface.** The frontmatter `description` is what appears in `/help`. A weak description means no one invokes the command correctly. Write it as an imperative action statement. +3. **Anti-patterns before steps.** Identify what NOT to do first. The costliest failures (batching todos, tool confusion, retry loops) come from omission, not malformed steps. +4. **Pattern before content.** Choose a workflow pattern (Analyze→Act→Report, Run→Fix→Repeat, etc.) before writing a single instruction line. Mismatched pattern = structurally broken command. +5. **Success criteria are mandatory, not optional.** A command without a defined stop condition will run forever or stop arbitrarily. Both are failures. -- Ask to "create a command" or "make a slash command" -- Want to automate a repetitive workflow -- Need to document a consistent process for reuse -- Say "I keep doing X, can we make a command for it?" -- Want to create project-specific or global commands +## Navigation -## Bundled Resources +**Use this skill when:** +- User asks to "create a command", "make a slash command", "add a command", "automate a workflow" +- User says "I keep doing X, can we make a command for it?" +- User wants to package a multi-step process for reuse +- User wants project-specific or global automation -This skill includes reference documentation for detailed guidance: +**Do NOT use this skill when:** +- User wants to run an existing command (just run it) +- User wants to modify Claude's behavior via settings.json (use update-config skill) +- User wants a script, not a slash command — scripts live in `scripts/`, not `.claude/commands/` -- **references/patterns.md** - Command patterns (workflow automation, iterative fixing, agent delegation, simple execution) -- **references/examples.md** - Real command examples with full source (submit-stack, ensure-ci, create-implementation-plan) -- **references/best-practices.md** - Quality checklist, common pitfalls, writing guidelines, template structure +**Decision tree — which pattern to recommend:** -Load these references as needed when creating commands to understand patterns, see examples, or ensure quality. +``` +Does the command fix/retry until passing? + YES → Iterative Fixing (Run→Parse→Fix→Repeat) + NO → Does it coordinate multiple agents? + YES → Agent Delegation (Context→Delegate→Iterate) + NO → Does it run one tool and return output? + YES → Simple Execution (Parse→Execute→Report) + NO → Workflow Automation (Analyze→Act→Report) +``` -## Command Structure Overview +**Location decision:** +``` +Is user inside a git repo? + YES → .claude/commands/ (project-level) + NO → ~/.claude/commands/ (global) +User says "global"? → Always ~/.claude/commands/ +User says "project"? → Always .claude/commands/ +``` -Every slash command is a markdown file with: +## Philosophy -```markdown ---- -description: Brief description shown in /help (required) -argument-hint: (optional, if command takes arguments) ---- +Commands are not documentation — they are agent programs. Every line must be an unambiguous, executable instruction. Optimize for the agent that will run it cold, without prior context, at 3am on a CI failure. -# Command Title +## NEVER -[Detailed instructions for the agent to execute autonomously] -``` +Anti-patterns that silently break commands — each has a non-obvious failure mode: -## Command Creation Workflow +1. **NEVER use second-person ("you should", "you need to").** Agents parse imperative instructions; second-person triggers a reasoning mode that introduces hesitation and paraphrasing, causing drift from the intended steps. +2. **NEVER omit a stop condition on iterative commands.** Without `max iterations: N` or stuck-detection logic, the agent enters an infinite loop on unfixable errors, burning context and never surfacing the root cause. +3. **NEVER put anti-patterns only in reference files.** If the NEVER list isn't visible when the command is invoked, the agent never reads it. Critical constraints must be inline. +4. **NEVER use underscores in command names** (`submit_stack` → broken). Claude Code uses the filename as the invocation key; underscores silently prevent the command from appearing in `/help`. +5. **NEVER batch todo completions.** Marking all todos done at the end defeats progress tracking and means a mid-execution failure looks like success. Mark each todo complete immediately after its task finishes. +6. **NEVER write vague description fields** (`description: A command for CI stuff`). The description is the only thing visible in `/help` — vague descriptions mean the command is never invoked for its intended purpose. +7. **NEVER use the Task tool when Bash tool is correct** (and vice versa). Task tool spins up a subagent (expensive, for delegation). Bash tool runs commands directly. Using Task for `make lint` wastes context; using Bash for multi-agent orchestration silently drops agent specialization. -### Step 1: Determine Location +## When Things Go Wrong -**Auto-detect the appropriate location:** +| Symptom | Root Cause | Fix | +|---|---|---| +| Command never appears in `/help` | Filename uses underscores instead of hyphens | Rename file to `kebab-case.md` | +| Agent asks clarifying questions mid-execution | Steps are vague or use second-person | Rewrite steps in imperative form with explicit expected outcomes | +| Agent loops forever on CI failure | No stuck-detection or max-iteration guard | Add `If same error appears 3 times: STOP and report` | +| Todos all marked done instantly | Instructions say "mark all complete at end" | Rewrite to mark each todo complete immediately after its step | +| Wrong location (project vs global) | Auto-detect not run, or user intent not checked | Run `git rev-parse --is-inside-work-tree` first; confirm with user | +| Command breaks on some machines | Hard-coded absolute paths in command body | Use relative paths or environment variables | -1. Check git repository status: `git rev-parse --is-inside-work-tree 2>/dev/null` -2. Default location: - - If in git repo → Project-level: `.claude/commands/` - - If not in git repo → Global: `~/.claude/commands/` -3. Allow user override: - - If user explicitly mentions "global" or "user-level" → Use `~/.claude/commands/` - - If user explicitly mentions "project" or "project-level" → Use `.claude/commands/` +## About Slash Commands -Report the chosen location to the user before proceeding. +Slash commands are markdown files stored in `.claude/commands/` (project-level) or `~/.claude/commands/` (global/user-level) that get expanded into prompts when invoked. They are ideal for: -### Step 2: Show Command Patterns +- Repetitive workflows (code review, PR submission, CI fixing) +- Multi-step processes that need consistency +- Agent delegation patterns +- Project-specific automation -Help the user understand different command types. Load **references/patterns.md** to see available patterns: +## Bundled Resources -- **Workflow Automation** - Analyze → Act → Report (e.g., submit-stack) -- **Iterative Fixing** - Run → Parse → Fix → Repeat (e.g., ensure-ci) -- **Agent Delegation** - Context → Delegate → Iterate (e.g., create-implementation-plan) -- **Simple Execution** - Run command with args (e.g., codex-review) +Load these references when needed — do not load all upfront: -Ask the user: "Which pattern is closest to what you want to create?" This helps frame the conversation. +- **references/patterns.md** — Command patterns with decision guides (Workflow Automation, Iterative Fixing, Agent Delegation, Simple Execution) +- **references/examples.md** — Full source for real commands (submit-stack, ensure-ci, create-implementation-plan) +- **references/best-practices.md** — Quality checklist, writing guidelines, template structure, advanced patterns -### Step 3: Gather Command Information +## Command Creation Workflow -Ask the user for key information: +### Step 1: Determine Location -#### A. Command Name and Purpose +Auto-detect the appropriate location: -Ask: +1. Check git repository status: `git rev-parse --is-inside-work-tree 2>/dev/null` +2. Default: + - In git repo → Project-level: `.claude/commands/` + - Not in git repo → Global: `~/.claude/commands/` +3. Override: if user says "global" → `~/.claude/commands/`; if user says "project" → `.claude/commands/` -- "What should the command be called?" (for filename) -- "What does this command do?" (for description field) +Report chosen location before proceeding. -Guidelines: +### Step 2: Select Pattern -- Command names MUST be kebab-case (hyphens, NOT underscores) - - ✅ CORRECT: `submit-stack`, `ensure-ci`, `create-from-plan` - - ❌ WRONG: `submit_stack`, `ensure_ci`, `create_from_plan` -- File names match command names: `my-command.md` → invoked as `/my-command` -- Description should be concise, action-oriented (appears in `/help` output) +Load **references/patterns.md** and use the decision tree above to recommend a pattern. Ask: "Which pattern is closest to what you want to create?" before writing a single instruction. -#### B. Arguments +### Step 3: Gather Command Information -Ask: +#### A. Name and Purpose -- "Does this command take any arguments?" -- "Are arguments required or optional?" -- "What should arguments represent?" +- Command name: kebab-case only (`submit-stack` ✅, `submit_stack` ❌) +- Description: imperative, action-oriented, for `/help` output -If command takes arguments: +#### B. Arguments -- Add `argument-hint: ` to frontmatter -- Use `` for required arguments -- Use `[square-brackets]` for optional arguments +- Does the command take arguments? Required or optional? +- `argument-hint: ` or `argument-hint: [optional]` in frontmatter #### C. Workflow Steps -Ask: - -- "What are the specific steps this command should follow?" -- "What order should they happen in?" -- "What tools or commands should be used?" - -Gather details about: - -- Initial analysis or checks to perform -- Main actions to take -- How to handle results -- Success criteria +- Initial checks (file existence, git status) +- Main actions and tool choices - Error handling approach +- Success criteria and stop conditions -#### D. Tool Restrictions and Guidance - -Ask: +#### D. Tool Restrictions -- "Should this command use any specific agents or tools?" -- "Are there any tools or operations it should avoid?" -- "Should it read any specific files for context?" +- Which tools to use (Bash vs Task vs Edit) +- Which tools to explicitly prohibit +- Any files to read for context ### Step 4: Generate Optimized Command -Create the command file with agent-optimized instructions. Load **references/best-practices.md** for: - -- Template structure -- Best practices for agent execution -- Writing style guidelines -- Quality checklist +Load **references/best-practices.md** before writing. Apply: -Key principles: - -- Use imperative/infinitive form (verb-first instructions) -- Be explicit and specific -- Include expected outcomes -- Provide concrete examples -- Define clear error handling +- Imperative/infinitive form (verb-first, never "you should") +- Explicit tool names per step +- Expected outcomes after each action +- Realistic examples (not foo/bar) +- Error handling with specific recovery actions +- Clear stop conditions ### Step 5: Create the Command File -1. Determine full file path: - - Project: `.claude/commands/[command-name].md` - - Global: `~/.claude/commands/[command-name].md` - -2. Ensure directory exists: - - ```bash - mkdir -p [directory-path] - ``` - +1. Determine full file path (project: `.claude/commands/[name].md`, global: `~/.claude/commands/[name].md`) +2. Ensure directory exists: `mkdir -p [directory-path]` 3. Write the command file using the Write tool - -4. Confirm with user: - - Report the file location - - Summarize what the command does - - Explain how to use it: `/command-name [arguments]` +4. Confirm with user: file location, what it does, how to invoke it ### Step 6: Test and Iterate (Optional) -If the user wants to test: - -1. Suggest testing: `You can test this command by running: /command-name [arguments]` -2. Be ready to iterate based on feedback -3. Update the file with improvements as needed +Suggest: `You can test this command by running: /command-name [arguments]` -## Quick Tips - -**For detailed guidance, load the bundled references:** - -- Load **references/patterns.md** when designing the command workflow -- Load **references/examples.md** to see how existing commands are structured -- Load **references/best-practices.md** before finalizing to ensure quality - -**Common patterns to remember:** - -- Use Bash tool for `pytest`, `pyright`, `ruff`, `prettier`, `make`, `gt` commands -- Use Task tool to invoke subagents for specialized tasks -- Check for specific files first (e.g., `.PLAN.md`) before proceeding -- Mark todos complete immediately, not in batches -- Include explicit error handling instructions -- Define clear success criteria +Be ready to iterate on feedback and update the file. ## Summary -When creating a command: - -1. **Detect location** (project vs global) -2. **Show patterns** to frame the conversation +1. **Detect location** (project vs global, confirm with user) +2. **Select pattern** using decision tree before writing 3. **Gather information** (name, purpose, arguments, steps, tools) 4. **Generate optimized command** with agent-executable instructions 5. **Create file** at appropriate location 6. **Confirm and iterate** as needed -Focus on creating commands that agents can execute autonomously, with clear steps, explicit tool usage, and proper error handling. +Focus on creating commands that agents can execute autonomously, with unambiguous steps, explicit tool usage, inline anti-patterns, and defined stop conditions. diff --git a/skills/commit-work/SKILL.md b/skills/commit-work/SKILL.md index 33d816e..c29a550 100644 --- a/skills/commit-work/SKILL.md +++ b/skills/commit-work/SKILL.md @@ -1,55 +1,104 @@ --- name: commit-work -description: "Create high-quality git commits: review/stage intended changes, split into logical commits, and write clear commit messages (including Conventional Commits). Use when the user asks to commit, craft a commit message, stage changes, or split work into multiple commits." +description: "Create high-quality git commits: review/stage intended changes, split into logical commits, write Conventional Commits messages. Trigger phrases: commit this work, craft a commit message, stage changes, split into commits, create a commit." --- -# Commit work - -## Goal -Make commits that are easy to review and safe to ship: -- only intended changes are included -- commits are logically scoped (split when needed) -- commit messages describe what changed and why - -## Inputs to ask for (if missing) -- Single commit or multiple commits? (If unsure: default to multiple small commits when there are unrelated changes.) -- Commit style: Conventional Commits are required. -- Any rules: max subject length, required scopes. - -## Workflow (checklist) -1) Inspect the working tree before staging - - `git status` - - `git diff` (unstaged) - - If many changes: `git diff --stat` -2) Decide commit boundaries (split if needed) - - Split by: feature vs refactor, backend vs frontend, formatting vs logic, tests vs prod code, dependency bumps vs behavior changes. - - If changes are mixed in one file, plan to use patch staging. -3) Stage only what belongs in the next commit - - Prefer patch staging for mixed changes: `git add -p` - - To unstage a hunk/file: `git restore --staged -p` or `git restore --staged ` -4) Review what will actually be committed - - `git diff --cached` - - Sanity checks: - - no secrets or tokens - - no accidental debug logging - - no unrelated formatting churn -5) Describe the staged change in 1-2 sentences (before writing the message) - - "What changed?" + "Why?" - - If you cannot describe it cleanly, the commit is probably too big or mixed; go back to step 2. -6) Write the commit message - - Use Conventional Commits (required): - - `type(scope): short summary` - - blank line - - body (what/why, not implementation diary) - - footer (BREAKING CHANGE) if needed - - Prefer an editor for multi-line messages: `git commit -v` - - Use `references/commit-message-template.md` if helpful. -7) Run the smallest relevant verification - - Run the repo's fastest meaningful check (unit tests, lint, or build) before moving on. -8) Repeat for the next commit until the working tree is clean - -## Deliverable -Provide: -- the final commit message(s) -- a short summary per commit (what/why) -- the commands used to stage/review (at minimum: `git diff --cached`, plus any tests run) +# Commit Work + +## Mindset + +- A commit is a unit of reviewability, not a unit of work. If a reviewer can't understand it in isolation, split it. +- The diff is wrong until proven otherwise. Run `git diff` before touching the index — surprises live there. +- Pre-commit hooks are enforcement, not suggestions. A failed hook means the commit did NOT happen; recover with a new commit, never `--amend` on the same HEAD. +- Bisect-safety trumps narrative coherence. A commit that breaks tests is more expensive than two awkward commits that don't. +- Staging is a separate thinking step. Deciding what to commit and how to describe it are different cognitive modes — don't conflate them. + +## Navigation + +**Use this skill when**: staging changes, crafting commit messages, splitting mixed changes into logical commits, applying Conventional Commits format. + +**Do NOT use this skill when**: pushing to remote, creating pull requests, resolving merge conflicts, or rebasing — those workflows carry different risk profiles and should be handled explicitly. + +**Quick decision tree**: +- Working tree has changes in 2+ unrelated concerns → split into multiple commits +- Changes touch secrets, generated files, or debug logs → exclude before staging +- Pre-commit hook failed → fix issue, re-stage, new commit (never amend) +- Repo is a shallow clone (`git rev-parse --is-shallow-repository` returns `true`) → do not amend; new commit only + +## Philosophy + +A commit is a promise to future readers: "this set of changes is coherent, tested, and describes its own intent." Violate that promise and you tax every future `git blame`, `git bisect`, and code review. + +## NEVER + +- NEVER run `git add -A` or `git add .` — these silently include `.env` files, build artifacts, and generated lock-file changes that belong in a separate commit or `.gitignore`. Review every path explicitly. +- NEVER `git commit --amend` in a shallow clone (`--depth N`) — amending rewrites the tip commit; a subsequent `push --force` will orphan the original on the remote and cannot be recovered without the SHA. Use a new commit instead. +- NEVER skip pre-commit hooks with `--no-verify` unless the user explicitly requests it and understands the consequence — hooks exist to catch secrets, linting errors, and test failures before they enter history. +- NEVER commit when `git status` shows a merge in progress (`MERGE_HEAD` exists) or a rebase is active (`REBASE_HEAD` / `rebase-merge/` directory present) — committing in this state creates a malformed merge commit or corrupts the rebase sequence. +- NEVER write a commit message that describes the implementation ("added null check on line 42") instead of the behavior ("prevent crash when user list is empty") — implementation is in the diff; the message must explain the intent. +- NEVER stage test file changes in the same commit as the production code they cover when the tests were written first (TDD) — the test-first commit is evidence of the design decision and has independent value in `git log`. +- NEVER assume `git diff --cached` is empty before committing — a prior interrupted session or a hook script can leave hunks staged without your knowledge. + +## Workflow + +**1. Inspect before touching the index** +``` +git status +git diff # unstaged +git diff --cached # already staged (should be empty at start) +``` +Check for: merge/rebase state, shallow clone, stale staged hunks. + +**2. Decide commit boundaries** +Split by: feature vs refactor, logic vs formatting, prod vs test, dep bump vs behavior change. +If a file contains mixed concerns, plan patch staging (`git add -p`) before touching the index. + +**3. Stage selectively** +``` +git add -p # preferred for mixed-concern files +git add # acceptable for clean single-concern files +``` +After staging: `git diff --cached` must match your intent exactly. + +**4. Safety check on staged diff** +Scan for: secrets/tokens, `console.log`/`debugger`/`TODO` left over, unrelated whitespace churn, generated files. + +**5. Describe the change in 1-2 sentences before writing the message** +If you cannot describe it cleanly → the commit is too big. Return to step 2. + +**6. Write the commit message** +See `references/commit-message-template.md`. Required format: +``` +type(scope): short imperative summary + +What changed and why — not how. + +BREAKING CHANGE: (if applicable) +``` +Use `git commit -v` to see the diff while composing. + +**7. Verify before moving on** +Run the repo's fastest meaningful check (unit tests, lint, typecheck). A commit that breaks CI is more expensive to fix than a 30-second lint run now. + +**8. Repeat until `git status` is clean** + +## When Things Go Wrong + +| Situation | Likely Cause | Recovery | +|-----------|-------------|----------| +| Pre-commit hook failed | Hook caught lint/test/secret issue | Fix the issue, `git add` the fix, create a NEW commit — do NOT `--amend` | +| Committed wrong hunks | `git add -A` or inattentive `git add -p` | `git reset HEAD~1` (soft) to unstage, re-stage correctly, new commit | +| `git commit --amend` in shallow clone | `--depth N` clone, rewrote tip | Recover original via `git fetch origin ` if you still have it; otherwise the original is gone unless the remote ref is intact | +| Merge/rebase in progress | `MERGE_HEAD` or `rebase-merge/` present | Complete or abort the merge/rebase first: `git merge --abort` / `git rebase --abort` | +| Hook added files you didn't stage | Formatter hook auto-staged changes | Run `git diff --cached` again after the hook; unstage unintended changes with `git restore --staged -p` | +| Bisect breaks on a commit | Mixed concerns in one commit | Future prevention: split logic/test/format; immediate: `git bisect skip` to mark untestable commits | + +## Bisect Safety Heuristics + +When splitting commits, order them so every intermediate state is buildable and testable: +1. Dependency/config changes first +2. New production code (feature/fix) second +3. Tests that exercise that code third +4. Formatting/cleanup last + +A commit series that cannot be bisected is a commit series that cannot be debugged. diff --git a/skills/crafting-effective-readmes/SKILL.md b/skills/crafting-effective-readmes/SKILL.md index a6c30d9..1fd4b48 100644 --- a/skills/crafting-effective-readmes/SKILL.md +++ b/skills/crafting-effective-readmes/SKILL.md @@ -1,78 +1,90 @@ --- name: crafting-effective-readmes -description: Use when writing or improving README files. Not all READMEs are the same — provides templates and guidance matched to your audience and project type. +description: Use when writing, improving, or reviewing README files. Triggers: "write a README", "improve my README", "update README", "my README is outdated", "create documentation for my project". Covers OSS, internal, personal, and config project types. --- # Crafting Effective READMEs -## Overview +## Mindset -READMEs answer questions your audience will have. Different audiences need different information - a contributor to an OSS project needs different context than future-you opening a config folder. +1. **The README is a cognitive funnel, not a table of contents.** Readers scan broad-to-specific: they decide in 10 seconds whether to read further. Every section either advances or stalls that funnel. Front-load the decision-enabling content (what + why + does it work for me). -**Always ask:** Who will read this, and what do they need to know? +2. **The documentation defines what the module does — the code does not.** A README without enough detail forces readers into source code. That's a failure. The goal is to keep users *out* of the source by providing everything needed to evaluate and use the project without reading implementation. -## Process +3. **Brevity is a feature, but incompleteness is a bug.** The ideal README is as short as it can be without being any shorter. When in doubt between too long and too short, choose too long — but put the excess in separate files, not in the README. + +4. **Write for the stranger, not the author.** After 6 months, even the author is a stranger to their own project. The README is a contract with your future confused self as much as with new users. + +5. **Match complexity to audience, not to effort.** A config folder README needs "what's here + why + gotchas" in 20 lines. An OSS library README needs install, quickstart, and API shape. Adding OSS sections to a config README is noise, not thoroughness. -### Step 1: Identify the Task +## Navigation -**Ask:** "What README task are you working on?" +**Use this skill when:** +- Writing a README from scratch for any project type +- Improving or restructuring an existing README +- Updating a README after project changes +- Reviewing a README for completeness or accuracy -| Task | When | -|------|------| -| **Creating** | New project, no README yet | -| **Adding** | Need to document something new | -| **Updating** | Capabilities changed, content is stale | -| **Reviewing** | Checking if README is still accurate | +**Do NOT use this skill when:** +- Writing other documentation (wikis, API docs, changelogs) — those have separate concerns +- Improving prose quality only — use `writing-clearly-and-concisely` instead +- The "README" is actually a CONTRIBUTING.md, CHANGELOG.md, or LICENSE — different audiences and formats -### Step 2: Task-Specific Questions +**Complexity decision tree:** -**Creating initial README:** -1. What type of project? (see Project Types below) -2. What problem does this solve in one sentence? -3. What's the quickest path to "it works"? -4. Anything notable to highlight? +``` +What is this project? +├── Config folder / dotfiles → Simple (20-40 lines): what's here, why, gotchas, how-to-extend +├── Personal/portfolio project → Medium (40-100 lines): what + stack + quick demo + learnings +├── Internal/team tool → Medium (60-120 lines): setup + architecture + runbooks + gotchas +└── OSS library/CLI → Full (100-300 lines): install + quickstart + API + contributing + license + └── Is it a published package? → Also add: badges, examples, support channels +``` -**Adding a section:** -1. What needs documenting? -2. Where should it go in the existing structure? -3. Who needs this info most? +## Philosophy -**Updating existing content:** -1. What changed? -2. Read current README, identify stale sections -3. Propose specific edits +A README is the one-stop shop that defines your project's contract with the world. It exists to keep users out of the source code. Everything else follows from that. -**Reviewing/refreshing:** -1. Read current README -2. Check against actual project state (package.json, main files, etc.) -3. Flag outdated sections -4. Update "Last reviewed" date if present +## NEVER -### Step 3: Always Ask +- NEVER open with implementation details before establishing what the project does — readers abandon before reaching the useful part. The description must appear in the first 5 lines. +- NEVER add badges to internal tools or config READMEs — badges signal "public OSS" and create noise for teammates who don't care about CI status on a dotfiles folder. +- NEVER write "Usage: See code for examples" or leave the usage section without at least one runnable example — this is the single most common reason a developer gives up and looks elsewhere. +- NEVER use a wall of prose for setup steps — numbered lists are not optional for installation; sequential prose causes missed steps and support requests. +- NEVER omit environment/OS prerequisites from install instructions — assuming "they know they need Node 18" is the most common cause of "it doesn't work" issues. State every non-obvious requirement. +- NEVER copy-paste a generic OSS template into an internal project — internal READMEs need runbooks and architecture, not badges and license text; wrong template creates confusion about intended audience. +- NEVER leave a README with a "Last Reviewed" date more than 6 months old without flagging it as potentially stale — a wrong README is worse than no README because it actively misleads. -After drafting, ask: **"Anything else to highlight or include that I might have missed?"** +## When Things Go Wrong -## Project Types +| Situation | Likely Cause | Recovery | +|-----------|-------------|----------| +| User can't figure out what the project does after reading | Description is buried below badges/ToC/motivation paragraphs | Move description to line 1-2; cut everything above it | +| Setup steps fail for new users but work for author | Prerequisites (OS version, env vars, global deps) assumed not stated | Add explicit Requirements subsection listing every non-obvious dependency | +| README feels too long but cutting feels wrong | Right content, wrong location — detail belongs in separate docs | Move API reference / deep examples to `docs/` or a wiki; link from README | +| Reviewer says README is "stale" | Project evolved but README wasn't updated as part of PRs | Add a "Last Reviewed" line; establish README-in-PR checklist habit | -| Type | Audience | Key Sections | Template | -|------|----------|--------------|----------| -| **Open Source** | Contributors, users worldwide | Install, Usage, Contributing, License | `templates/oss.md` | -| **Personal** | Future you, portfolio viewers | What it does, Tech stack, Learnings | `templates/personal.md` | -| **Internal** | Teammates, new hires | Setup, Architecture, Runbooks | `templates/internal.md` | -| **Config** | Future you (confused) | What's here, Why, How to extend, Gotchas | `templates/xdg-config.md` | +## Reference Loading Guide -**Ask the user** if unclear. Don't assume OSS defaults for everything. +Load references only when needed — don't pull all into context: -## Essential Sections (All Types) +| Situation | Load | +|-----------|------| +| Writing for a public OSS project | `references/art-of-readme.md` — cognitive funneling, brevity principles | +| Need section-by-section guidance | `references/make-a-readme.md` — concrete suggestions per section | +| OSS project needs a standardized format | `references/standard-readme-spec.md` + examples | +| Need a starting template | `templates/oss.md`, `templates/internal.md`, `templates/personal.md`, `templates/xdg-config.md` | +| Checking section completeness | `section-checklist.md` | + +## Process -Every README needs at minimum: +**Step 1 — Identify project type:** OSS / Personal / Internal / Config (ask if unclear; never assume OSS) -1. **Name** - Self-explanatory title -2. **Description** - What + why in 1-2 sentences -3. **Usage** - How to use it (examples help) +**Step 2 — Identify task:** +- Creating: use the matching template, ask "what problem in one sentence + quickest path to working" +- Updating: read current README, identify stale sections against actual project state +- Reviewing: check package.json/main files vs README claims; flag mismatches -## References +**Step 3 — Apply the three-section minimum:** Every README needs Name + Description (1-2 sentences, what + why) + Usage (with at least one example). -- `section-checklist.md` - Which sections to include by project type -- `style-guide.md` - Common README mistakes and prose guidance -- `using-references.md` - Guide to deeper reference materials +**Step 4 — After drafting:** Ask "Anything else to highlight or include that I might have missed?" diff --git a/skills/daily-meeting-update/SKILL.md b/skills/daily-meeting-update/SKILL.md index 190b070..13388b9 100644 --- a/skills/daily-meeting-update/SKILL.md +++ b/skills/daily-meeting-update/SKILL.md @@ -1,247 +1,151 @@ --- name: daily-meeting-update -description: "Interactive daily standup/meeting update generator. Use when user says 'daily', 'standup', 'scrum update', 'status update', 'what did I do yesterday', 'prepare for meeting', 'morning update', or 'team sync'. Pulls activity from GitHub, Jira, and Claude Code session history. Conducts 4-question interview (yesterday, today, blockers, discussion topics) and generates formatted Markdown update." +description: "Interactive daily standup/meeting update generator. Use when user says 'daily', 'standup', 'scrum update', 'status update', 'what did I do yesterday', 'prepare for meeting', 'morning update', 'team sync', 'async update', or 'remote standup'. Pulls activity from GitHub, Jira, and Claude Code session history. Conducts interview (yesterday, today, blockers, discussion topics) and generates formatted update for sync meetings, async Slack posts, or standup bots." user-invocable: true --- # Daily Meeting Update -Generate a daily standup/meeting update through an **interactive interview**. Never assume tools are configured—ask first. +Generate a daily standup/meeting update through an **interactive interview**. Never assume tools are configured — ask first. --- -## Workflow +## Mindset -``` -START - │ - ▼ -┌─────────────────────────────────────────────────────┐ -│ Phase 1: DETECT & OFFER INTEGRATIONS │ -│ • Check: Claude Code history? gh CLI? jira CLI? │ -│ • Claude Code → Pull yesterday's session digest │ -│ → User selects relevant items via multiSelect │ -│ • GitHub/Jira → Ask user, pull if approved │ -│ • Pull data NOW (before interview) │ -├─────────────────────────────────────────────────────┤ -│ Phase 2: INTERVIEW (with insights) │ -│ • Show pulled data as context │ -│ • Yesterday: "I see you merged PR #123, what else?" │ -│ • Today: What will you work on? │ -│ • Blockers: Anything blocking you? │ -│ • Topics: Anything to discuss at end of meeting? │ -├─────────────────────────────────────────────────────┤ -│ Phase 3: GENERATE UPDATE │ -│ • Combine interview answers + tool data │ -│ • Format as clean Markdown │ -│ • Present to user │ -└─────────────────────────────────────────────────────┘ -``` +1. **Tools surface what happened; the interview captures why it matters.** A merged PR without context ("I was blocked waiting for this") is noise. Always interview, even when you have full tool data. +2. **The Topics for Discussion question is highest-signal.** It captures cross-team dependencies, architectural decisions, and team-wide risks that no tool can detect. It's the one question you cannot skip. +3. **Async ≠ fewer questions — async updates are read without you present.** A vague sync standup is forgiven because teammates can ask. A vague async post blocks people for hours. Async format requires more precision, not less. +4. **One missed blocker cascades.** If a user is waiting on someone outside their team, that handoff won't happen unless stated explicitly. Surface it, name names, suggest the next action. +5. **Consent before access.** The user's repo list may include client work, personal projects, or sensitive branches they do not want surfaced in a standup. Always ask which repos; never enumerate everything visible. --- -## Phase 1: Detect & Offer Integrations - -### Step 1: Silent Detection - -Check for available integrations **silently** (suppress errors, don't show to user): - -| Integration | Detection | -|-------------|-----------| -| **Claude Code History** | `~/.claude/projects` directory exists with `.jsonl` files | -| GitHub CLI | `gh auth status` succeeds | -| Jira CLI | `jira` command exists | -| Atlassian MCP | `mcp__atlassian__*` tools available | -| Git | Inside a git repository | +## Navigation -### Step 2: Offer GitHub/Jira Integrations (if available) +### Use this skill when: +- User says "daily", "standup", "scrum", "morning update", "status update", "team sync" +- User asks "what did I do yesterday" or "help me prepare for my meeting" +- User needs an async Slack post, standup bot answer, or remote team update +- User is catching up after missed days (cover multiple days in "yesterday") -> **Claude Code users:** Use `AskUserQuestionTool` tool for all questions in this phase. +### Do NOT use this skill when: +- User wants a project status report (different scope — weekly/monthly, audience is leadership) +- User wants a written retrospective or sprint review +- User is asking about a specific ticket/PR status (use Jira/GitHub tools directly) -**GitHub/Git:** - -If `HAS_GH` or `HAS_GIT`: +### Sync vs Async Decision Tree ``` -"I detected you have GitHub/Git configured. Want me to pull your recent activity (commits, PRs, reviews)?" - -Options: -- "Yes, pull the info" -- "No, I'll provide everything manually" +Is the standup happening live (video/in-person/phone)? +├─ YES → Use full Markdown template (Phases 1→2→3 below) +│ Keep to <12 bullets; audience can ask follow-ups live +└─ NO → Is it posted to Slack/chat? + ├─ YES → Use chat format (bold headers, tag names, 4-6 lines) + │ → See references/async-remote-patterns.md + └─ NO → Standup bot (Geekbot, Standuply)? + ├─ YES → Full sentences per prompt, no bullets + └─ NO → Email/wiki → Full Markdown with links section ``` -If yes: - -``` -"Which repositories/projects should I check?" - -Options: -- "Just the current directory" (if in a git repo) -- "I'll list the repos" → user provides list -``` - -**Jira:** - -If `HAS_JIRA_CLI` or `HAS_ATLASSIAN_MCP`: - -``` -"I detected you have Jira configured. Want me to pull your tickets?" - -Options: -- "Yes, pull my tickets" -- "No, I'll provide everything manually" -``` - -### Step 3: Pull GitHub/Jira Data (if approved) - -**GitHub/Git** — For each approved repo: -- Commits by user since yesterday -- PRs opened/merged by user -- Reviews done by user +--- -**Jira** — Tickets assigned to user, updated in last 24h +## Philosophy -**Key insight**: Store results to use as context in Phase 2 interview. +The standup update is a coordination tool, not a progress report. Its job is to unblock teammates, surface dependencies before they become delays, and focus collective attention on what matters today — not to prove productivity. -### Step 4: Offer Claude Code History +--- -This integration captures everything you worked on with Claude Code — useful for recalling work that isn't in git or Jira. +## Workflow -**Detection:** -```bash -ls ~/.claude/projects/*/*.jsonl 2>/dev/null | head -1 ``` +Phase 1: DETECT & OFFER INTEGRATIONS + ↓ Check Claude Code history, gh CLI, jira CLI (silently) + ↓ Ask user which repos/integrations to pull (consent required) + ↓ Pull data NOW — before interview, not during -**If Claude Code history exists, ask:** - -``` -"I can also pull your Claude Code session history from yesterday. This can help recall work that isn't in git/Jira (research, debugging, planning). Want me to check?" +Phase 2: INTERVIEW (with insights) + ↓ Show pulled data as context for each question + ↓ Q1: Yesterday (with tool data as prompt) + ↓ Q2: Today (with Jira tickets as suggestions) + ↓ Q3: Blockers + ↓ Q4: Topics for Discussion (NEVER skip) -Options: -- "Yes, pull my Claude Code sessions" -- "No, I have everything I need" +Phase 3: GENERATE UPDATE + ↓ Combine interview answers + tool data + ↓ Format for meeting type (sync/async/bot) + ↓ Present to user ``` -**If yes, run the digest script:** - -```bash -python3 ~/.claude/skills/daily-meeting-update/scripts/claude_digest.py --format json -``` +--- -**Then present sessions with multiSelect:** +## Phase 1: Detect & Offer Integrations -Use `AskUserQuestionTool` with `multiSelect: true` to let user pick relevant items: +Detect silently (suppress all errors). Full detection commands: `references/integration-details.md` -``` -"Here are your Claude Code sessions from yesterday. Select the ones relevant to your standup:" +| Integration | Quick Detection | +|-------------|-----------------| +| Claude Code History | `~/.claude/projects/*/*.jsonl` exists | +| GitHub CLI | `gh auth status` succeeds | +| Jira CLI | `jira` command exists | +| Atlassian MCP | `mcp__atlassian__*` tools available | +| Git | Inside a git repository | -Options (multiSelect): -- "Fix authentication bug (backend-api)" -- "Implement OAuth flow (backend-api)" -- "Update homepage styles (frontend-app)" -- "Research payment providers (docs)" -``` +**For each detected integration:** Ask user if they want it pulled and which repos/projects to include. Never pull without explicit approval. -**Key insight:** User selects which sessions are work-related. Personal projects or experiments can be excluded. +**Claude Code History:** Run `scripts/claude_digest.py --format json`, then present sessions via `AskUserQuestionTool` with `multiSelect: true` so user can exclude personal projects. -**Do NOT run digest script when:** -- User explicitly says "No" to Claude Code history -- User says they'll provide everything manually -- `~/.claude/projects` directory doesn't exist +**If any integration fails:** Skip silently and proceed. No integration is required — the interview works without them. -**If digest script fails:** -- Fallback: Skip Claude Code integration silently, proceed with interview -- Common issues: Python not installed, no sessions from yesterday, permission errors -- Do NOT block the standup flow — the script is supplemental, not required +Full pull commands, error handling: `references/integration-details.md` --- -## Phase 2: Interview (with insights) +## Phase 2: Interview (with Insights) -> **Claude Code users:** Use `AskUserQuestionTool` tool to conduct the interview. This provides a better UX with structured options. +> In Claude Code: use `AskUserQuestionTool` for structured questions with options. -**Use pulled data as context** to make questions smarter. +Show pulled data as context before asking each question — this triggers memory ("I see you merged PR #123, anything else?"). -### Question 1: Yesterday +### Q1: Yesterday +Show tool data first, then: *"Anything else you worked on since the last standup that I missed?"* -**If data was pulled**, show it first: +If no data pulled: *"What did you work on yesterday/since last standup?"* -``` -"Here's what I found from your activity: -- Merged PR #123: fix login timeout -- 3 commits in backend-api -- Reviewed PR #456 (approved) +### Q2: Today +*"What will you work on today?"* -Anything else you worked on yesterday that I missed?" -``` - -**If no data pulled:** +If Jira data pulled, suggest open tickets assigned to user as options. -``` -"What did you work on yesterday/since the last standup?" -``` +### Q3: Blockers +*"Any blockers or impediments?"* → If yes, follow up for details and who needs to act. -If user response is vague, ask follow-up: -- "Can you give more details about X?" -- "Did you complete anything specific?" +### Q4: Topics for Discussion ← NEVER SKIP +*"Anything to bring up at the end of the daily?"* Examples: technical decision needing input, cross-team alignment, prioritization question, announcement. -### Question 2: Today +### Async/Remote Extra Questions (optional — offer if fully remote or async team) -``` -"What will you work on today?" - -Options: -- [Text input - user types freely] -``` +**Q5: Availability** +*"Any changes to your availability today?"* (late start, focus time, timezone conflicts) +Why: Remote teammates can't see you're AFK. Prevents cascade of blocked DMs. -**If Jira data was pulled**, you can suggest: +**Q6: Cross-team dependencies** +*"Are you waiting on anyone outside your team, or does anyone outside your team need something from you?"* +Why: Cross-team blockers stall for days async — no one escalates what isn't named. -``` -"I see you have these tickets assigned: -- PROJ-123: Implement OAuth flow (In Progress) -- PROJ-456: Fix payment bug (To Do) - -Will you work on any of these today?" -``` - -### Question 3: Blockers - -``` -"Do you have any blockers or impediments?" - -Options: -- "No blockers" -- "Yes, I have blockers" → follow-up for details -``` - -### Question 4: Topics for Discussion - -``` -"Any topic you want to bring up at the end of the daily?" - -Options: -- "No, nothing to discuss" -- "Yes" → follow-up for details - -Examples of topics: -- Technical decision that needs input -- Alignment with another team -- Question about prioritization -- Announcement or info for the team -``` +Load full async/remote patterns: `references/async-remote-patterns.md` --- ## Phase 3: Generate Update -Combine all information into clean Markdown: +Format based on meeting type determined in Navigation: +**Sync (Markdown):** ```markdown # Daily Update - [DATE] ## Yesterday -- [Items from interview] -- [Items from GitHub/Jira if pulled] +- [Items from interview + tool data] ## Today - [Items from interview] @@ -249,160 +153,71 @@ Combine all information into clean Markdown: ## Blockers - [Blockers or "No blockers"] -## PRs & Reviews (if pulled from GitHub) -- [PRs opened] -- [PRs merged] -- [Reviews done] - -## Jira (if pulled from Jira) -- [Tickets updated] +## PRs & Reviews (if GitHub pulled) +- **Opened/Merged/Reviewed:** ... ## Topics for Discussion - [Topics or "None"] - --- -*Links:* -- [PR links] -- [Ticket links] +*Links:* [PR/ticket URLs] ``` ---- - -## Core Principles - -1. **Interview is primary** — Tools supplement, they don't replace human context -2. **Consent before access** — Always ask before pulling from any integration -3. **Context-aware questions** — Show pulled data during interview to trigger memory ("I see you merged PR #123...") - ---- - -## Quick Reference - -| Phase | Action | Tool | -|-------|--------|------| -| 1. Detect & Offer | Check gh/jira/claude history, ask user, pull data | Bash (silent), AskUserQuestionTool* | -| 2. Interview | Ask 4 questions with insights | AskUserQuestionTool* | -| 3. Generate | Format Markdown | Output text | - -*Claude Code only: Use `AskUserQuestionTool` tool for structured questions. - -### Claude Code Digest Script - -```bash -# Get yesterday's sessions as JSON -python3 ~/.claude/skills/daily-meeting-update/scripts/claude_digest.py --format json - -# Get today's sessions -python3 ~/.claude/skills/daily-meeting-update/scripts/claude_digest.py --date today --format json - -# Filter to specific project -python3 ~/.claude/skills/daily-meeting-update/scripts/claude_digest.py --project ~/my-app --format json +**Async/Chat (Slack):** ``` - ---- - -## Example Session - +*Yesterday:* [2-4 items, plain text] +*Today:* [2-3 items] +*Blockers:* [one sentence, name the blocker and who needs to act] +*FYI:* [discussion topic as thread invitation] ``` -Claude: "Let's prepare your daily! I detected GitHub configured. Want me to pull your recent activity?" -User: "Yes" -Claude: "Which repos should I check?" -User: "backend-api and frontend-app" +Full output templates and Jira section format: `references/integration-details.md` -[Claude pulls GitHub data silently] +--- -Claude: "I can also pull your Claude Code session history from yesterday. Want me to check?" -User: "Yes" +## NEVER -[Claude runs digest script, gets 5 sessions] +- **NEVER run `gh`, `jira`, or `git log` without asking first** — The user's visible repos may include client work, personal projects, or branches from a confidential feature. Enumerating everything visible is a privacy violation even when technically possible. -Claude: "Here are your Claude Code sessions from yesterday. Select the ones relevant to your standup:" - ☑ Fix authentication timeout bug (backend-api) - ☑ Implement OAuth flow (backend-api) - ☐ Update personal blog (blog) - [user unchecks personal project] - ☑ Research payment providers (docs) - ☐ Debugging test failures (experiments) +- **NEVER skip the Topics for Discussion question** — This is the highest-value question in any standup. Cross-team dependencies, architectural decisions, and process issues only surface if someone asks. Tools cannot detect them. Skipping it is the most common cause of standup meetings that "feel useless." -User: [selects 3 relevant items] +- **NEVER generate more than 15 bullets total** — Standup updates are read in under 2 minutes. Beyond 15 bullets, teammates skim or skip entirely. If the user has more than 15 items, consolidate by impact: "Resolved 3 auth bugs" not three separate bullets. -Claude: "Here's what I found from your activity: -- Merged PR #120: fix login timeout -- Opened PR #125: feat: add OAuth flow -- Reviewed PR #123 (approved), PR #456 (changes requested) -- 5 commits in backend-api -- [From Claude Code] Fix authentication timeout bug -- [From Claude Code] Implement OAuth flow -- [From Claude Code] Research payment providers +- **NEVER include raw ticket/PR numbers without title or summary** — "Fixed PROJ-123" communicates nothing to a teammate reading async. "Fixed PROJ-123: session timeout causing data loss" takes 3 seconds to write and is actually actionable. -Anything else you worked on yesterday that I missed?" -User: "No, that covers it" +- **NEVER assume the current directory is the only project** — Engineers routinely work across 2–5 repos (frontend, backend, infra, shared libs). Defaulting to current directory silently excludes the majority of their work. Always ask. -Claude: "What will you work on today?" -User: "Continue the OAuth feature and deploy to staging" +- **NEVER generate the update before completing all 4 questions** — The blocker or discussion topic revealed in Q3/Q4 often reframes Q1/Q2 ("I merged that PR but it's actually blocking deploy"). Generating early produces an update that misrepresents reality. -Claude: "Any blockers?" -User: "No" +- **NEVER use full Markdown headers (`##`) in a Slack/chat async post** — Slack does not render Markdown headings. The output displays as `## Yesterday` literally. Use `*Yesterday:*` (bold inline). Sending malformatted async updates is worse than no update — it signals the user didn't review the output. -Claude: "Any topic to bring up at the end of the daily?" -User: "Yes, I want to discuss the architecture of the new payments module" +- **NEVER treat a missed-day standup the same as a normal one** — If the user skipped Monday's standup, Tuesday's "yesterday" covers 3 days. Ask: "Last time you posted was [day] — shall I cover since then?" and compress multi-day summaries rather than writing 30 bullets. -[Claude generates update] -``` +- **NEVER pull "today's" activity as yesterday** — GitHub/git defaults can include commits from the current morning. A commit made at 8:47am today belongs in "Today", not "Yesterday". Scope all tool queries to `--since="yesterday midnight"` or equivalent, not "last 24h from now." --- -## Output Example - -```markdown -# Daily Update - 2026-01-22 - -## Yesterday -- Worked on authentication feature -- Research on payment providers -- Merged PR #120 (fix: login timeout) -- Opened PR #125 (feat: add OAuth flow) - -## Today -- Continue OAuth feature -- Deploy to staging - -## Blockers -- No blockers - -## PRs & Reviews -- **Opened:** PR #125 - feat: add OAuth flow -- **Merged:** PR #120 - fix: login timeout -- **Reviews:** PR #123 (approved), PR #456 (changes requested) +## When Things Go Wrong -## Topics for Discussion -- Architecture of the new payments module +| Situation | Expert Response | +|-----------|-----------------| +| User says "I don't remember what I did" | Offer Claude Code digest first; if unavailable, ask: "What meetings did you have? Any PRs you reviewed? Any bugs you looked at even briefly?" — memory is trigger-based, not list-based | +| All integrations fail or unavailable | Proceed with pure interview — no tools required. Note: "Going manual — just answer the 4 questions" | +| User has only 5 minutes | Skip Phase 1 entirely; run Phase 2 verbally ("quick fire: yesterday? today? blockers? discussion?"); generate immediately | +| Async post with a critical blocker | Move blockers to the TOP of the update; add explicit "Action needed from @name by [time]" line; do not bury it in section 3 | +| User works across time zones (their today = team's yesterday) | Confirm the reference period: "Your standup covers work from [user's yesterday start] to now — is that right?" | +| Standup bot asks questions one at a time | Coach user: answer each prompt as a complete sentence; the bot may forward responses verbatim to a different channel | +| User's PR list includes team members' PRs they reviewed | Separate "Reviews done" from "Work opened/merged" — conflating them overstates output | --- -*Links:* -- https://github.com/org/repo/pull/125 -- https://github.com/org/repo/pull/120 -``` - ---- - -## Anti-Patterns -| Avoid | Why (Expert Knowledge) | Instead | -|-------|------------------------|---------| -| Run gh/jira without asking | Users may have personal repos visible, or be in a sensitive project context they don't want exposed | Always ask first, let user choose repos | -| Assume current directory is the only project | Developers often work on 2-5 repos simultaneously (frontend, backend, infra) | Ask "Which projects are you working on?" | -| Skip interview even with tool data | Tools capture WHAT happened but miss WHY and context (research, meetings, planning) | Interview is primary, tools supplement | -| Generate update before all 4 questions | User might have critical blocker or discussion topic that changes the narrative | Complete interview, then generate | -| Include raw commit messages | Commit messages are often cryptic ("fix", "wip") and don't tell the story | Summarize into human-readable outcomes | -| Ask for data after interview | Showing insights during interview makes questions smarter ("I see you merged PR #123, anything else?") | Pull data first, then interview with context | - ---- +## Quick Reference -## NEVER +| Phase | Action | Tool | +|-------|--------|------| +| 1. Detect & Offer | Silent check → ask consent → pull data | Bash (silent), AskUserQuestionTool | +| 2. Interview | 4 questions with pulled-data context | AskUserQuestionTool | +| 3. Generate | Format for meeting type | Text output | -- **NEVER assume tools are configured** — Many devs have gh installed but not authenticated, or jira CLI pointing to wrong instance -- **NEVER skip the "Topics for Discussion" question** — This is often the most valuable part of standup that tools can't capture -- **NEVER generate more than 15 bullets** — Standup should be <2 minutes to read; long updates lose the audience -- **NEVER include ticket/PR numbers without context** — "PROJ-123" means nothing; always include title or summary -- **NEVER pull data from repos user didn't explicitly approve** — Even if you can see other repos, respect boundaries +**Reference files:** +- `references/async-remote-patterns.md` — Slack format, bot format, time-zone edge cases, remote extras +- `references/integration-details.md` — All detection/pull commands, error handling, output templates diff --git a/skills/daily-meeting-update/references/async-remote-patterns.md b/skills/daily-meeting-update/references/async-remote-patterns.md new file mode 100644 index 0000000..1b83794 --- /dev/null +++ b/skills/daily-meeting-update/references/async-remote-patterns.md @@ -0,0 +1,111 @@ +# Async & Remote Team Standup Patterns + +## When the Meeting Isn't Real-Time + +Many teams run "standups" asynchronously — posted to Slack, a wiki, or a bot. The core 4-question structure still applies, but formatting and tone shift significantly. + +--- + +## Async Update Formats + +### Slack / Chat Post (most common) + +``` +*Yesterday:* Fixed session timeout bug (PR #120 merged), reviewed @alice's payment PR +*Today:* Continue OAuth flow (PROJ-123), unblock @bob on API schema +*Blockers:* Waiting on security sign-off for the OAuth client ID — pinged @infra-team +*FYI:* We should discuss auth token TTL strategy before Wednesday deploy — happy to start a thread +``` + +**Rules for chat format:** +- Inline bold headers, not Markdown `##` headings (Slack renders `*text*` not `## text`) +- Tag teammates who are involved in blockers or handoffs +- Keep to 4–6 lines; no bullets for Today if items are few +- Discussion topics become threaded FYIs, not a separate section + +### Email / Long-form Async (weekly recap hybrid) + +Use full Markdown output template — this context loads from SKILL.md Phase 3. + +### Standup Bot (Geekbot, Standuply, etc.) + +Many bots ask each question one-at-a-time via DM. Coach the user: +- Answer each bot prompt directly from their interview notes +- For "blockers" prompt: full sentence (bots often forward to a manager channel verbatim) +- For "today" prompt: be explicit about ticket IDs since bots often auto-link them + +--- + +## Time-Zone Edge Cases + +### User is First to Post (early timezone) + +- "Yesterday" window may not overlap with teammates' yesterday +- Clarify: "I'll include anything from the past 24h of my work, not just your team's calendar day" +- Pull GitHub data for last 24h, not "since midnight local" + +### User Missed Yesterday's Standup + +Decision tree: + +``` +Did you post anything async yesterday? +├─ Yes → "Reference your async post, add what changed since then" +└─ No → "Cover 2 days of yesterday (Thu + Fri if Mon standup)" + → Compress: "Earlier this week: X. Yesterday: Y." + → Flag the gap explicitly if blockers went unaddressed +``` + +### Cross-timezone Handoffs + +If the user's update will be read by teammates in a different timezone who act on it while the user sleeps: +- Move blockers to the TOP of the update (readers need to act immediately) +- Include explicit handoff notes: "Leaving PROJ-456 at X state — @alice can pick up from branch `feature/xyz`" +- Add a "Needs Action" section if anything requires a teammate decision + +--- + +## Team-Specific Anti-Patterns (Async) + +| Situation | Symptom | Fix | +|-----------|---------|-----| +| PM-heavy team | Update becomes a status report with metrics | Keep format terse; metrics belong in a weekly | +| High-meeting culture | User writes update nobody reads | Ask: "Who actually reads this? Format for them." | +| Monorepo team | Commits from 6 people in same repo; user's stand out poorly | Pull by author filter; lead with impact, not repo name | +| Distributed team, no overlap | "Today" items may be stale by time readers see them | Add "by EOD my timezone" estimates on key items | + +--- + +## Formatting Decision Tree: Sync vs Async + +``` +Is the meeting happening live (video/in-person)? +├─ YES → Use full Markdown template (rendered in meeting tool) +│ Keep to <12 bullets; audience can ask questions live +└─ NO → Is it posted to Slack/chat? + ├─ YES → Use chat format (bold headers, no ##, tag names) + │ Keep to 4–6 lines + └─ NO → Is it a bot/form input? + ├─ YES → Answer per prompt, full sentences, no bullets + └─ NO → Email/wiki → Full Markdown, add links section +``` + +--- + +## Remote-First Standup Extras (Questions 5 & 6) + +When teams are fully remote with no watercooler, optionally add: + +**Question 5: Availability / Context** +``` +"Any changes to your availability today? (late start, early end, off-site, focus time?)" +``` +Why this matters: Remote teams can't see you're not at your desk. Heads-up prevents blocked teammates. + +**Question 6: Cross-team dependencies** +``` +"Are you waiting on anyone outside your team, or does anyone outside your team need something from you?" +``` +Why this matters: Cross-team blockers often stall for days in async environments because nobody escalates them. + +Only offer these extra questions if the user mentions they're fully remote or the team uses async standups. Don't add them to a normal 15-minute sync standup — it bloats the update. diff --git a/skills/daily-meeting-update/references/integration-details.md b/skills/daily-meeting-update/references/integration-details.md new file mode 100644 index 0000000..34c6ae4 --- /dev/null +++ b/skills/daily-meeting-update/references/integration-details.md @@ -0,0 +1,97 @@ +# Integration Details & Detection Reference + +## Detection Commands (run silently, suppress all errors) + +| Integration | Detection Command | Notes | +|-------------|-------------------|-------| +| Claude Code History | `ls ~/.claude/projects/*/*.jsonl 2>/dev/null \| head -1` | Presence of any .jsonl file | +| GitHub CLI | `gh auth status 2>/dev/null` | Exit code 0 = authenticated | +| Jira CLI | `which jira 2>/dev/null` | Existence only; may need `jira auth` | +| Atlassian MCP | Check if `mcp__atlassian__*` tools available | MCP context check | +| Git | `git rev-parse --is-inside-work-tree 2>/dev/null` | Current directory | + +## GitHub/Git Pull Commands + +```bash +# Commits by current user in last 24h +git log --author="$(git config user.name)" --since="24 hours ago" --oneline + +# PRs opened by user (gh CLI) +gh pr list --author="@me" --state=all --limit=10 --json number,title,state,url + +# PRs reviewed by user +gh search prs --reviewed-by="@me" --updated=">$(date -d '24 hours ago' +%Y-%m-%d)" --json number,title,url + +# Merged PRs by user +gh pr list --author="@me" --state=merged --limit=5 --json number,title,url +``` + +## Claude Code Digest Script + +```bash +# Get yesterday's sessions as JSON +python3 ~/.claude/skills/daily-meeting-update/scripts/claude_digest.py --format json + +# Get today's sessions +python3 ~/.claude/skills/daily-meeting-update/scripts/claude_digest.py --date today --format json + +# Filter to specific project +python3 ~/.claude/skills/daily-meeting-update/scripts/claude_digest.py --project ~/my-app --format json +``` + +**Fallback if script fails:** Skip Claude Code integration silently, proceed with interview. The script is supplemental — never block the standup flow. + +## Jira Pull (CLI) + +```bash +# Tickets assigned to me, updated in last 24h +jira issue list --assignee=me --updated=-24h --output=json +``` + +## Atlassian MCP (if available) + +Use `mcp__atlassian__search_issues` with JQL: +``` +assignee = currentUser() AND updated >= -24h ORDER BY updated DESC +``` + +## Output Template Reference + +```markdown +# Daily Update - [DATE] + +## Yesterday +- [Items from interview + tool data] + +## Today +- [Items from interview + Jira suggestions] + +## Blockers +- [Blockers or "No blockers"] + +## PRs & Reviews (if GitHub pulled) +- **Opened:** PR #N - title +- **Merged:** PR #N - title +- **Reviews:** PR #N (approved/changes requested) + +## Jira (if pulled) +- PROJ-N: Title (Status) + +## Topics for Discussion +- [Topics or "None"] + +--- +*Links:* +- [PR/ticket URLs] +``` + +## Error Handling + +| Error | Cause | Recovery | +|-------|-------|----------| +| `gh: command not found` | GitHub CLI not installed | Skip GitHub integration, continue | +| `gh auth status` fails | Not authenticated | Skip GitHub, note to user optionally | +| `jira: command not found` | Jira CLI not installed | Skip Jira, check for Atlassian MCP | +| `claude_digest.py` fails | Python not installed / no sessions | Skip silently, continue with interview | +| Git not a repo | Not in git directory | Skip git integration for current dir, ask about specific repos | +| MCP tools unavailable | Not in Claude Code context | Skip MCP integrations entirely | diff --git a/skills/database-schema-designer/SKILL.md b/skills/database-schema-designer/SKILL.md index 624a8e2..49e42cb 100644 --- a/skills/database-schema-designer/SKILL.md +++ b/skills/database-schema-designer/SKILL.md @@ -1,687 +1,130 @@ --- name: database-schema-designer -description: Design robust, scalable database schemas for SQL and NoSQL databases. Provides normalization guidelines, indexing strategies, migration patterns, constraint design, and performance optimization. Ensures data integrity, query performance, and maintainable data models. +description: Design production-ready SQL and NoSQL database schemas. Use when asked to design schema, create tables, model data, database design, schema for, or architect a data model. Covers normalization, indexing strategy, migration patterns, constraint design, and platform-specific traps for PostgreSQL, MySQL, and MongoDB. license: MIT --- # Database Schema Designer -Design production-ready database schemas with best practices built-in. +## Mindset ---- - -## Quick Start - -Just describe your data model: - -``` -design a schema for an e-commerce platform with users, products, orders -``` - -You'll get a complete SQL schema like: - -```sql -CREATE TABLE users ( - id BIGINT AUTO_INCREMENT PRIMARY KEY, - email VARCHAR(255) UNIQUE NOT NULL, - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP -); - -CREATE TABLE orders ( - id BIGINT AUTO_INCREMENT PRIMARY KEY, - user_id BIGINT NOT NULL REFERENCES users(id), - total DECIMAL(10,2) NOT NULL, - INDEX idx_orders_user (user_id) -); -``` - -**What to include in your request:** -- Entities (users, products, orders) -- Key relationships (users have orders, orders have items) -- Scale hints (high-traffic, millions of records) -- Database preference (SQL/NoSQL) - defaults to SQL if not specified - ---- - -## Triggers - -| Trigger | Example | -|---------|---------| -| `design schema` | "design a schema for user authentication" | -| `database design` | "database design for multi-tenant SaaS" | -| `create tables` | "create tables for a blog system" | -| `schema for` | "schema for inventory management" | -| `model data` | "model data for real-time analytics" | -| `I need a database` | "I need a database for tracking orders" | -| `design NoSQL` | "design NoSQL schema for product catalog" | - ---- - -## Key Terms - -| Term | Definition | -|------|------------| -| **Normalization** | Organizing data to reduce redundancy (1NF → 2NF → 3NF) | -| **3NF** | Third Normal Form - no transitive dependencies between columns | -| **OLTP** | Online Transaction Processing - write-heavy, needs normalization | -| **OLAP** | Online Analytical Processing - read-heavy, benefits from denormalization | -| **Foreign Key (FK)** | Column that references another table's primary key | -| **Index** | Data structure that speeds up queries (at cost of slower writes) | -| **Access Pattern** | How your app reads/writes data (queries, joins, filters) | -| **Denormalization** | Intentionally duplicating data to speed up reads | - ---- - -## Quick Reference - -| Task | Approach | Key Consideration | -|------|----------|-------------------| -| New schema | Normalize to 3NF first | Domain modeling over UI | -| SQL vs NoSQL | Access patterns decide | Read/write ratio matters | -| Primary keys | INT or UUID | UUID for distributed systems | -| Foreign keys | Always constrain | ON DELETE strategy critical | -| Indexes | FKs + WHERE columns | Column order matters | -| Migrations | Always reversible | Backward compatible first | - ---- - -## Process Overview - -``` -Your Data Requirements - | - v -+-----------------------------------------------------+ -| Phase 1: ANALYSIS | -| * Identify entities and relationships | -| * Determine access patterns (read vs write heavy) | -| * Choose SQL or NoSQL based on requirements | -+-----------------------------------------------------+ - | - v -+-----------------------------------------------------+ -| Phase 2: DESIGN | -| * Normalize to 3NF (SQL) or embed/reference (NoSQL) | -| * Define primary keys and foreign keys | -| * Choose appropriate data types | -| * Add constraints (UNIQUE, CHECK, NOT NULL) | -+-----------------------------------------------------+ - | - v -+-----------------------------------------------------+ -| Phase 3: OPTIMIZE | -| * Plan indexing strategy | -| * Consider denormalization for read-heavy queries | -| * Add timestamps (created_at, updated_at) | -+-----------------------------------------------------+ - | - v -+-----------------------------------------------------+ -| Phase 4: MIGRATE | -| * Generate migration scripts (up + down) | -| * Ensure backward compatibility | -| * Plan zero-downtime deployment | -+-----------------------------------------------------+ - | - v -Production-Ready Schema -``` - ---- - -## Commands - -| Command | When to Use | Action | -|---------|-------------|--------| -| `design schema for {domain}` | Starting fresh | Full schema generation | -| `normalize {table}` | Fixing existing table | Apply normalization rules | -| `add indexes for {table}` | Performance issues | Generate index strategy | -| `migration for {change}` | Schema evolution | Create reversible migration | -| `review schema` | Code review | Audit existing schema | - -**Workflow:** Start with `design schema` → iterate with `normalize` → optimize with `add indexes` → evolve with `migration` - ---- +1. **Access pattern is the schema.** No access pattern = no schema. Extract the 3 most frequent read queries first; the schema exists to serve them. Normalization is a starting point, not a destination. +2. **The database enforces what the application forgets.** Every constraint you skip becomes a data integrity incident at 2am. Defensive schema design is not premature optimization. +3. **UUID v4 as a clustered PK is a silent perf killer.** Random UUIDs fragment B-tree indexes on every insert. Use `BIGSERIAL`/`BIGINT AUTO_INCREMENT` for high-write tables; use UUIDv7 (time-ordered) or `gen_random_uuid()` only when distributed ID generation is required. +4. **TOAST in PostgreSQL means your "small" table isn't small.** Columns exceeding 2KB are silently moved to a TOAST table; a query that SELECTs a wide TEXT column on 100k rows does 100k TOAST lookups. Know your column widths before indexing. +5. **Migrations run on live data.** Every DDL change is a production operation. Design for zero-downtime: add before remove, nullable before constrained, never rename in one step. -## Core Principles +## Navigation -| Principle | WHY | Implementation | -|-----------|-----|----------------| -| Model the Domain | UI changes, domain doesn't | Entity names reflect business concepts | -| Data Integrity First | Corruption is costly to fix | Constraints at database level | -| Optimize for Access Pattern | Can't optimize for both | OLTP: normalized, OLAP: denormalized | -| Plan for Scale | Retrofitting is painful | Index strategy + partitioning plan | +**Use this skill when**: designing a new schema from scratch, reviewing an existing schema for problems, planning a migration, choosing between SQL/NoSQL, or optimizing a slow query via schema changes. ---- - -## Anti-Patterns - -| Avoid | Why | Instead | -|-------|-----|---------| -| VARCHAR(255) everywhere | Wastes storage, hides intent | Size appropriately per field | -| FLOAT for money | Rounding errors | DECIMAL(10,2) | -| Missing FK constraints | Orphaned data | Always define foreign keys | -| No indexes on FKs | Slow JOINs | Index every foreign key | -| Storing dates as strings | Can't compare/sort | DATE, TIMESTAMP types | -| SELECT * in queries | Fetches unnecessary data | Explicit column lists | -| Non-reversible migrations | Can't rollback | Always write DOWN migration | -| Adding NOT NULL without default | Breaks existing rows | Add nullable, backfill, then constrain | - ---- +**Do NOT use this skill when**: writing ORM model code without schema intent (use the ORM docs), tuning query plans without schema changes (use EXPLAIN), or designing application-layer caching (Redis patterns are separate). -## Verification Checklist - -After designing a schema: - -- [ ] Every table has a primary key -- [ ] All relationships have foreign key constraints -- [ ] ON DELETE strategy defined for each FK -- [ ] Indexes exist on all foreign keys -- [ ] Indexes exist on frequently queried columns -- [ ] Appropriate data types (DECIMAL for money, etc.) -- [ ] NOT NULL on required fields -- [ ] UNIQUE constraints where needed -- [ ] CHECK constraints for validation -- [ ] created_at and updated_at timestamps -- [ ] Migration scripts are reversible -- [ ] Tested on staging with production data - ---- - -
-Deep Dive: Normalization (SQL) - -### Normal Forms - -| Form | Rule | Violation Example | -|------|------|-------------------| -| **1NF** | Atomic values, no repeating groups | `product_ids = '1,2,3'` | -| **2NF** | 1NF + no partial dependencies | customer_name in order_items | -| **3NF** | 2NF + no transitive dependencies | country derived from postal_code | - -### 1st Normal Form (1NF) - -```sql --- BAD: Multiple values in column -CREATE TABLE orders ( - id INT PRIMARY KEY, - product_ids VARCHAR(255) -- '101,102,103' -); - --- GOOD: Separate table for items -CREATE TABLE orders ( - id INT PRIMARY KEY, - customer_id INT -); - -CREATE TABLE order_items ( - id INT PRIMARY KEY, - order_id INT REFERENCES orders(id), - product_id INT -); -``` - -### 2nd Normal Form (2NF) - -```sql --- BAD: customer_name depends only on customer_id -CREATE TABLE order_items ( - order_id INT, - product_id INT, - customer_name VARCHAR(100), -- Partial dependency! - PRIMARY KEY (order_id, product_id) -); - --- GOOD: Customer data in separate table -CREATE TABLE customers ( - id INT PRIMARY KEY, - name VARCHAR(100) -); -``` - -### 3rd Normal Form (3NF) - -```sql --- BAD: country depends on postal_code -CREATE TABLE customers ( - id INT PRIMARY KEY, - postal_code VARCHAR(10), - country VARCHAR(50) -- Transitive dependency! -); - --- GOOD: Separate postal_codes table -CREATE TABLE postal_codes ( - code VARCHAR(10) PRIMARY KEY, - country VARCHAR(50) -); -``` - -### When to Denormalize - -| Scenario | Denormalization Strategy | -|----------|-------------------------| -| Read-heavy reporting | Pre-calculated aggregates | -| Expensive JOINs | Cached derived columns | -| Analytics dashboards | Materialized views | - -```sql --- Denormalized for performance -CREATE TABLE orders ( - id INT PRIMARY KEY, - customer_id INT, - total_amount DECIMAL(10,2), -- Calculated - item_count INT -- Calculated -); +**Quick decision tree:** ``` - -
- -
-Deep Dive: Data Types - -### String Types - -| Type | Use Case | Example | -|------|----------|---------| -| CHAR(n) | Fixed length | State codes, ISO dates | -| VARCHAR(n) | Variable length | Names, emails | -| TEXT | Long content | Articles, descriptions | - -```sql --- Good sizing -email VARCHAR(255) -phone VARCHAR(20) -country_code CHAR(2) +Need schema help? + ├─ New design → start with access patterns, then entities + ├─ Existing schema review → load references/schema-design-checklist.md + ├─ Performance problem → check indexes first, denormalize second + └─ Migration plan → expand "When Things Go Wrong" section below ``` -### Numeric Types +## Philosophy -| Type | Range | Use Case | -|------|-------|----------| -| TINYINT | -128 to 127 | Age, status codes | -| SMALLINT | -32K to 32K | Quantities | -| INT | -2.1B to 2.1B | IDs, counts | -| BIGINT | Very large | Large IDs, timestamps | -| DECIMAL(p,s) | Exact precision | Money | -| FLOAT/DOUBLE | Approximate | Scientific data | +Schema design is a contract between your data and your future self. Make the database enforce correctness at rest so application bugs produce errors, not silent corruption. Every deviation from normalized form must earn its keep with a measured performance justification. -```sql --- ALWAYS use DECIMAL for money -price DECIMAL(10, 2) -- $99,999,999.99 +## NEVER --- NEVER use FLOAT for money -price FLOAT -- Rounding errors! -``` +- **NEVER use UUID v4 as a clustered primary key on high-write tables** — random insertion order causes B-tree page splits on every insert; at 1M+ rows/day this degrades write throughput 3-10x versus sequential keys. Use `BIGSERIAL` or UUIDv7 instead. +- **NEVER store monetary values in FLOAT or DOUBLE** — IEEE 754 rounding means `0.1 + 0.2 = 0.30000000000000004`; a single rounding error in financial data can cascade into reconciliation failures across millions of transactions. Always `DECIMAL(19,4)`. +- **NEVER add a NOT NULL column to a large table in a single migration** — PostgreSQL and MySQL rewrite the entire table; on a 500GB table this locks writes for minutes. Pattern: add nullable → backfill in batches → add constraint. +- **NEVER create a polymorphic `entity_type` + `entity_id` column pair without a partial index** — these columns can never have a foreign key; without a partial index per type, every query requires a full scan. Create `CREATE INDEX ... WHERE entity_type = 'post'` for each type. +- **NEVER rely on application-side cascades instead of database-level ON DELETE** — application code can be bypassed (direct SQL, migrations, admin tools); orphaned rows accumulate silently. Define ON DELETE strategy on every FK. +- **NEVER index every column "just in case"** — each index doubles write amplification and consumes buffer pool; on a write-heavy table, 10 unused indexes can halve insert throughput. Index exactly the queries you have, not the queries you imagine. +- **NEVER use `TEXT` for enum-like fields without a CHECK constraint or lookup table** — `status VARCHAR(20)` without `CHECK (status IN ('active','inactive'))` will contain 'Active', 'ACTIVE', 'activ', and NULL within a year. -### Date/Time Types +## When to Break the Rules -```sql -DATE -- 2025-10-31 -TIME -- 14:30:00 -DATETIME -- 2025-10-31 14:30:00 -TIMESTAMP -- Auto timezone conversion +| Rule | When to Break | Guard Rails | +|------|--------------|-------------| +| Normalize to 3NF | OLAP / reporting tables where JOIN cost dominates | Document the denorm + add comment in schema | +| FK constraints on every relationship | Ultra-high write throughput (>50k inserts/sec); some sharded DBs can't enforce cross-shard FKs | Enforce referential integrity at application layer with tests | +| Sequential INT primary key | Distributed/multi-writer systems where coordination is impossible | Use UUIDv7 (time-ordered) not v4 | +| UTC timestamps everywhere | Systems requiring local time audit trails (legal, compliance) | Store both: `event_at TIMESTAMPTZ`, `event_at_local TEXT` | +| Single schema per domain | Multi-tenant SaaS with strong isolation requirements | Schema-per-tenant or row-level security (PostgreSQL RLS) | --- Always store in UTC -created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP -updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP -``` +## When Things Go Wrong -### Boolean +| Situation | Likely Cause | Recovery | +|-----------|-------------|----------| +| Writes slowing down as table grows | Too many indexes; UUID v4 PK fragmentation | `pg_stat_user_indexes` to find unused indexes; switch to UUIDv7 or BIGSERIAL on next rebuild | +| Migration hangs on large table | Adding NOT NULL or adding index without CONCURRENTLY | Kill migration; use `CREATE INDEX CONCURRENTLY`; add column nullable first | +| Orphaned rows accumulating | Missing ON DELETE CASCADE or RESTRICT | Audit with `LEFT JOIN ... WHERE fk IS NULL`; add FK constraints in a quiet window | +| Query ignores index | Leading column not in WHERE clause; implicit cast mismatch; low cardinality | `EXPLAIN ANALYZE` to confirm; fix column order or cast; use partial index | +| TOAST bloat causing slow SELECTs | Wide TEXT/JSONB columns selected unnecessarily | `SELECT only_needed_cols`; move large blobs to object storage, store URL | +| Schema drift between environments | Migrations applied out of order | Use a migration framework (Flyway, Alembic, golang-migrate) with checksums; never hand-edit prod | -```sql --- PostgreSQL -is_active BOOLEAN DEFAULT TRUE +## Core Decision Framework --- MySQL -is_active TINYINT(1) DEFAULT 1 -``` +### SQL vs NoSQL -
+| Signal | Choose SQL | Choose NoSQL | +|--------|-----------|-------------| +| Data shape | Relational, structured | Hierarchical, variable schema | +| Query needs | Ad-hoc joins, aggregates | Known access patterns only | +| Consistency | ACID required | Eventual OK | +| Scale pattern | Vertical + read replicas | Horizontal sharding | -
-Deep Dive: Indexing Strategy +**Default: SQL.** NoSQL requires knowing your access patterns with certainty before design; SQL tolerates query pattern changes. -### When to Create Indexes +### Primary Key Selection -| Always Index | Reason | -|--------------|--------| -| Foreign keys | Speed up JOINs | -| WHERE clause columns | Speed up filtering | -| ORDER BY columns | Speed up sorting | -| Unique constraints | Enforced uniqueness | +| Scenario | Choice | Reason | +|----------|--------|--------| +| Single-server OLTP | `BIGSERIAL` / `BIGINT AUTO_INCREMENT` | Sequential, fast inserts, small storage | +| Distributed / multi-writer | UUIDv7 (`pg_uuidv7` extension) | Time-ordered, avoids fragmentation | +| Junction table | Composite `(a_id, b_id)` | Natural PK, no surrogate needed | +| External system sync | Natural key + surrogate | Natural key as UNIQUE, surrogate as PK | -```sql --- Foreign key index -CREATE INDEX idx_orders_customer ON orders(customer_id); +### Index Decision Tree --- Query pattern index -CREATE INDEX idx_orders_status_date ON orders(status, created_at); ``` - -### Index Types - -| Type | Best For | Example | -|------|----------|---------| -| B-Tree | Ranges, equality | `price > 100` | -| Hash | Exact matches only | `email = 'x@y.com'` | -| Full-text | Text search | `MATCH AGAINST` | -| Partial | Subset of rows | `WHERE is_active = true` | - -### Composite Index Order - -```sql -CREATE INDEX idx_customer_status ON orders(customer_id, status); - --- Uses index (customer_id first) -SELECT * FROM orders WHERE customer_id = 123; -SELECT * FROM orders WHERE customer_id = 123 AND status = 'pending'; - --- Does NOT use index (status alone) -SELECT * FROM orders WHERE status = 'pending'; +Need an index? + ├─ FK column? → YES, always + ├─ In WHERE or JOIN ON? → YES, if cardinality > 10% + ├─ In ORDER BY only? → partial index if filtering first + ├─ Full-text search? → GIN (Postgres) / FULLTEXT (MySQL) + └─ JSONB field query? → GIN on the column ``` -**Rule:** Most selective column first, or column most queried alone. +**Composite index column order:** equality filters first, then range filters, then ORDER BY columns. The leftmost column must appear in your WHERE clause for the index to activate. -### Index Pitfalls +## Platform-Specific Traps -| Pitfall | Problem | Solution | -|---------|---------|----------| -| Over-indexing | Slow writes | Only index what's queried | -| Wrong column order | Unused index | Match query patterns | -| Missing FK indexes | Slow JOINs | Always index FKs | +### PostgreSQL +- `SERIAL` is deprecated — use `GENERATED ALWAYS AS IDENTITY` +- `TIMESTAMP` has no timezone; use `TIMESTAMPTZ` everywhere +- `CREATE INDEX` locks table; `CREATE INDEX CONCURRENTLY` does not (but can't run in a transaction) +- JSONB GIN index covers containment (`@>`), not equality on extracted fields — use a functional index: `CREATE INDEX ON orders ((data->>'status'))` +- `VACUUM` does not reclaim disk space; `VACUUM FULL` does but locks. Use `pg_repack` for live bloat reclaim. -
+### MySQL / MariaDB +- InnoDB clusters the table on the PK — UUID v4 causes the fragmentation problem hardest here +- `DATETIME` vs `TIMESTAMP`: TIMESTAMP stores in UTC, auto-converts; DATETIME stores literal value. Use TIMESTAMP unless you need dates beyond 2038. +- `utf8` in MySQL is actually `utf8mb3` (3-byte max); emoji breaks silently. Always `utf8mb4`. +- Altering a column type on a large InnoDB table copies the full table in-place — use `pt-online-schema-change` or `gh-ost`. -
-Deep Dive: Constraints +### MongoDB +- Document size hard limit: 16MB. Embedding unbounded arrays (e.g., all comments in a post) hits this. Reference when the child collection grows without bound. +- Indexes are per-collection, not enforced across collections — referential integrity is entirely application responsibility. +- `$lookup` (join) is expensive; if you need it on hot paths, you embedded wrong. -### Primary Keys - -```sql --- Auto-increment (simple) -id INT AUTO_INCREMENT PRIMARY KEY - --- UUID (distributed systems) -id CHAR(36) PRIMARY KEY DEFAULT (UUID()) - --- Composite (junction tables) -PRIMARY KEY (student_id, course_id) -``` +## References -### Foreign Keys - -```sql -FOREIGN KEY (customer_id) REFERENCES customers(id) - ON DELETE CASCADE -- Delete children with parent - ON DELETE RESTRICT -- Prevent deletion if referenced - ON DELETE SET NULL -- Set to NULL when parent deleted - ON UPDATE CASCADE -- Update children when parent changes -``` - -| Strategy | Use When | -|----------|----------| -| CASCADE | Dependent data (order_items) | -| RESTRICT | Important references (prevent accidents) | -| SET NULL | Optional relationships | - -### Other Constraints - -```sql --- Unique -email VARCHAR(255) UNIQUE NOT NULL - --- Composite unique -UNIQUE (student_id, course_id) - --- Check -price DECIMAL(10,2) CHECK (price >= 0) -discount INT CHECK (discount BETWEEN 0 AND 100) - --- Not null -name VARCHAR(100) NOT NULL -``` - -
- -
-Deep Dive: Relationship Patterns - -### One-to-Many - -```sql -CREATE TABLE orders ( - id INT PRIMARY KEY, - customer_id INT NOT NULL REFERENCES customers(id) -); - -CREATE TABLE order_items ( - id INT PRIMARY KEY, - order_id INT NOT NULL REFERENCES orders(id) ON DELETE CASCADE, - product_id INT NOT NULL, - quantity INT NOT NULL -); -``` - -### Many-to-Many - -```sql --- Junction table -CREATE TABLE enrollments ( - student_id INT REFERENCES students(id) ON DELETE CASCADE, - course_id INT REFERENCES courses(id) ON DELETE CASCADE, - enrolled_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - PRIMARY KEY (student_id, course_id) -); -``` - -### Self-Referencing - -```sql -CREATE TABLE employees ( - id INT PRIMARY KEY, - name VARCHAR(100) NOT NULL, - manager_id INT REFERENCES employees(id) -); -``` - -### Polymorphic - -```sql --- Approach 1: Separate FKs (stronger integrity) -CREATE TABLE comments ( - id INT PRIMARY KEY, - content TEXT NOT NULL, - post_id INT REFERENCES posts(id), - photo_id INT REFERENCES photos(id), - CHECK ( - (post_id IS NOT NULL AND photo_id IS NULL) OR - (post_id IS NULL AND photo_id IS NOT NULL) - ) -); - --- Approach 2: Type + ID (flexible, weaker integrity) -CREATE TABLE comments ( - id INT PRIMARY KEY, - content TEXT NOT NULL, - commentable_type VARCHAR(50) NOT NULL, - commentable_id INT NOT NULL -); -``` - -
- -
-Deep Dive: NoSQL Design (MongoDB) - -### Embedding vs Referencing - -| Factor | Embed | Reference | -|--------|-------|-----------| -| Access pattern | Read together | Read separately | -| Relationship | 1:few | 1:many | -| Document size | Small | Approaching 16MB | -| Update frequency | Rarely | Frequently | - -### Embedded Document - -```json -{ - "_id": "order_123", - "customer": { - "id": "cust_456", - "name": "Jane Smith", - "email": "jane@example.com" - }, - "items": [ - { "product_id": "prod_789", "quantity": 2, "price": 29.99 } - ], - "total": 109.97 -} -``` - -### Referenced Document - -```json -{ - "_id": "order_123", - "customer_id": "cust_456", - "item_ids": ["item_1", "item_2"], - "total": 109.97 -} -``` - -### MongoDB Indexes - -```javascript -// Single field -db.users.createIndex({ email: 1 }, { unique: true }); - -// Composite -db.orders.createIndex({ customer_id: 1, created_at: -1 }); - -// Text search -db.articles.createIndex({ title: "text", content: "text" }); - -// Geospatial -db.stores.createIndex({ location: "2dsphere" }); -``` - -
- -
-Deep Dive: Migrations - -### Migration Best Practices - -| Practice | WHY | -|----------|-----| -| Always reversible | Need to rollback | -| Backward compatible | Zero-downtime deploys | -| Schema before data | Separate concerns | -| Test on staging | Catch issues early | - -### Adding a Column (Zero-Downtime) - -```sql --- Step 1: Add nullable column -ALTER TABLE users ADD COLUMN phone VARCHAR(20); - --- Step 2: Deploy code that writes to new column - --- Step 3: Backfill existing rows -UPDATE users SET phone = '' WHERE phone IS NULL; - --- Step 4: Make required (if needed) -ALTER TABLE users MODIFY phone VARCHAR(20) NOT NULL; -``` - -### Renaming a Column (Zero-Downtime) - -```sql --- Step 1: Add new column -ALTER TABLE users ADD COLUMN email_address VARCHAR(255); - --- Step 2: Copy data -UPDATE users SET email_address = email; - --- Step 3: Deploy code reading from new column --- Step 4: Deploy code writing to new column - --- Step 5: Drop old column -ALTER TABLE users DROP COLUMN email; -``` - -### Migration Template - -```sql --- Migration: YYYYMMDDHHMMSS_description.sql - --- UP -BEGIN; -ALTER TABLE users ADD COLUMN phone VARCHAR(20); -CREATE INDEX idx_users_phone ON users(phone); -COMMIT; - --- DOWN -BEGIN; -DROP INDEX idx_users_phone ON users; -ALTER TABLE users DROP COLUMN phone; -COMMIT; -``` - -
- -
-Deep Dive: Performance Optimization - -### Query Analysis - -```sql -EXPLAIN SELECT * FROM orders -WHERE customer_id = 123 AND status = 'pending'; -``` - -| Look For | Meaning | -|----------|---------| -| type: ALL | Full table scan (bad) | -| type: ref | Index used (good) | -| key: NULL | No index used | -| rows: high | Many rows scanned | - -### N+1 Query Problem - -```python -# BAD: N+1 queries -orders = db.query("SELECT * FROM orders") -for order in orders: - customer = db.query(f"SELECT * FROM customers WHERE id = {order.customer_id}") - -# GOOD: Single JOIN -results = db.query(""" - SELECT orders.*, customers.name - FROM orders - JOIN customers ON orders.customer_id = customers.id -""") -``` - -### Optimization Techniques - -| Technique | When to Use | -|-----------|-------------| -| Add indexes | Slow WHERE/ORDER BY | -| Denormalize | Expensive JOINs | -| Pagination | Large result sets | -| Caching | Repeated queries | -| Read replicas | Read-heavy load | -| Partitioning | Very large tables | - -
- ---- +Load these when needed — do not include in every response: -## Extension Points +- **Full checklist**: `references/schema-design-checklist.md` — use during schema review or audit tasks +- **Normalization deep dive**: explain 1NF/2NF/3NF with examples only when user asks to normalize an existing schema +- **Migration patterns**: expand zero-downtime steps only when user asks for migration scripts -1. **Database-Specific Patterns:** Add MySQL vs PostgreSQL vs SQLite variations -2. **Advanced Patterns:** Time-series, event sourcing, CQRS, multi-tenancy -3. **ORM Integration:** TypeORM, Prisma, SQLAlchemy patterns -4. **Monitoring:** Query performance tracking, slow query alerts +Once your schema is designed, use openapi-to-typescript to generate TypeScript interfaces from an OpenAPI spec that mirrors your data model — especially useful for readOnly/writeOnly field splitting between read/write DTOs. diff --git a/skills/datadog-cli/SKILL.md b/skills/datadog-cli/SKILL.md index b1cc94e..b72aaf3 100644 --- a/skills/datadog-cli/SKILL.md +++ b/skills/datadog-cli/SKILL.md @@ -1,127 +1,117 @@ --- name: datadog-cli -description: Datadog CLI for searching logs, querying metrics, tracing requests, and managing dashboards. Use this when debugging production issues or working with Datadog observability. +description: Datadog CLI for searching logs, querying metrics, tracing distributed requests, and managing dashboards. Use when debugging production incidents, investigating error spikes, correlating traces, or analyzing service health in Datadog. --- # Datadog CLI -A CLI tool for AI agents to debug and triage using Datadog logs and metrics. +## Mindset -## Required Reading +- **Start with `errors` + `compare`, not raw search** — `errors` aggregates by service/type instantly; `compare` tells you if the spike is new or chronic. Raw search before these wastes API quota and context. +- **Trace ID is your thread through chaos** — when a user reports a broken transaction, get the trace ID first (`logs search` with `@http.url` or `@user.id`), then `logs trace` to reconstruct the full call chain across services. Don't search per-service manually. +- **`patterns` before reading individual logs** — Datadog indexes thousands of variants of the same error. `logs patterns` normalizes UUIDs/IPs/numbers into templates; reading raw logs before this is signal-diluting. +- **Dashboard updates are destructive** — the API replaces the entire dashboard JSON. A partial PUT deletes widgets you didn't include. Always `get` → modify → `update`. +- **Rate limits are silent trip wires** — the Logs Search API is capped at 300 requests/minute per org. `logs multi` with many parallel queries can exhaust quota for the whole team. Use `--limit` conservatively. -**You MUST read the relevant reference docs before using any command:** -- [Log Commands](references/logs-commands.md) -- [Metrics](references/metrics.md) -- [Query Syntax](references/query-syntax.md) -- [Workflows](references/workflows.md) -- [Dashboards](references/dashboards.md) +## Navigation -## Setup +**Use this skill when**: searching Datadog logs, querying metrics timeseries, tracing distributed requests by trace ID, detecting log patterns, comparing error rates between time windows, managing dashboards, or investigating a production incident end-to-end. -### Environment Variables (Required) +**Do NOT use this skill when**: +- The user wants to configure Datadog monitors/alerts (requires Datadog API directly, not this CLI) +- The user wants to query Synthetics, RUM, or Security signals (not supported by this CLI) +- No `DD_API_KEY` + `DD_APP_KEY` are set — prompt for them first -```bash -export DD_API_KEY="your-api-key" -export DD_APP_KEY="your-app-key" -``` +**Ambiguous input decision tree**: +- "Check errors" → `errors --from 1h` then `logs compare` +- "Debug request X" → get trace ID → `logs trace` +- "Is the spike new?" → `logs compare --period 1h` +- "What's failing?" → `logs patterns --query "status:error"` -Get keys from: https://app.datadoghq.com/organization-settings/api-keys +## Philosophy -### Running the CLI +Observability triage is a narrowing funnel: start wide (service-level aggregations), identify the signal (patterns), then zoom in (individual logs + traces). Never start at the bottom of the funnel. -```bash -npx @leoflores/datadog-cli -``` +## NEVER -For non-US Datadog sites, use `--site` flag: -```bash -npx @leoflores/datadog-cli logs search --query "*" --site datadoghq.eu -``` +- NEVER run `logs search` with `--query "*"` and no `--limit` — the default 100-result limit still pages through the full index scan; always scope by `service:` or `status:` to avoid hitting rate limits and returning noise. +- NEVER run `dashboards update` without first running `dashboards get` — the update endpoint replaces the full dashboard JSON; any widget you omit is permanently deleted from the dashboard. +- NEVER use `logs tail` in an automated/non-interactive context — it polls indefinitely and will block the agent; use `logs search` with a short `--from` window instead. +- NEVER send `logs multi` with more than 5 parallel queries without checking org rate limits — each parallel query counts as a separate API call; 10+ queries can exhaust the 300 req/min org limit mid-investigation. +- NEVER query metrics with `{*}` scope and a long window (e.g., `--from 7d`) — this returns all host timeseries; scope with `{service:X,env:prod}` or you'll get truncated/sampled data silently. +- NEVER interpret a 0-count result as "no errors" — it may mean the query matched no indexed facets (typo in service name, wrong env tag). Verify with `services --from 1h` to confirm the service is emitting logs at all. -## Commands Overview +## When Things Go Wrong -| Command | Description | -|---------|-------------| -| `logs search` | Search logs with filters | -| `logs tail` | Stream logs in real-time | -| `logs trace` | Find logs for a distributed trace | -| `logs context` | Get logs before/after a timestamp | -| `logs patterns` | Group similar log messages | -| `logs compare` | Compare log counts between periods | -| `logs multi` | Run multiple queries in parallel | -| `logs agg` | Aggregate logs by facet | -| `metrics query` | Query timeseries metrics | -| `errors` | Quick error summary by service/type | -| `services` | List services with log activity | -| `dashboards` | Manage dashboards (CRUD) | -| `dashboard-lists` | Manage dashboard lists | - - -## Quick Examples - -### Search Errors -```bash -npx @leoflores/datadog-cli logs search --query "status:error" --from 1h --pretty -``` +| Situation | Likely Cause | Recovery | +|-----------|-------------|----------| +| `401 Unauthorized` | `DD_API_KEY` or `DD_APP_KEY` not set / wrong org | `echo $DD_API_KEY` — if empty, export both keys; confirm keys are for the correct Datadog site | +| `429 Too Many Requests` | Org rate limit hit (300 req/min for Logs) | Wait 60 seconds; reduce `logs multi` parallelism; add `--limit 50` to searches | +| Empty results but no error | Wrong site (`datadoghq.com` vs `datadoghq.eu`) or service name typo | Run `services --from 24h` to confirm service exists; add `--site datadoghq.eu` if EU org | +| `logs trace` returns nothing | Trace ID format mismatch — some services emit `@trace_id`, others `@dd.trace_id` | The CLI searches both, but extend `--from` window; trace may have aged out of index (default 15-day retention) | +| Dashboard `update` wiped widgets | Sent partial JSON without fetching first | Restore from Datadog UI version history (Settings → Restore); next time always `get` → edit → `update` | +| `logs patterns` shows only 1 pattern | Query too narrow or low log volume | Widen `--from` window or remove service filter temporarily | -### Tail Logs (Real-time) -```bash -npx @leoflores/datadog-cli logs tail --query "service:api status:error" --pretty -``` +## Setup -### Error Summary ```bash -npx @leoflores/datadog-cli errors --from 1h --pretty +export DD_API_KEY="your-api-key" +export DD_APP_KEY="your-app-key" ``` -### Trace Correlation -```bash -npx @leoflores/datadog-cli logs trace --id "abc123def456" --pretty -``` +Keys: https://app.datadoghq.com/organization-settings/api-keys -### Query Metrics -```bash -npx @leoflores/datadog-cli metrics query --query "avg:system.cpu.user{*}" --from 1h --pretty -``` +For EU orgs, add `--site datadoghq.eu` to every command. -### Compare Periods ```bash -npx @leoflores/datadog-cli logs compare --query "status:error" --period 1h --pretty +npx @leoflores/datadog-cli ``` -## Global Flags - -| Flag | Description | -|------|-------------| -| `--pretty` | Human-readable output with colors | -| `--output ` | Export results to JSON file | -| `--site ` | Datadog site (e.g., `datadoghq.eu`) | - -## Time Formats - -- **Relative**: `30m`, `1h`, `6h`, `24h`, `7d` -- **ISO 8601**: `2024-01-15T10:30:00Z` - -## Incident Triage Workflow +## Incident Triage — Standard Sequence ```bash -# 1. Quick error overview +# 1. Aggregate: what services are erroring and how many? npx @leoflores/datadog-cli errors --from 1h --pretty -# 2. Is this new? Compare to previous period +# 2. Baseline: is this spike new or ongoing? npx @leoflores/datadog-cli logs compare --query "status:error" --period 1h --pretty -# 3. Find error patterns -npx @leoflores/datadog-cli logs patterns --query "status:error" --from 1h --pretty +# 3. Pattern: what are the error templates? (normalize before reading raw) +npx @leoflores/datadog-cli logs patterns --query "status:error service:api" --from 1h --pretty -# 4. Narrow down by service -npx @leoflores/datadog-cli logs search --query "status:error service:api" --from 1h --pretty +# 4. Scope: narrow to the failing service +npx @leoflores/datadog-cli logs search --query "status:error service:api" --from 1h --limit 50 --pretty -# 5. Get context around a timestamp -npx @leoflores/datadog-cli logs context --timestamp "2024-01-15T10:30:00Z" --service api --pretty +# 5. Context: get logs around the failure timestamp +npx @leoflores/datadog-cli logs context --timestamp "2024-01-15T10:30:00Z" --service api --before 5m --after 2m --pretty -# 6. Follow the distributed trace +# 6. Trace: reconstruct full distributed call chain npx @leoflores/datadog-cli logs trace --id "TRACE_ID" --pretty ``` -See [workflows.md](references/workflows.md) for more debugging workflows. +## Commands Quick Reference + +| Command | When to use | +|---------|-------------| +| `errors` | First look — error counts by service/type | +| `logs compare` | Determine if spike is new vs. baseline | +| `logs patterns` | Normalize error variants before reading raw | +| `logs search` | Targeted log retrieval (after narrowing) | +| `logs trace` | Reconstruct distributed request from trace ID | +| `logs context` | Get what happened just before/after a timestamp | +| `logs agg` | Break down logs by facet (status, host, error.kind) | +| `logs multi` | Parallel cross-service comparison (use sparingly) | +| `logs tail` | Interactive only — stream live logs | +| `metrics query` | Correlate log errors with CPU/latency/throughput | +| `services` | Verify a service is emitting logs (sanity check) | +| `dashboards` | CRUD — always `get` before `update` | + +## Reference Docs + +Load these only when needed for the specific task: + +- **[logs-commands.md](references/logs-commands.md)** — full flag reference for all `logs *` subcommands +- **[query-syntax.md](references/query-syntax.md)** — operators, facet names, numeric comparisons, wildcard patterns +- **[metrics.md](references/metrics.md)** — metrics query format, APM metrics, aggregation functions +- **[workflows.md](references/workflows.md)** — multi-step workflows (real-time debug, service health, export) +- **[dashboards.md](references/dashboards.md)** — full dashboard CRUD reference + safe update workflow diff --git a/skills/dependency-updater/SKILL.md b/skills/dependency-updater/SKILL.md index edede5b..786b7ae 100644 --- a/skills/dependency-updater/SKILL.md +++ b/skills/dependency-updater/SKILL.md @@ -1,491 +1,98 @@ --- name: dependency-updater -description: Smart dependency management for any language. Auto-detects project type, applies safe updates automatically, prompts for major versions, diagnoses and fixes dependency issues. +description: Smart dependency management for any language. Auto-detects project type, applies safe updates automatically, prompts for major versions, diagnoses and fixes dependency issues. Trigger phrases: "update dependencies", "update deps", "outdated packages", "dependency audit", "fix dependency conflicts", "security audit packages", "why won't my packages install". license: MIT metadata: - version: 1.0.0 + version: 2.0.0 --- # Dependency Updater -Smart dependency management for any language with automatic detection and safe updates. +## Mindset ---- - -## Quick Start - -``` -update my dependencies -``` - -The skill auto-detects your project type and handles the rest. - ---- - -## Triggers - -| Trigger | Example | -|---------|---------| -| Update dependencies | "update dependencies", "update deps" | -| Check outdated | "check for outdated packages" | -| Fix dependency issues | "fix my dependency problems" | -| Security audit | "audit dependencies for vulnerabilities" | -| Diagnose deps | "diagnose dependency issues" | - ---- - -## Supported Languages - -| Language | Package File | Update Tool | Audit Tool | -|----------|--------------|-------------|------------| -| **Node.js** | package.json | `taze` | `npm audit` | -| **Python** | requirements.txt, pyproject.toml | `pip-review` | `safety`, `pip-audit` | -| **Go** | go.mod | `go get -u` | `govulncheck` | -| **Rust** | Cargo.toml | `cargo update` | `cargo audit` | -| **Ruby** | Gemfile | `bundle update` | `bundle audit` | -| **Java** | pom.xml, build.gradle | `mvn versions:*` | `mvn dependency:*` | -| **.NET** | *.csproj | `dotnet outdated` | `dotnet list package --vulnerable` | - ---- - -## Quick Reference +- **Pinned versions are load-bearing** — a fixed version without `^` or `~` is a *decision*, not an oversight. Someone got burned and locked it. Never "fix" it to a range without checking git blame. +- **Lock files are the ground truth** — `package.json` is a *request*, `package-lock.json` is *what actually runs*. When they diverge (e.g., after a `git pull` that updated package.json but not the lock), `npm install` silently installs different code than production. +- **`npm audit --force` is a footgun** — it resolves vulnerabilities by *downgrading or breaking semver constraints*, leaving the project in an inconsistent state that CI can't reproduce. Practitioners use targeted upgrades, not force. +- **Major version bumps require changelog archaeology** — tools can detect the version delta but not whether your usage of the old API was in the breaking-change surface. Always check the migration guide before approving a major. +- **Monorepos compound the risk** — updating a shared package at the root can silently change behavior in workspaces that weren't tested. Run workspace-scoped installs after root changes. -| Update Type | Version Change | Action | -|-------------|----------------|--------| -| **Fixed** | No `^` or `~` | Skip (intentionally pinned) | -| **PATCH** | `x.y.z` → `x.y.Z` | Auto-apply | -| **MINOR** | `x.y.z` → `x.Y.0` | Auto-apply | -| **MAJOR** | `x.y.z` → `X.0.0` | Prompt user individually | +## Navigation ---- +**Use this skill when**: +- User asks to update, upgrade, or refresh dependencies/packages/deps +- User reports install failures, peer-dependency warnings, or version conflicts +- User asks for a security audit or vulnerability scan of packages +- User asks what packages are outdated or stale -## Workflow +**Do NOT use this skill when**: +- User wants to *add a new* package (that's a feature task, not an update) +- User is working inside a container/locked environment with no write access to package files +- The project uses Nix, Bazel, or vendored deps — standard update tools break these +**Quick decision tree**: ``` -User Request - │ - ▼ -┌─────────────────────────────────────────────────────┐ -│ Step 1: DETECT PROJECT TYPE │ -│ • Scan for package files (package.json, go.mod...) │ -│ • Identify package manager │ -├─────────────────────────────────────────────────────┤ -│ Step 2: CHECK PREREQUISITES │ -│ • Verify required tools are installed │ -│ • Suggest installation if missing │ -├─────────────────────────────────────────────────────┤ -│ Step 3: SCAN FOR UPDATES │ -│ • Run language-specific outdated check │ -│ • Categorize: MAJOR / MINOR / PATCH / Fixed │ -├─────────────────────────────────────────────────────┤ -│ Step 4: AUTO-APPLY SAFE UPDATES │ -│ • Apply MINOR and PATCH automatically │ -│ • Report what was updated │ -├─────────────────────────────────────────────────────┤ -│ Step 5: PROMPT FOR MAJOR UPDATES │ -│ • AskUserQuestion for each MAJOR update │ -│ • Show current → new version │ -├─────────────────────────────────────────────────────┤ -│ Step 6: APPLY APPROVED MAJORS │ -│ • Update only approved packages │ -├─────────────────────────────────────────────────────┤ -│ Step 7: FINALIZE │ -│ • Run install command │ -│ • Run security audit │ -└─────────────────────────────────────────────────────┘ +User request type? +├── "update" / "outdated" → full update workflow (detect → scan → apply → audit) +├── "security" / "vuln" / "audit" → audit-only workflow +├── "broken" / "can't install" / "conflict" → diagnosis mode +└── "specific package X" → targeted update, not bulk ``` ---- - -## Commands by Language - -### Node.js (npm/yarn/pnpm) - -```bash -# Check prerequisites -scripts/check-tool.sh taze "npm install -g taze" - -# Scan for updates -taze - -# Apply minor/patch -taze minor --write - -# Apply specific majors -taze major --write --include pkg1,pkg2 +## Philosophy -# Monorepo support -taze -r # recursive - -# Security -npm audit -npm audit fix -``` - -### Python - -```bash -# Check outdated -pip list --outdated - -# Update all (careful!) -pip-review --auto - -# Update specific -pip install --upgrade package-name - -# Security -pip-audit -safety check -``` +Safety over convenience: auto-apply only what semver guarantees is backward-compatible, gate everything else behind explicit user approval, and never mutate pinned versions or lock files in ways the ecosystem's install command wouldn't. -### Go +## NEVER -```bash -# Check outdated -go list -m -u all +- **NEVER run `npm audit --force`** — it breaks semver constraints, can *downgrade* packages to older vulnerable versions, and produces a lock file that diverges from package.json in ways that break reproducible installs. Use `npm audit fix` (no `--force`) or upgrade the specific package manually. +- **NEVER run `pip-review --auto` without a virtualenv active** — it upgrades system Python packages, which breaks OS-level tools that depend on specific versions (particularly on Debian/Ubuntu). Always confirm a venv is active first. +- **NEVER auto-apply MAJOR updates in bulk** — even if the user says "update everything", batch-approving majors makes it impossible to bisect which package broke the build. Present each major individually with current → new and a link to the changelog. +- **NEVER delete and regenerate a lock file as the first fix** — `rm package-lock.json && npm install` wipes all transitive version pins, meaning dependencies-of-dependencies can jump to breaking versions. Use this only as a last resort after targeted fixes fail. +- **NEVER run `go get -u ./...` in a module with replace directives** — `-u` ignores `replace` directives for indirect dependencies, silently upgrading past the pinned fork/patch. Use `go get pkg@version` for each module individually. +- **NEVER skip `go mod tidy` after `go get` updates** — Go's toolchain won't error, but `go.sum` will contain stale hashes that fail verification in hermetic CI environments. +- **NEVER treat `cargo update` as safe for workspace crates** — `cargo update` respects semver *ranges* in Cargo.toml but doesn't check that workspace member crates compile together. Always run `cargo check --workspace` after. -# Update all -go get -u ./... +## When Things Go Wrong -# Tidy up -go mod tidy - -# Security -govulncheck ./... -``` - -### Rust - -```bash -# Check outdated -cargo outdated - -# Update within semver -cargo update - -# Security -cargo audit -``` - -### Ruby - -```bash -# Check outdated -bundle outdated - -# Update all -bundle update - -# Update specific -bundle update --conservative gem-name - -# Security -bundle audit -``` - -### Java (Maven) - -```bash -# Check outdated -mvn versions:display-dependency-updates - -# Update to latest -mvn versions:use-latest-releases - -# Security -mvn dependency:tree -mvn dependency-check:check -``` - -### .NET - -```bash -# Check outdated -dotnet list package --outdated - -# Update specific -dotnet add package PackageName - -# Security -dotnet list package --vulnerable -``` +| Situation | Likely Cause | Recovery | +|-----------|-------------|----------| +| `npm install` succeeds locally but CI fails | Lock file not committed, or committed with wrong line endings | `git add package-lock.json` with `.gitattributes` setting `text=auto eol=lf` | +| `npm audit fix` creates new vulnerabilities | Downgraded a transitive dep to an older vulnerable version | `git checkout package-lock.json`, upgrade the *direct* dep that pulls in the vulnerable transitive | +| Peer dependency warnings flood output but nothing breaks | npm v7+ installs peers automatically; warnings are noise if app works | Check with `npm ls ` — if only one version installed, safe to ignore | +| `pip install -r requirements.txt` works but `pip check` fails | requirements.txt has incompatible upper bounds from different authors | Use `pip-compile` (pip-tools) to resolve a coherent set; add `pip check` to CI | +| `bundle update` downgrades an unrelated gem | Bundler re-solves the whole graph; a newer gem narrowed a shared constraint | Use `bundle update --conservative gem-name` to update only the target gem's graph | +| `go get -u` introduces a module that fails `go mod verify` | Checksum mismatch — module contents changed after publish (supply chain risk) | Do NOT ignore; report to the module maintainer; pin to last known-good SHA | --- -## Diagnosis Mode - -When dependencies are broken, run diagnosis: - -### Common Issues & Fixes - -| Issue | Symptoms | Fix | -|-------|----------|-----| -| **Version Conflict** | "Cannot resolve dependency tree" | Clean install, use overrides/resolutions | -| **Peer Dependency** | "Peer dependency not satisfied" | Install required peer version | -| **Security Vuln** | `npm audit` shows issues | `npm audit fix` or manual update | -| **Unused Deps** | Bloated bundle | Run `depcheck` (Node) or equivalent | -| **Duplicate Deps** | Multiple versions installed | Run `npm dedupe` or equivalent | - -### Emergency Fixes - -```bash -# Node.js - Nuclear reset -rm -rf node_modules package-lock.json -npm cache clean --force -npm install - -# Python - Clean virtualenv -rm -rf venv -python -m venv venv -source venv/bin/activate -pip install -r requirements.txt - -# Go - Reset modules -rm go.sum -go mod tidy -``` - ---- - -## Security Audit - -Run security checks for any project: - -```bash -# Node.js -npm audit -npm audit --json | jq '.metadata.vulnerabilities' - -# Python -pip-audit -safety check - -# Go -govulncheck ./... - -# Rust -cargo audit - -# Ruby -bundle audit - -# .NET -dotnet list package --vulnerable -``` - -### Severity Response - -| Severity | Action | -|----------|--------| -| **Critical** | Fix immediately | -| **High** | Fix within 24h | -| **Moderate** | Fix within 1 week | -| **Low** | Fix in next release | +## Workflow ---- +**Step 1 — Detect**: Scan for package files (`package.json`, `go.mod`, `Cargo.toml`, `requirements.txt`, `Gemfile`, `pom.xml`, `*.csproj`). Check for workspace/monorepo patterns. Identify the package manager (npm vs yarn vs pnpm matters for lock file format). -## Anti-Patterns +**Step 2 — Prerequisites**: Verify tooling. For Node.js, prefer `taze` over `ncu` (taze respects workspace protocols). For Python, confirm virtualenv is active before any `pip` mutation. -| Avoid | Why | Instead | -|-------|-----|---------| -| Update fixed versions | Intentionally pinned | Skip them | -| Auto-apply MAJOR | Breaking changes | Prompt user | -| Batch MAJOR prompts | Loses context | Prompt individually | -| Skip lock file | Irreproducible builds | Always commit lock files | -| Ignore security alerts | Vulnerabilities | Address by severity | +**Step 3 — Scan**: Run the ecosystem's outdated command. Categorize results: +- Fixed (no range specifier) → **skip, note in report** +- PATCH/MINOR within current range → **auto-apply** +- MAJOR or outside current range → **queue for user approval** ---- +**Step 4 — Apply safe updates**: Apply PATCH + MINOR. For Node.js: `taze minor --write` then `npm install`. Run tests if available. -## Verification Checklist +**Step 5 — Gate majors**: For each MAJOR update, present: package name, current version, target version, and changelog URL. Ask individually. Apply only approved ones. -After updates: +**Step 6 — Audit**: Run the ecosystem's security scanner. Report findings by severity. Do NOT auto-fix — present the vulnerable package, the fix version, and whether it's a breaking change. -- [ ] Updates scanned without errors -- [ ] MINOR/PATCH auto-applied -- [ ] MAJOR updates prompted individually -- [ ] Fixed versions untouched -- [ ] Lock file updated -- [ ] Install command ran -- [ ] Security audit passed (or issues noted) +**Step 7 — Report**: Summary of what changed, what was skipped (pinned), what needs manual attention (majors declined, unfixed vulns). --- -
-Deep Dive: Project Detection - -The skill auto-detects project type by scanning for package files: - -| File Found | Language | Package Manager | -|------------|----------|-----------------| -| `package.json` | Node.js | npm/yarn/pnpm | -| `requirements.txt` | Python | pip | -| `pyproject.toml` | Python | pip/poetry | -| `Pipfile` | Python | pipenv | -| `go.mod` | Go | go modules | -| `Cargo.toml` | Rust | cargo | -| `Gemfile` | Ruby | bundler | -| `pom.xml` | Java | Maven | -| `build.gradle` | Java/Kotlin | Gradle | -| `*.csproj` | .NET | dotnet | - -**Detection order matters for monorepos:** -1. Check current directory first -2. Then check for workspace/monorepo patterns -3. Offer to run recursively if applicable - -
- -
-Deep Dive: Node.js with taze - -### Prerequisites - -```bash -# Install taze globally (recommended) -npm install -g taze - -# Or use npx -npx taze -``` - -### Smart Update Flow - -```bash -# 1. Scan all updates -taze - -# 2. Apply safe updates (minor + patch) -taze minor --write - -# 3. For each major, prompt user: -# "Update @types/node from ^20.0.0 to ^22.0.0?" -# If yes, add to approved list - -# 4. Apply approved majors -taze major --write --include approved-pkg1,approved-pkg2 - -# 5. Install -npm install # or pnpm install / yarn -``` - -### Auto-Approve List - -Some packages have frequent major bumps but are backward-compatible: - -| Package | Reason | -|---------|--------| -| `lucide-react` | Icon library, majors are additive | -| `@types/*` | Type definitions, usually safe | +## Commands Reference -
+See [`references/commands-by-language.md`](references/commands-by-language.md) for the full command reference per ecosystem. -
-Deep Dive: Version Strategies - -### Semantic Versioning - -``` -MAJOR.MINOR.PATCH (e.g., 2.3.1) - -MAJOR: Breaking changes - requires code changes -MINOR: New features - backward compatible -PATCH: Bug fixes - backward compatible -``` - -### Range Specifiers - -| Specifier | Meaning | Example | -|-----------|---------|---------| -| `^1.2.3` | Minor + Patch OK | `>=1.2.3 <2.0.0` | -| `~1.2.3` | Patch only | `>=1.2.3 <1.3.0` | -| `1.2.3` | Exact (fixed) | Only `1.2.3` | -| `>=1.2.3` | At least | Any `>=1.2.3` | -| `*` | Any | Latest (dangerous) | - -### Recommended Strategy - -```json -{ - "dependencies": { - "critical-lib": "1.2.3", // Exact for critical - "stable-lib": "~1.2.3", // Patch only for stable - "modern-lib": "^1.2.3" // Minor OK for active - } -} -``` - -
- -
-Deep Dive: Conflict Resolution - -### Node.js Conflicts - -**Diagnosis:** -```bash -npm ls package-name # See dependency tree -npm explain package-name # Why installed -yarn why package-name # Yarn equivalent -``` - -**Resolution with overrides:** -```json -// package.json -{ - "overrides": { - "lodash": "^4.18.0" - } -} -``` - -**Resolution with resolutions (Yarn):** -```json -{ - "resolutions": { - "lodash": "^4.18.0" - } -} -``` - -### Python Conflicts - -**Diagnosis:** -```bash -pip check -pipdeptree -p package-name -``` - -**Resolution:** -```bash -# Use virtual environment -python -m venv venv -source venv/bin/activate -pip install -r requirements.txt - -# Or use constraints -pip install -c constraints.txt -r requirements.txt -``` - -
- ---- - -## Script Reference +## Scripts | Script | Purpose | |--------|---------| -| `scripts/check-tool.sh` | Verify tool is installed | -| `scripts/run-taze.sh` | Run taze with proper flags | - ---- - -## Related Tools - -| Tool | Language | Purpose | -|------|----------|---------| -| [taze](https://github.com/antfu-collective/taze) | Node.js | Smart dependency updates | -| [npm-check-updates](https://github.com/raineorshine/npm-check-updates) | Node.js | Alternative to taze | -| [pip-review](https://github.com/jgonggrijp/pip-review) | Python | Interactive pip updates | -| [cargo-edit](https://github.com/killercup/cargo-edit) | Rust | Cargo dependency management | -| [bundler-audit](https://github.com/rubysec/bundler-audit) | Ruby | Security auditing | +| `scripts/check-tool.sh` | Verify a tool is installed, print install hint if missing | +| `scripts/run-taze.sh` | Run taze with safe flags (minor mode, workspace-aware) | diff --git a/skills/dependency-updater/references/commands-by-language.md b/skills/dependency-updater/references/commands-by-language.md new file mode 100644 index 0000000..0f38ef7 --- /dev/null +++ b/skills/dependency-updater/references/commands-by-language.md @@ -0,0 +1,194 @@ +# Commands by Language + +Reference for the dependency-updater skill. Each section covers scan, update, install, and audit commands. + +--- + +## Node.js (npm / yarn / pnpm) + +```bash +# Preferred: taze (respects workspace:* protocols, caret/tilde ranges) +taze # scan all +taze minor --write # apply minor+patch +taze major --write --include pkg1,pkg2 # apply approved majors only +taze -r # recursive (monorepo) + +# Fallback: npm +npm outdated +npm update # minor/patch within ranges only +npm install pkg@latest # specific package to latest + +# Install after changes +npm install # npm +pnpm install # pnpm +yarn # yarn + +# Security +npm audit # show vulnerabilities +npm audit fix # fix without breaking semver (safe) +# DO NOT: npm audit fix --force # breaks constraints + +# Conflict diagnosis +npm ls pkg-name # show why a version was installed +npm explain pkg-name # dependency path explanation +npm dedupe # collapse duplicate transitive versions +``` + +**Lock file formats**: `package-lock.json` (npm), `yarn.lock` (yarn), `pnpm-lock.yaml` (pnpm). Never commit more than one. + +--- + +## Python + +```bash +# Scan +pip list --outdated +pip-review # interactive review + +# Update +pip install --upgrade pkg-name # single package +# WARNING: only inside active virtualenv: +pip-review --auto # all packages + +# Verify consistency +pip check # detect incompatible installed packages + +# Security +pip-audit # CVE scan via PyPI advisory db +safety check # alternative (requires API key for full data) + +# Conflict resolution +pipdeptree -p pkg-name # why a package is at its current version +pip-compile requirements.in # regenerate requirements.txt with full resolution +``` + +**Virtualenv check before any mutation**: `python -c "import sys; assert sys.prefix != sys.base_prefix, 'Not in venv'"`. + +--- + +## Go + +```bash +# Scan +go list -m -u all # show available updates for all modules + +# Update (targeted — safer than bulk -u) +go get pkg@latest # single module +go get pkg@v1.2.3 # pin to specific version + +# CAUTION with bulk update: +# go get -u ./... — ignores replace directives for indirect deps + +# Tidy (always run after any go get) +go mod tidy # remove unused, add missing, update go.sum + +# Verify (supply chain check) +go mod verify # verify go.sum hashes match downloaded content + +# Security +govulncheck ./... # Go vulnerability database scan +``` + +--- + +## Rust + +```bash +# Scan +cargo outdated # requires cargo-outdated: cargo install cargo-outdated + +# Update (within Cargo.toml semver ranges) +cargo update # all crates within ranges +cargo update -p crate-name # single crate + +# After update, always verify workspace compiles +cargo check --workspace + +# Security +cargo audit # requires cargo-audit: cargo install cargo-audit +``` + +--- + +## Ruby + +```bash +# Scan +bundle outdated + +# Update (conservative = only updates the named gem's subgraph) +bundle update --conservative gem-name +bundle update # full re-solve (use carefully — see NEVER list) + +# Install +bundle install + +# Security +bundle audit # requires: gem install bundler-audit +bundle audit update # refresh the advisory database first +``` + +--- + +## Java (Maven) + +```bash +# Scan +mvn versions:display-dependency-updates +mvn versions:display-plugin-updates + +# Update +mvn versions:use-latest-releases # updates to latest release (skips snapshots) +mvn versions:use-latest-versions # includes snapshots + +# Security +mvn dependency:tree # review full tree +mvn dependency-check:check # OWASP dependency check plugin +``` + +--- + +## .NET + +```bash +# Scan +dotnet list package --outdated +dotnet list package --vulnerable # security scan + +# Update (no bulk command — must update per package) +dotnet add package PackageName # latest stable +dotnet add package PackageName --version X.Y.Z # specific version + +# Tool: dotnet-outdated (third-party, recommended) +dotnet tool install -g dotnet-outdated +dotnet outdated # then apply interactively +``` + +--- + +## Emergency Resets (last resort only) + +### Node.js +```bash +rm -rf node_modules package-lock.json +npm cache clean --force +npm install +``` +Risk: all transitive versions re-resolved; may introduce new breakage. + +### Python +```bash +deactivate +rm -rf venv +python -m venv venv +source venv/bin/activate +pip install -r requirements.txt +pip check +``` + +### Go +```bash +rm go.sum +go mod tidy # regenerates go.sum from module cache or network +go mod verify # confirm integrity +``` diff --git a/skills/design-system-starter/SKILL.md b/skills/design-system-starter/SKILL.md index 4d37e9b..d5f71e8 100644 --- a/skills/design-system-starter/SKILL.md +++ b/skills/design-system-starter/SKILL.md @@ -1,603 +1,171 @@ --- name: design-system-starter -description: Create and evolve design systems with design tokens, component architecture, accessibility guidelines, and documentation templates. Ensures consistent, scalable, and accessible UI across products. +description: Create and evolve design systems — design tokens (W3C/Style Dictionary format), atomic component architecture, theming, dark mode, WCAG 2.1 accessibility, and documentation scaffolding. Trigger phrases: "design system", "design tokens", "component library", "atomic design", "token architecture", "WCAG compliance", "dark mode theming". license: MIT metadata: - version: 1.0.0 + version: 2.0.0 tags: [design-system, ui, components, design-tokens, accessibility, frontend] --- # Design System Starter -Build robust, scalable design systems that ensure visual consistency and exceptional user experiences. - --- -## Quick Start +## Mindset -Just describe what you need: +1. **Tokens are contracts, not values.** Primitive tokens own the raw hex/rem; semantic tokens reference primitives by alias. Changing a primitive propagates everywhere — but only if consumers use semantic tokens, never primitives directly in components. -``` -Create a design system for my React app with dark mode support -``` +2. **The atomic hierarchy is a dependency graph, not a naming convention.** Atoms have zero component dependencies. Molecules depend only on atoms. Breaking this means any atom refactor cascades unpredictably into molecules and organisms. Enforce it structurally, not just by folder name. -That's it. The skill provides tokens, components, and accessibility guidelines. - ---- +3. **Accessibility debt compounds faster than tech debt.** Retrofitting focus traps, ARIA live regions, and color contrast into an established library costs 3-5x building them in. Build accessible primitives first; accessibility is cheaper at atom level than organism level. -## Triggers +4. **Dark mode is a theming problem, not a CSS problem.** If dark mode is solved with `dark:` class overrides scattered across components rather than semantic token swaps, you've made every future theme (high-contrast, branded) require the same scattered changes. Semantic tokens that swap at theme boundary is the only scalable approach. -| Trigger | Example | -|---------|---------| -| Create design system | "Create a design system for my app" | -| Design tokens | "Set up design tokens for colors and spacing" | -| Component architecture | "Design component structure using atomic design" | -| Accessibility | "Ensure WCAG 2.1 compliance for my components" | -| Dark mode | "Implement theming with dark mode support" | +5. **The system's API surface is harder to change than its internals.** Prop names, slot patterns, and export shapes become load-bearing the moment a second team adopts the library. Treat the component API as a public contract from day one. --- -## Quick Reference +## Navigation -| Task | Output | -|------|--------| -| Design tokens | Color, typography, spacing, shadows JSON | -| Component structure | Atomic design hierarchy (atoms, molecules, organisms) | -| Theming | CSS variables or ThemeProvider setup | -| Accessibility | WCAG 2.1 AA compliant patterns | -| Documentation | Component docs with props, examples, a11y notes | +**Use this skill when**: +- Starting a design token architecture from scratch or auditing an existing one +- Scaffolding component libraries with React/Vue/Svelte + TypeScript +- Implementing multi-theme support (dark mode, branded variants, high-contrast) +- Establishing WCAG 2.1 AA compliance patterns for an existing or new library +- Producing Style Dictionary / W3C DTCG token JSON structures +- Advising on atomic design component decomposition ---- +**Do NOT use this skill when**: +- The user wants a single one-off component (use a generic React/CSS skill instead) +- The project already has a mature design system (Material UI, Chakra, Radix) and just needs customization — advise extending the existing system rather than creating a parallel one +- The task is purely visual design (Figma layout, brand identity) with no implementation component -## Bundled Resources +If the component library is built on Material UI, also load mui for MUI-specific token consumption via sx shorthand and slotProps customization. -- `references/component-examples.md` - Complete component implementations -- `templates/design-tokens-template.json` - W3C design token format -- `templates/component-template.tsx` - React component template -- `checklists/design-system-checklist.md` - Design system audit checklist +**Quick decision tree**: +- Has an existing component library? → Extend via tokens/theming, don't rebuild atoms +- Greenfield project? → Start with token tiers, then atoms, then documentation tooling +- Existing codebase with inconsistent styles? → Audit first (`checklists/design-system-checklist.md`), then extract tokens from existing values --- -## Design System Philosophy - -### What is a Design System? - -A design system is more than a component library—it's a collection of: - -1. **Design Tokens**: Foundational design decisions (colors, spacing, typography) -2. **Components**: Reusable UI building blocks -3. **Patterns**: Common UX solutions and compositions -4. **Guidelines**: Rules, principles, and best practices -5. **Documentation**: How to use everything effectively +## Philosophy -### Core Principles - -**1. Consistency Over Creativity** -- Predictable patterns reduce cognitive load -- Users learn once, apply everywhere -- Designers and developers speak the same language - -**2. Accessible by Default** -- WCAG 2.1 Level AA compliance minimum -- Keyboard navigation built-in -- Screen reader support from the start - -**3. Scalable and Maintainable** -- Design tokens enable global changes -- Component composition reduces duplication -- Versioning and deprecation strategies - -**4. Developer-Friendly** -- Clear API contracts -- Comprehensive documentation -- Easy to integrate and customize +A design system is a living contract between design and engineering — not a component dump. Every decision at the token and API layer is a long-term commitment. Optimize for the maintenance burden of the second year, not the delivery speed of the first sprint. --- -## Design Tokens - -Design tokens are the atomic design decisions that define your system's visual language. - -### Token Categories - -#### 1. Color Tokens - -**Primitive Colors** (Raw values): -```json -{ - "color": { - "primitive": { - "blue": { - "50": "#eff6ff", - "100": "#dbeafe", - "200": "#bfdbfe", - "300": "#93c5fd", - "400": "#60a5fa", - "500": "#3b82f6", - "600": "#2563eb", - "700": "#1d4ed8", - "800": "#1e40af", - "900": "#1e3a8a", - "950": "#172554" - } - } - } -} -``` +## NEVER -**Semantic Colors** (Contextual meaning): -```json -{ - "color": { - "semantic": { - "brand": { - "primary": "{color.primitive.blue.600}", - "primary-hover": "{color.primitive.blue.700}", - "primary-active": "{color.primitive.blue.800}" - }, - "text": { - "primary": "{color.primitive.gray.900}", - "secondary": "{color.primitive.gray.600}", - "tertiary": "{color.primitive.gray.500}", - "disabled": "{color.primitive.gray.400}", - "inverse": "{color.primitive.white}" - }, - "background": { - "primary": "{color.primitive.white}", - "secondary": "{color.primitive.gray.50}", - "tertiary": "{color.primitive.gray.100}" - }, - "feedback": { - "success": "{color.primitive.green.600}", - "warning": "{color.primitive.yellow.600}", - "error": "{color.primitive.red.600}", - "info": "{color.primitive.blue.600}" - } - } - } -} -``` +- **NEVER let components consume primitive tokens directly** — because when `blue-600` becomes `brand-600` in a rebrand, every component needs a manual update instead of one token swap. Components must only reference semantic tokens (`color.text.primary`, `color.brand.interactive`). -**Accessibility**: Ensure color contrast ratios meet WCAG 2.1 Level AA: -- Normal text: 4.5:1 minimum -- Large text (18pt+ or 14pt+ bold): 3:1 minimum -- UI components and graphics: 3:1 minimum - -#### 2. Typography Tokens - -```json -{ - "typography": { - "fontFamily": { - "sans": "'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif", - "serif": "'Georgia', 'Times New Roman', serif", - "mono": "'Fira Code', 'Courier New', monospace" - }, - "fontSize": { - "xs": "0.75rem", // 12px - "sm": "0.875rem", // 14px - "base": "1rem", // 16px - "lg": "1.125rem", // 18px - "xl": "1.25rem", // 20px - "2xl": "1.5rem", // 24px - "3xl": "1.875rem", // 30px - "4xl": "2.25rem", // 36px - "5xl": "3rem" // 48px - }, - "fontWeight": { - "normal": 400, - "medium": 500, - "semibold": 600, - "bold": 700 - }, - "lineHeight": { - "tight": 1.25, - "normal": 1.5, - "relaxed": 1.75, - "loose": 2 - }, - "letterSpacing": { - "tight": "-0.025em", - "normal": "0", - "wide": "0.025em" - } - } -} -``` +- **NEVER use `rgba()` hardcodes in component styles** — because they don't participate in the token system, break dark mode, and are invisible to Style Dictionary transforms. All color values must trace back to a token reference. -#### 3. Spacing Tokens - -**Scale**: Use a consistent spacing scale (commonly 4px or 8px base) - -```json -{ - "spacing": { - "0": "0", - "1": "0.25rem", // 4px - "2": "0.5rem", // 8px - "3": "0.75rem", // 12px - "4": "1rem", // 16px - "5": "1.25rem", // 20px - "6": "1.5rem", // 24px - "8": "2rem", // 32px - "10": "2.5rem", // 40px - "12": "3rem", // 48px - "16": "4rem", // 64px - "20": "5rem", // 80px - "24": "6rem" // 96px - } -} -``` +- **NEVER create circular token references** (`A → B → A`) — Style Dictionary resolves references in topological order; cycles produce silent build failures or `undefined` output values that only surface at runtime. Always audit with `style-dictionary build --verbose` before shipping token changes. -**Component-Specific Spacing**: -```json -{ - "component": { - "button": { - "padding-x": "{spacing.4}", - "padding-y": "{spacing.2}", - "gap": "{spacing.2}" - }, - "card": { - "padding": "{spacing.6}", - "gap": "{spacing.4}" - } - } -} -``` +- **NEVER solve dark mode with duplicated class overrides** (e.g., Tailwind `dark:bg-gray-900` on every element) — it couples theme logic to every component, making a third theme (high-contrast, branded) require a full codebase scan. Use semantic token swap at the `:root`/`[data-theme]` boundary instead. -#### 4. Border Radius Tokens - -```json -{ - "borderRadius": { - "none": "0", - "sm": "0.125rem", // 2px - "base": "0.25rem", // 4px - "md": "0.375rem", // 6px - "lg": "0.5rem", // 8px - "xl": "0.75rem", // 12px - "2xl": "1rem", // 16px - "full": "9999px" - } -} -``` +- **NEVER ship components with CSS-in-JS runtime cost in a performance-critical library** — libraries like `emotion`/`styled-components` with dynamic interpolations re-compute styles per render. For shared libraries, prefer CSS variables (zero runtime) or build-time CSS Modules. Runtime CSS-in-JS is acceptable in application code, not in a design system consumed by many teams. -#### 5. Shadow Tokens - -```json -{ - "shadow": { - "xs": "0 1px 2px 0 rgba(0, 0, 0, 0.05)", - "sm": "0 1px 3px 0 rgba(0, 0, 0, 0.1), 0 1px 2px -1px rgba(0, 0, 0, 0.1)", - "base": "0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -2px rgba(0, 0, 0, 0.1)", - "md": "0 10px 15px -3px rgba(0, 0, 0, 0.1), 0 4px 6px -4px rgba(0, 0, 0, 0.1)", - "lg": "0 20px 25px -5px rgba(0, 0, 0, 0.1), 0 8px 10px -6px rgba(0, 0, 0, 0.1)", - "xl": "0 25px 50px -12px rgba(0, 0, 0, 0.25)" - } -} -``` +- **NEVER namespace tokens with the framework name** (e.g., `react-button-primary`) — tokens must be framework-agnostic because the same token set may be consumed by React, native mobile, and email templates simultaneously. Token names describe *intent*, not implementation. + +- **NEVER skip the `composite` token tier for complex multi-property values** — shadows, typography styles, and border shorthand are composite tokens. Treating them as loose scalar groups makes it impossible for Style Dictionary to output platform-correct formats (Android uses separate shadow properties; iOS uses NSShadow). --- -## Component Architecture +## When Things Go Wrong -### Atomic Design Methodology +| Situation | Likely Cause | Recovery | +|-----------|-------------|----------| +| Style Dictionary build produces `undefined` values | Circular token reference or misspelled alias path | Run `style-dictionary build --verbose`; check reference chain with `{token.path.here}` syntax audit | +| Dark mode flashes white on load (FOUC) | Theme class applied after hydration (SSR/SSG mismatch) | Set theme attribute server-side; use `@test.com` + +--- + +## Blocked Test Escalation Decision Tree + +``` +Test is blocked → + Can I work around it and test the same behavior another way? + YES → document workaround, proceed, note in report + NO → Is the blocker expected to resolve within this sprint? + YES → flag as BLOCKED, estimate when to retest + NO → escalate: this is a test environment/infrastructure risk + → notify lead, add to risk register, adjust exit criteria +``` + +**Rule:** Never mark a blocked test as PASS to maintain metrics. Mark it BLOCKED with a reason and estimated resolution. + +--- + +## Regression Suite Culling Policy Template + +Use this policy to prevent suite bloat: + +``` +RETIRE a test case when ALL of the following are true: + 1. It has passed 20+ consecutive runs without modification + 2. The underlying code area has not changed in 90+ days + 3. No production bug has been filed against this area in 6 months + +ARCHIVE (don't delete) — move to archive/ folder with: + - Date archived + - Rationale + - Reactivation condition ("reactivate if payment module changes") + +ADD to suite immediately when: + - A production bug is fixed (test for the exact defect) + - A new integration point is added + - A security finding is remediated +``` + +--- + +## Release Gate: Go / No-Go Checklist + +Before signing off on a release, verify: + +**Hard stops (any NO = no-go):** +- [ ] All P0 (critical) test cases passed +- [ ] Zero open severity-critical bugs +- [ ] Zero open severity-high bugs without an approved workaround +- [ ] Regression suite executed on release build (not a branch build) +- [ ] Security surfaces tested (auth, input validation, file upload) + +**Risk accepts (document if NO, owner must sign off):** +- [ ] P1 tests ≥90% pass rate +- [ ] Mobile/responsive tested on primary device matrix +- [ ] Accessibility spot-checked +- [ ] Performance baseline within 20% of previous release + +**Documentation complete:** +- [ ] Test run report filed with pass/fail counts +- [ ] All open bugs triaged with priority and ETA +- [ ] Rollback plan identified if critical bug emerges post-release + +--- + +## Severity vs. Priority: The Matrix + +Teams conflate these constantly. Use this: + +| | Low Priority (can wait) | High Priority (fix now) | +|---|---|---| +| **High Severity** (data loss, crash) | Admin-only import crash with 3 users/month | Checkout crash on primary flow | +| **Low Severity** (cosmetic, typo) | Misaligned icon in footer | Wrong price text on pricing page | + +**Severity** = scope of damage if triggered +**Priority** = urgency of fix relative to release timeline + +Always set them independently. If they always match, your team is conflating them. + +--- + +## Common QA Anti-Patterns With Non-Obvious Reasons + +| Anti-Pattern | Why It's Harmful (non-obvious) | +|---|---| +| Testing only with admin accounts | Admin users often bypass the permission/validation logic that regular users hit; bugs hide in lower-privilege flows | +| Running regression on staging with prod data | Test data side-effects (deletions, state changes) on prod data cause incidents; staging must use synthetic data | +| Writing test cases after execution | Confirmation bias — you write the expected result as what you observed, not what the spec says; bugs get missed | +| Using sequential test IDs without feature prefix | TC-1 through TC-500 in a flat namespace means a deleted test leaves a gap and you can't tell which feature lost coverage | +| Logging every UI variation as a separate bug | 10 bugs for "button color wrong on 10 pages" creates triage noise; one bug with a list of affected pages is the right pattern | +| Retesting the same happy path repeatedly | Happy paths don't find bugs; they confirm the system works when nothing goes wrong. Invest retest cycles in boundary and negative cases | diff --git a/skills/react-dev/SKILL.md b/skills/react-dev/SKILL.md index bd288f3..ffa0190 100644 --- a/skills/react-dev/SKILL.md +++ b/skills/react-dev/SKILL.md @@ -1,391 +1,96 @@ --- name: react-dev version: 1.0.0 -description: This skill should be used when building React components with TypeScript, typing hooks, handling events, or when React TypeScript, React 19, Server Components are mentioned. Covers type-safe patterns for React 18-19 including generic components, proper event typing, and routing integration (TanStack Router, React Router). +description: "Type-safe React component patterns for TypeScript — generic components, discriminated union props, React 19 ref/action patterns, TanStack Router and React Router v7 type safety. Use when writing or reviewing typed React components, hooks, Server Components, or router integration. Triggers: TypeScript React component, React 19, useActionState, forwardRef migration, TanStack Router, Server Action. NOT for MUI-specific or vanilla JS React." --- # React TypeScript Type-safe React = compile-time guarantees = confident refactoring. - +## Mindset -- Building typed React components -- Implementing generic components -- Typing event handlers, forms, refs -- Using React 19 features (Actions, Server Components, use()) -- Router integration (TanStack Router, React Router) -- Custom hooks with proper typing - -NOT for: non-React TypeScript, vanilla JS React - - - - - -React 19 breaking changes require migration. Key patterns: - -**ref as prop** - forwardRef deprecated: - -```typescript -// React 19 - ref as regular prop -type ButtonProps = { - ref?: React.Ref; -} & React.ComponentPropsWithoutRef<'button'>; - -function Button({ ref, children, ...props }: ButtonProps) { - return ; -} -``` - -**useActionState** - replaces useFormState: - -```typescript -import { useActionState } from 'react'; - -type FormState = { errors?: string[]; success?: boolean }; - -function Form() { - const [state, formAction, isPending] = useActionState(submitAction, {}); - return
...
; -} -``` - -**use()** - unwraps promises/context: - -```typescript -function UserProfile({ userPromise }: { userPromise: Promise }) { - const user = use(userPromise); // Suspends until resolved - return
{user.name}
; -} -``` - -See [react-19-patterns.md](references/react-19-patterns.md) for useOptimistic, useTransition, migration checklist. - -
- - - -**Props** - extend native elements: - -```typescript -type ButtonProps = { - variant: 'primary' | 'secondary'; -} & React.ComponentPropsWithoutRef<'button'>; - -function Button({ variant, children, ...props }: ButtonProps) { - return ; -} -``` - -**Children typing**: - -```typescript -type Props = { - children: React.ReactNode; // Anything renderable - icon: React.ReactElement; // Single element - render: (data: T) => React.ReactNode; // Render prop -}; -``` - -**Discriminated unions** for variant props: - -```typescript -type ButtonProps = - | { variant: 'link'; href: string } - | { variant: 'button'; onClick: () => void }; - -function Button(props: ButtonProps) { - if (props.variant === 'link') { - return Link; - } - return ; -} -``` - - - - - -Use specific event types for accurate target typing: - -```typescript -// Mouse -function handleClick(e: React.MouseEvent) { - e.currentTarget.disabled = true; -} - -// Form -function handleSubmit(e: React.FormEvent) { - e.preventDefault(); - const formData = new FormData(e.currentTarget); -} - -// Input -function handleChange(e: React.ChangeEvent) { - console.log(e.target.value); -} - -// Keyboard -function handleKeyDown(e: React.KeyboardEvent) { - if (e.key === 'Enter') e.currentTarget.blur(); -} -``` - -See [event-handlers.md](references/event-handlers.md) for focus, drag, clipboard, touch, wheel events. - - - - - -**useState** - explicit for unions/null: - -```typescript -const [user, setUser] = useState(null); -const [status, setStatus] = useState<'idle' | 'loading'>('idle'); -``` - -**useRef** - null for DOM, value for mutable: - -```typescript -const inputRef = useRef(null); // DOM - use ?. -const countRef = useRef(0); // Mutable - direct access -``` +1. **Props shape behavior, not just data.** Discriminated unions at the prop level eliminate impossible states before runtime — model your variants in the type, not in conditionals. +2. **Inference is a contract.** When TypeScript infers a generic, it locks in the relationship across all usages. Explicit annotations at call sites break that contract and cost you safety downstream. +3. **Server/Client boundary is a compile-time seam.** Treat `'use server'` and `'use client'` like interface boundaries — only serializable data crosses them. Promises passed from Server to Client via `use()` are the intended bridge. +4. **Avoid null coercion on DOM refs.** `inputRef.current!` trades a compile error for a runtime crash. Guard with `?.` or initialize with a default value. +5. **Routing type safety is architectural, not cosmetic.** Choosing TanStack Router vs React Router v7 determines whether search params, path params, and loader data are typed at definition time or inferred from generated artifacts — pick based on project constraints, not familiarity. -**useReducer** - discriminated unions for actions: +## Navigation -```typescript -type Action = - | { type: 'increment' } - | { type: 'set'; payload: number }; - -function reducer(state: State, action: Action): State { - switch (action.type) { - case 'set': return { ...state, count: action.payload }; - default: return state; - } -} -``` - -**Custom hooks** - tuple returns with as const: - -```typescript -function useToggle(initial = false) { - const [value, setValue] = useState(initial); - const toggle = () => setValue(v => !v); - return [value, toggle] as const; -} -``` - -**useContext** - null guard pattern: - -```typescript -const UserContext = createContext(null); - -function useUser() { - const user = useContext(UserContext); - if (!user) throw new Error('useUser outside UserProvider'); - return user; -} -``` - -See [hooks.md](references/hooks.md) for useCallback, useMemo, useImperativeHandle, useSyncExternalStore. - - - - - -Generic components infer types from props - no manual annotations at call site. - -**Pattern** - keyof T for column keys, render props for custom rendering: - -```typescript -type Column = { - key: keyof T; - header: string; - render?: (value: T[keyof T], item: T) => React.ReactNode; -}; - -type TableProps = { - data: T[]; - columns: Column[]; - keyExtractor: (item: T) => string | number; -}; - -function Table({ data, columns, keyExtractor }: TableProps) { - return ( - - - {columns.map(col => )} - - - {data.map(item => ( - - {columns.map(col => ( - - ))} - - ))} - -
{col.header}
- {col.render ? col.render(item[col.key], item) : String(item[col.key])} -
- ); -} -``` - -**Constrained generics** for required properties: - -```typescript -type HasId = { id: string | number }; - -function List({ items }: { items: T[] }) { - return
    {items.map(item =>
  • ...
  • )}
; -} -``` - -See [generic-components.md](examples/generic-components.md) for Select, List, Modal, FormField patterns. - -
- - - -React 19 Server Components run on server, can be async. - -**Async data fetching**: - -```typescript -export default async function UserPage({ params }: { params: { id: string } }) { - const user = await fetchUser(params.id); - return
{user.name}
; -} -``` - -**Server Actions** - 'use server' for mutations: - -```typescript -'use server'; - -export async function updateUser(userId: string, formData: FormData) { - await db.user.update({ where: { id: userId }, data: { ... } }); - revalidatePath(`/users/${userId}`); -} -``` - -**Client + Server Action**: - -```typescript -'use client'; +**Use this skill when:** +- Building typed React components, hooks, or event handlers +- Implementing generic components (Table, List, Select, Modal) +- Typing event handlers, forms, refs +- Using React 19 features (Actions, Server Components, `use()`, `useActionState`) +- Integrating TanStack Router or React Router v7 +- Custom hooks with proper typing -import { useActionState } from 'react'; -import { updateUser } from '@/actions/user'; +**Do NOT use this skill when:** +- Working in non-React TypeScript (Node scripts, libraries, Zod schemas standalone) +- Vanilla JS React with no TypeScript -function UserForm({ userId }: { userId: string }) { - const [state, formAction, isPending] = useActionState( - (prev, formData) => updateUser(userId, formData), {} - ); - return
...
; -} -``` +Building a shared component library rather than app components? Use design-system-starter for token architecture, atomic hierarchy, and WCAG 2.1 accessibility patterns. -**use() for promise handoff**: +If the user is working with Material UI components specifically, switch to the mui skill for MUI v7 sx caching, slotProps, and Grid v2 patterns. -```typescript -// Server: pass promise without await -async function Page() { - const userPromise = fetchUser('123'); - return ; -} +### Router Decision Tree -// Client: unwrap with use() -'use client'; -function UserProfile({ userPromise }: { userPromise: Promise }) { - const user = use(userPromise); - return
{user.name}
; -} ``` - -See [server-components.md](examples/server-components.md) for parallel fetching, streaming, error boundaries. - -
- - - -Both TanStack Router and React Router v7 provide type-safe routing solutions. - -**TanStack Router** - Compile-time type safety with Zod validation: - -```typescript -import { createRoute } from '@tanstack/react-router'; -import { z } from 'zod'; - -const userRoute = createRoute({ - path: '/users/$userId', - component: UserPage, - loader: async ({ params }) => ({ user: await fetchUser(params.userId) }), - validateSearch: z.object({ - tab: z.enum(['profile', 'settings']).optional(), - page: z.number().int().positive().default(1), - }), -}); - -function UserPage() { - const { user } = useLoaderData({ from: userRoute.id }); - const { tab, page } = useSearch({ from: userRoute.id }); - const { userId } = useParams({ from: userRoute.id }); -} +Does the project use a meta-framework (Next.js, Remix)? +├─ Yes → Next.js: use App Router + Server Actions (no separate router library needed) +│ Remix/React Router v7: already baked in — use Framework Mode loaders/actions +└─ No → Is file-based routing required? + ├─ Yes → React Router v7 (Framework Mode with Vite plugin) + └─ No → Does the team need compile-time Zod search-param validation? + ├─ Yes → TanStack Router (validateSearch + generated route tree) + └─ No → Either works; prefer TanStack Router for greenfield SPA, + React Router v7 for teams already familiar with Remix patterns ``` -**React Router v7** - Automatic type generation with Framework Mode: - -```typescript -import type { Route } from "./+types/user"; +**TanStack Router strengths:** Compile-time route tree, Zod `validateSearch`, full type inference from `useLoaderData`/`useSearch`/`useParams` without code generation at runtime. -export async function loader({ params }: Route.LoaderArgs) { - return { user: await fetchUser(params.userId) }; -} +**React Router v7 strengths:** Framework Mode generates `+types/` per route — loader return type is automatically inferred by component. Familiar for Remix users. Better SSR story. -export default function UserPage({ loaderData }: Route.ComponentProps) { - const { user } = loaderData; // Typed from loader - return

{user.name}

; -} -``` +MANDATORY — read [references/tanstack-router.md](references/tanstack-router.md) and [references/react-router.md](references/react-router.md) before implementing routing. -See [tanstack-router.md](references/tanstack-router.md) for TanStack patterns and [react-router.md](references/react-router.md) for React Router patterns. +## Philosophy -
+Model the impossible as unrepresentable: every `any`, every optional field that should be required, and every missing discriminant is a bug deferred to production. Type-safe React is not about satisfying the compiler — it is about encoding product rules in a language the compiler enforces. - +## NEVER -ALWAYS: -- Specific event types (MouseEvent, ChangeEvent, etc) -- Explicit useState for unions/null -- ComponentPropsWithoutRef for native element extension -- Discriminated unions for variant props -- as const for tuple returns -- ref as prop in React 19 (no forwardRef) -- useActionState for form actions -- Type-safe routing patterns (see routing section) +- **`any` for event handlers** — `any` widens the handler to accept events from unrelated elements, so `e.target.value` won't narrow correctly and you'll get silent `undefined` at runtime instead of a type error at compile time. +- **`JSX.Element` as children type** — `JSX.Element` excludes strings, numbers, arrays, fragments, and `null`, rejecting valid children at compile time while `React.ReactNode` accepts all renderable values correctly. +- **`forwardRef` in React 19+** — `forwardRef` wraps your component in an extra HOC layer and is deprecated; React 19 passes `ref` as a regular prop, so `forwardRef` adds indirection with no benefit and breaks the new ref cleanup return type. +- **`useFormState` (deprecated)** — replaced by `useActionState` which also returns `isPending` as a third value; `useFormState` is removed in React 19 and will throw at runtime. +- **Awaiting promises before passing to `use()`** — `use(await fetchUser())` defeats streaming: it blocks the Server Component until the promise settles, eliminating the concurrent rendering benefit that `use()` + Suspense is designed to provide. +- **Mixing Server and Client component logic in the same file** — a file with both `'use server'` and `'use client'` at module scope is invalid; the bundler treats the entire file as one boundary, so server-only imports (db, fs) will leak into the client bundle. +- **Non-null assertion (`!`) on DOM refs** — `inputRef.current!` crashes if the component unmounts or the ref hasn't attached yet; use optional chaining (`?.`) or guard the call site explicitly. +- **Inline object/function literals in JSX props without `useCallback`/`useMemo`** — every render creates a new reference, triggering child re-renders even when `React.memo` is applied, making memoization silently ineffective. -NEVER: -- any for event handlers -- JSX.Element for children (use ReactNode) -- forwardRef in React 19+ -- useFormState (deprecated) -- Forget null handling for DOM refs -- Mix Server/Client components in same file -- Await promises when passing to use() +## When Things Go Wrong - +| Symptom | Root Cause | Fix | +|---|---|---| +| `Type 'string' is not assignable to type 'never'` on discriminated union | Switch/if is missing a case arm, so TypeScript narrows to `never` | Add the missing branch or an exhaustive check `default: satisfies never` | +| `Property 'current' does not exist` on ref | Ref typed as `RefObject` but used in a non-null context | Use optional chaining `ref.current?.focus()` or check `if (ref.current)` first | +| Server Action throws "Functions cannot be passed directly to Client Components" | Passing an un-serializable callback as a prop across the Server/Client boundary | Wrap with `'use server'` inline or import from a server module; never pass closures | +| `useLoaderData` returns `unknown` in TanStack Router | Route's `loader` return type is not inferred because `from` is missing or wrong | Pass `{ from: routeId }` explicitly: `useLoaderData({ from: '/users/$userId' })` | +| React Router v7 `+types/` import missing | Vite plugin not configured or route file not under the routes directory | Check `vite.config.ts` for `reactRouter()` plugin and verify file is under `app/routes/` | +| Child component re-renders despite `React.memo` | Prop is an inline object/function literal — new reference on every parent render | Hoist the value outside the component or wrap with `useMemo`/`useCallback` | - +## Reference Loading Triggers -- [hooks.md](references/hooks.md) - useState, useRef, useReducer, useContext, custom hooks -- [event-handlers.md](references/event-handlers.md) - all event types, generic handlers -- [react-19-patterns.md](references/react-19-patterns.md) - useActionState, use(), useOptimistic, migration -- [generic-components.md](examples/generic-components.md) - Table, Select, List, Modal patterns -- [server-components.md](examples/server-components.md) - async components, Server Actions, streaming -- [tanstack-router.md](references/tanstack-router.md) - TanStack Router typed routes, search params, navigation -- [react-router.md](references/react-router.md) - React Router v7 loaders, actions, type generation, forms +MANDATORY — read the indicated reference file before working on each task: - +| Task | Reference | +|---|---| +| React 19 features (`ref` as prop, `useActionState`, `use()`, migration) | [references/react-19-changes.md](references/react-19-changes.md) | +| Component props, discriminated unions, children typing, polymorphic components | [references/component-patterns.md](references/component-patterns.md) | +| Server Components, Server Actions, streaming, Server/Client boundary | [references/server-components.md](references/server-components.md) | +| Typing hooks (`useState`, `useRef`, `useReducer`, `useContext`, custom hooks) | [references/hooks-typing.md](references/hooks-typing.md) | +| Generic components (Table, List, Select, Modal, FormField) | [references/generic-components.md](references/generic-components.md) | +| Event handler typing (mouse, form, keyboard, drag, clipboard) | [references/event-handlers.md](references/event-handlers.md) | +| TanStack Router routes, search params, loader data | [references/tanstack-router.md](references/tanstack-router.md) | +| React Router v7 loaders, actions, `+types/` generation | [references/react-router.md](references/react-router.md) | diff --git a/skills/react-dev/references/component-patterns.md b/skills/react-dev/references/component-patterns.md new file mode 100644 index 0000000..4a6be14 --- /dev/null +++ b/skills/react-dev/references/component-patterns.md @@ -0,0 +1,110 @@ +# Component Patterns + +Load this file when building typed React components, extending native elements, or modeling variant props with discriminated unions. + +## Props — extend native elements + +Use `ComponentPropsWithoutRef` to inherit all native element props without the ref type conflict. + +```typescript +type ButtonProps = { + variant: 'primary' | 'secondary'; +} & React.ComponentPropsWithoutRef<'button'>; + +function Button({ variant, children, ...props }: ButtonProps) { + return ; +} +``` + +## Children typing + +```typescript +type Props = { + children: React.ReactNode; // Anything renderable (strings, numbers, arrays, null) + icon: React.ReactElement; // Single React element only + render: (data: T) => React.ReactNode; // Render prop + label: string | React.ReactElement; // String or element +}; +``` + +Never use `JSX.Element` for children — it excludes strings, numbers, arrays, fragments, and `null`. + +## Discriminated unions for variant props + +Model variants as discriminants so TypeScript narrows the type in each branch: + +```typescript +type ButtonProps = + | { variant: 'link'; href: string; onClick?: never } + | { variant: 'button'; onClick: () => void; href?: never }; + +function Button(props: ButtonProps) { + if (props.variant === 'link') { + return Link; + } + return ; +} +``` + +## Exhaustive checks on discriminated unions + +When a switch/if is missing a case arm TypeScript narrows to `never`. Use `satisfies never` to get a compile error instead of a silent fallthrough: + +```typescript +function renderVariant(props: ButtonProps) { + switch (props.variant) { + case 'link': return ; + case 'button': return