From 8ae2e2515f4548a5bceee9794b5a7cc5f8b64ac8 Mon Sep 17 00:00:00 2001 From: "Jonathan D.A. Jewell" <6759885+hyperpolymath@users.noreply.github.com> Date: Thu, 14 May 2026 13:07:45 +0100 Subject: [PATCH 1/2] feat: add paths-ignore input; default-skip vendored/fixture content Adopts the provenance-aware suppression pattern established by hyperpolymath/hypatia#243 and matched in hyperpolymath/a2ml-validate-action#7 for the K9 validator. Content-pattern validators must distinguish a target file from a vendored / fixture / training-corpus file that legitimately contains the very pattern being checked. This action was firing "Pedigree block missing 'name' field" on every K9 file in vendored project trees (e.g. verified-container-spec/ consumed by stapeln). The vendored files have their own pedigree declarations in their upstream context. New input `paths-ignore` (newline-separated, substring match), default-on with the canonical RSR vendored / fixture path set. Pass '' to disable. Co-Authored-By: Claude Opus 4.7 --- README.adoc | 21 +++++++++++++++++++++ action.yml | 20 ++++++++++++++++++++ validate-k9.sh | 41 ++++++++++++++++++++++++++++++++++++++++- 3 files changed, 81 insertions(+), 1 deletion(-) diff --git a/README.adoc b/README.adoc index 9ac90c9..fc2347b 100644 --- a/README.adoc +++ b/README.adoc @@ -42,6 +42,8 @@ jobs: with: path: '.' # Directory to scan (default: repo root) strict: 'false' # Promote warnings to errors (default: false) + # paths-ignore: defaults to vendored / fixture patterns; override + # via newline-separated string. Use '' to disable. ---- === Inputs @@ -57,8 +59,27 @@ jobs: | `strict` | `false` | When `true`, warnings become errors and the action fails on any issue + +| `paths-ignore` +| _vendored & fixture defaults_ +| Newline-separated path fragments to skip. Substring match against each + file path. 
Default set: `vendor/`, `vendored/`, `verified-container-spec/`, + `.audittraining/`, `integration/fixtures/`, `test/fixtures/`, + `tests/fixtures/`. Pass an empty string (`paths-ignore: ''`) to disable + and scan everything. See https://github.com/hyperpolymath/hypatia/pull/243 + for the architectural rationale (content-pattern validators must + distinguish targets from fixtures / vendored / training-corpus files + that legitimately contain the very pattern being checked). |=== +==== Why default-on path exemptions? + +K9 files inside vendored projects (e.g. `verified-container-spec/`) carry +their own pedigree declarations in their upstream context — flagging every +such file as "Pedigree block missing 'name' field" is provenance noise. +The defaults match the canonical RSR vendored-content paths; override for +project-specific carve-outs. + === Outputs [cols="1,3"] diff --git a/action.yml b/action.yml index 177c09a..1f2d3e4 100644 --- a/action.yml +++ b/action.yml @@ -30,6 +30,25 @@ inputs: will fail on any validation issue. Defaults to false. required: false default: 'false' + paths-ignore: + description: >- + Newline-separated path fragments to skip. Each line is matched as a + substring against the file's path. Defaults to common vendored / + training-corpus / fixture patterns so consumers don't have to repeat + this carve-out in every repo. Pass an empty string to disable. + Pattern follows hyperpolymath/hypatia#243 — validators that scan + content patterns must distinguish a target file from a fixture / + vendored / training-corpus file that legitimately contains the + pattern being checked. 
+ required: false + default: | + vendor/ + vendored/ + verified-container-spec/ + .audittraining/ + integration/fixtures/ + test/fixtures/ + tests/fixtures/ outputs: files-scanned: @@ -51,5 +70,6 @@ runs: env: INPUT_PATH: ${{ inputs.path }} INPUT_STRICT: ${{ inputs.strict }} + INPUT_PATHS_IGNORE: ${{ inputs.paths-ignore }} run: | "${GITHUB_ACTION_PATH}/validate-k9.sh" diff --git a/validate-k9.sh b/validate-k9.sh index c81dab2..92b5dbb 100755 --- a/validate-k9.sh +++ b/validate-k9.sh @@ -27,6 +27,30 @@ set -euo pipefail SCAN_PATH="${INPUT_PATH:-.}" STRICT="${INPUT_STRICT:-false}" +PATHS_IGNORE_RAW="${INPUT_PATHS_IGNORE:-}" + +# Parse paths-ignore: newline-separated fragments, blank lines and # comments +# stripped. Each fragment is a substring match against the file path. Pattern +# adopted from hyperpolymath/hypatia#243 — content-pattern validators must +# distinguish a target from a vendored / fixture file that legitimately +# contains the very pattern being checked. +PATHS_IGNORE=() +while IFS= read -r _frag; do + # Strip leading and trailing whitespace (canonical bash idiom). + _frag="${_frag#"${_frag%%[![:space:]]*}"}" + _frag="${_frag%"${_frag##*[![:space:]]}"}" + [[ -z "$_frag" || "$_frag" == \#* ]] && continue + PATHS_IGNORE+=("$_frag") +done <<< "$PATHS_IGNORE_RAW" + +# Returns 0 if path should be skipped (matches any ignore fragment), 1 otherwise. The ${arr[@]+…} guard lets an empty ignore list pass `set -u` on bash < 4.4. +path_ignored() { + local p="$1" frag + for frag in ${PATHS_IGNORE[@]+"${PATHS_IGNORE[@]}"}; do + [[ "$p" == *"$frag"* ]] && return 0 + done + return 1 +} # Counters FILES_SCANNED=0 @@ -250,7 +274,22 @@ echo "Scanning ${SCAN_PATH} for K9 files (.k9, .k9.ncl)..." 
echo "" # Find all K9 files, excluding .git directory -mapfile -t k9_files < <(find "$SCAN_PATH" \( -name '*.k9' -o -name '*.k9.ncl' \) -not -path '*/.git/*' -type f | sort) +mapfile -t k9_candidates < <(find "$SCAN_PATH" \( -name '*.k9' -o -name '*.k9.ncl' \) -not -path '*/.git/*' -type f | sort) + +# Apply paths-ignore filter; the ${arr[@]+…} guard lets an empty candidate list pass `set -u` on bash < 4.4 +k9_files=() +SKIPPED=0 +for _f in ${k9_candidates[@]+"${k9_candidates[@]}"}; do + if path_ignored "$_f"; then + SKIPPED=$((SKIPPED + 1)) + continue + fi + k9_files+=("$_f") +done + +if [[ $SKIPPED -gt 0 ]]; then + echo "::notice::Skipped ${SKIPPED} file(s) matching paths-ignore" +fi if [[ ${#k9_files[@]} -eq 0 ]]; then echo "::notice::No K9 files found in ${SCAN_PATH}" From 8df7ace78cb355a5bda427632a5d2cd87242ed90 Mon Sep 17 00:00:00 2001 From: "Jonathan D.A. Jewell" <6759885+hyperpolymath@users.noreply.github.com> Date: Thu, 14 May 2026 13:17:12 +0100 Subject: [PATCH 2/2] fix: count `pedigree = {` opening brace; nested-block close no longer terminates view MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the validator detected the pedigree block start, it `continue`d before counting that line's `{`. So depth started at 0 instead of 1, and the next nested block's closing brace took depth back to 0 — prematurely setting `in_pedigree=false`. Any field (name, version, leash, signature) defined AFTER an inner block close was therefore invisible to the validator, even when present in the file. Repro: any K9 file where `metadata = { name = "…", …, }` is the LAST top-level field in pedigree (the canonical RSR template shape). All 6 templates / examples in hyperpolymath/stapeln#32 hit this — the `security = { … },` block prematurely closed pedigree before `metadata` was reached. Fix: drop the `continue` so the `pedigree = {` line falls through to the brace counter. Depth now starts at 1 and tracks correctly. 
Verified by mental-trace on `pedigree = { security = {…}, metadata = { name = … } }`: pedigree-line: depth = 1, in_pedigree=true security {: depth = 2 security }: depth = 1, line has `}` but depth > 0 → stay in pedigree metadata {: depth = 2 name = …: captured at depth 2 inside pedigree → has_pedigree_name metadata }: depth = 1 pedigree }: depth = 0 + `}` → in_pedigree=false Co-Authored-By: Claude Opus 4.7 --- validate-k9.sh | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/validate-k9.sh b/validate-k9.sh index 92b5dbb..d9693cf 100755 --- a/validate-k9.sh +++ b/validate-k9.sh @@ -168,12 +168,21 @@ validate_k9() { while IFS= read -r line; do line_num=$((line_num + 1)) - # Detect pedigree block start + # Detect pedigree block start. Note: do NOT `continue` here — the + # `pedigree = {` line itself contains the opening brace that + # establishes the block. Falling through to the brace counter + # below makes depth start at 1, so a subsequent `security = {…},` + # closing brace correctly takes depth to 1 (not 0), keeping us + # inside the pedigree block when later fields (name/version/leash) + # are checked. Previously the `continue` skipped this opening + # brace, depth started at 0, and the first nested block's close + # prematurely terminated the validator's view of the pedigree — + # making `pedigree.metadata.name` invisible. if [[ "$line" =~ ^[[:space:]]*pedigree[[:space:]]*= ]]; then has_pedigree=true in_pedigree=true pedigree_depth=0 - continue + # fall through fi if [[ "$in_pedigree" == "true" ]]; then