From 0d3d0fb55334131fb64798c559cb510e3722d539 Mon Sep 17 00:00:00 2001 From: Flipper Date: Sat, 27 Jun 2026 02:27:09 +0200 Subject: [PATCH 1/4] test(windows): add red regressions for platform-specific failures Adds Windows-only red tests and analysis for native Windows failures found during a Windows red-test campaign. This change contains no production fixes. - windows_non_ascii_repo_path_preserves_definitions (integration): byte-identical TypeScript fixtures indexed under non-ASCII parent paths (Latin-1 accents, Cyrillic, CJK, Greek) extract zero definitions and only File/Folder nodes (5 nodes / 4 edges) versus the ASCII baseline (12 nodes / 20 edges / 5 definitions). The pipeline source readers open files with fopen() on a UTF-8 path, which the Windows CRT interprets in the active ANSI code page; directory discovery already uses the wide API, so files are listed but never parsed. - windows_cli_non_ascii_repo_path_is_honored (integration): the documented `cli index_repository` entrypoint rejects a non-ASCII repo_path because main() does not read the wide command line, so argv arrives in the ANSI code page. Both reproduce at the product surface (real MCP process, real stdio, real SQLite DB), are deterministic, and pass on Linux/macOS. A PowerShell runner builds the binary and runs the suite; standard-library Python only. See tests/windows/RED_TEST_ANALYSIS.md for environment, commands, ruled-out seed areas, and suspected fix locations. 
Signed-off-by: Flipper --- .gitignore | 4 + scripts/test-windows.ps1 | 94 +++++++++++++ tests/windows/RED_TEST_ANALYSIS.md | 180 ++++++++++++++++++++++++ tests/windows/mcp_stdio.py | 146 +++++++++++++++++++ tests/windows/test_cli_non_ascii_arg.py | 104 ++++++++++++++ tests/windows/test_non_ascii_path.py | 166 ++++++++++++++++++++++ 6 files changed, 694 insertions(+) create mode 100644 scripts/test-windows.ps1 create mode 100644 tests/windows/RED_TEST_ANALYSIS.md create mode 100644 tests/windows/mcp_stdio.py create mode 100644 tests/windows/test_cli_non_ascii_arg.py create mode 100644 tests/windows/test_non_ascii_path.py diff --git a/.gitignore b/.gitignore index 2edd00383..966cee583 100644 --- a/.gitignore +++ b/.gitignore @@ -62,3 +62,7 @@ soak-results/ # Local npm cache graph-ui/.npm-cache-local/ + +# Python bytecode from tests/windows/ harness +__pycache__/ +*.pyc diff --git a/scripts/test-windows.ps1 b/scripts/test-windows.ps1 new file mode 100644 index 000000000..b426fa350 --- /dev/null +++ b/scripts/test-windows.ps1 @@ -0,0 +1,94 @@ +<# +.SYNOPSIS + Run the native-Windows red-test suite for codebase-memory-mcp. + +.DESCRIPTION + Builds the production binary (build/c/codebase-memory-mcp.exe) if it is not + already present, then runs the deterministic Windows red tests under + tests/windows/. These tests reproduce platform-specific failures at the + product surface (real MCP process, real stdio, real SQLite DB). + + The unit/invariant C suite is built and run via Makefile.cbm. On native + Windows the MinGW/LLVM toolchain ships no libasan/libubsan, so the sanitizer + flags must be disabled for the local build (SANITIZE=). Where the toolchain + *does* provide AddressSanitizer/UBSan (Linux containers, WSL), prefer + scripts/test.sh which keeps the sanitizers on. + +.PARAMETER Binary + Path to an existing codebase-memory-mcp.exe. If omitted, the script looks for + build/c/codebase-memory-mcp.exe and builds it when missing. 
+ +.PARAMETER Make + Path to GNU make (default: 'make' on PATH; MSYS2 ships it at + C:\msys64\usr\bin\make.exe). + +.EXAMPLE + pwsh -File scripts/test-windows.ps1 +#> +[CmdletBinding()] +param( + [string]$Binary, + [string]$Make = "make" +) + +$ErrorActionPreference = "Stop" +$repoRoot = Split-Path -Parent $PSScriptRoot +Set-Location $repoRoot + +$python = (Get-Command python -ErrorAction SilentlyContinue) +if (-not $python) { $python = (Get-Command py -ErrorAction SilentlyContinue) } +if (-not $python) { throw "Python 3 is required to run the Windows red tests." } +$py = $python.Source + +# A writable Windows temp dir that GNU make forwards to the native gcc. MSYS2 +# strips TMP/TEMP from the environment it hands native children, so pass them as +# make command-line variables (make exports those to recipe processes). +$tmp = $env:TEMP +if (-not $tmp) { $tmp = "$env:USERPROFILE\AppData\Local\Temp" } + +function Resolve-Binary { + param([string]$Explicit) + if ($Explicit) { return (Resolve-Path $Explicit).Path } + $built = Join-Path $repoRoot "build\c\codebase-memory-mcp.exe" + if (Test-Path $built) { return $built } + Write-Host "Building production binary via Makefile.cbm ..." 
-ForegroundColor Cyan + & $Make "-j" "-f" "Makefile.cbm" "cbm" "TMP=$tmp" "TEMP=$tmp" "TMPDIR=$tmp" + if ($LASTEXITCODE -ne 0) { throw "build failed (exit $LASTEXITCODE)" } + if (-not (Test-Path $built)) { throw "binary not produced at $built" } + return $built +} + +$bin = Resolve-Binary -Explicit $Binary +Write-Host "Binary: $bin" -ForegroundColor Green + +$env:PYTHONUTF8 = "1" # ensure the harness encodes argv/stdio as UTF-8 + +$tests = @( + "tests\windows\test_non_ascii_path.py", + "tests\windows\test_cli_non_ascii_arg.py" +) + +$failed = @() +foreach ($t in $tests) { + Write-Host "`n=== $t ===" -ForegroundColor Cyan + & $py $t $bin + $code = $LASTEXITCODE + if ($code -eq 0) { + Write-Host "GREEN ($t)" -ForegroundColor Green + } elseif ($code -eq 1) { + Write-Host "RED ($t) - Windows-specific failure reproduced" -ForegroundColor Red + $failed += $t + } else { + Write-Host "SETUP ERROR ($t) exit=$code" -ForegroundColor Yellow + $failed += $t + } +} + +Write-Host "" +if ($failed.Count -gt 0) { + Write-Host ("RED suite: {0}/{1} Windows red tests failed (expected until the " -f $failed.Count, $tests.Count) -ForegroundColor Red + Write-Host "platform issues are fixed). See tests/windows/RED_TEST_ANALYSIS.md." -ForegroundColor Red + exit 1 +} +Write-Host "All Windows red tests are GREEN." -ForegroundColor Green +exit 0 diff --git a/tests/windows/RED_TEST_ANALYSIS.md b/tests/windows/RED_TEST_ANALYSIS.md new file mode 100644 index 000000000..2c13db967 --- /dev/null +++ b/tests/windows/RED_TEST_ANALYSIS.md @@ -0,0 +1,180 @@ +# Windows Red-Test Analysis + +Deterministic, Windows-only red tests found during a native-Windows red-test +campaign. They reproduce platform-specific failures at the product surface and +are intended as regression guards while the underlying issues are fixed in +separate maintainer PRs. 
**This PR contains no production fixes.** + +## Environment + +- OS: Microsoft Windows 11 Pro, build 10.0.26200 +- Source build: MinGW-w64 GCC 15.2.0 (MSYS2), `make -f Makefile.cbm cbm` +- Filesystem: NTFS, code page 65001 (UTF-8 console) +- Shells/launchers exercised: PowerShell 5.1 (5.1.26100), `cmd.exe`, + Git Bash (MSYS2), direct Win32 process launch, Python `subprocess.Popen`, + Python stdio (line-delimited JSON-RPC) transport +- CBM source commit under test: `b075f05` +- Binary: `build/c/codebase-memory-mcp.exe` (production build) + +### Sanitizer note + +The MinGW/LLVM toolchain available on this machine ships **no** `libasan` / +`libubsan`, so an AddressSanitizer/UBSan build is not possible natively (the plan +anticipates this). The C unit/invariant suite (`build/c/test-runner`) was built +with `SANITIZE=` and runs; the two red tests below are product-level integration +tests that drive a real `codebase-memory-mcp.exe` over stdio. On a host where the +toolchain provides sanitizers (Linux container, WSL), the same fixtures should be +run through an ASan/UBSan binary via `scripts/test.sh`. + +## How to run + +```powershell +# Builds build/c/codebase-memory-mcp.exe if missing, then runs the red suite. +pwsh -File scripts/test-windows.ps1 +# or, against an installed/relocated binary: +pwsh -File scripts/test-windows.ps1 -Binary "C:\path\to\codebase-memory-mcp.exe" +``` + +Each test exits `0` (green / invariant holds), `1` (red / Windows failure +reproduced), or `2` (environment/setup error). Standard-library Python 3 only. 
+ +--- + +## windows_non_ascii_repo_path_preserves_definitions + +- Class: integration +- Test: `tests/windows/test_non_ascii_path.py` +- Related issues: #636, #357, #571 (naming), #530 +- Environment: Windows 11 26200, PowerShell 5.1 / Python stdio, NTFS, CP 65001 +- Fixture: byte-identical 2-file TypeScript repo (`src/math.ts`, `src/main.ts`), + copied to an ASCII parent path and to four non-ASCII parent paths + (Latin-1 accents `café`, Cyrillic `проект`, CJK `日本語`, Greek `Ωμέγα`) +- Expected: each non-ASCII copy produces the same graph counts as the ASCII + baseline (12 nodes / 20 edges / 5 definition nodes) +- Actual: every non-ASCII copy produces **5 nodes / 4 edges / 0 definition + nodes** — only `File`/`Folder` nodes; zero `Function`/`Class`/`Method` +- Command: `python tests/windows/test_non_ascii_path.py build\c\codebase-memory-mcp.exe` +- Minimal failure output: + + ``` + baseline (ASCII): nodes=12 edges=20 definitions=5 + [FAIL] non-ascii/latin1_accents nodes=5 edges=4 definitions=0 (baseline 12/20/5) + [FAIL] non-ascii/cyrillic nodes=5 edges=4 definitions=0 (baseline 12/20/5) + [FAIL] non-ascii/cjk nodes=5 edges=4 definitions=0 (baseline 12/20/5) + [FAIL] non-ascii/greek nodes=5 edges=4 definitions=0 (baseline 12/20/5) + ``` + +- Suspected implementation area: the per-pass source readers + `read_file()` in `src/pipeline/pass_definitions.c`, `pass_calls.c`, + `pass_parallel.c`, `pass_semantic.c` (and the `k8s`/`lsp_cross`/`pkgmap` + variants) open files with plain `fopen(path, "rb")`. On Windows `fopen` + interprets the UTF-8 path in the active **ANSI code page**, so a path with + non-ASCII bytes cannot be opened and the tree-sitter parser receives no bytes. + Directory discovery already uses the wide API + (`cbm_utf8_to_wide` + `FindFirstFileW` in `src/foundation/compat_fs.c`, + `src/foundation/platform.c`), which is why `File`/`Folder` nodes still appear + while all definitions vanish. 
Fix direction: route the pass-level reads through + the wide layer (`cbm_utf8_to_wide` + `_wfopen`), or add a shared + UTF-8-aware file reader and use it from every pass. + +Verified with `_wfopen` vs `fopen` on a non-ASCII path: `fopen(utf8, "rb")` +returns `NULL`, `_wfopen(cbm_utf8_to_wide(utf8), L"rb")` opens the same file. + +This invariant holds on Linux/macOS (byte-transparent UTF-8 filesystem); the test +turns green once the pass readers convert to wide. + +--- + +## windows_cli_non_ascii_repo_path_is_honored + +- Class: integration +- Test: `tests/windows/test_cli_non_ascii_arg.py` +- Related issues: #636, #423, #20 +- Environment: Windows 11 26200, `cli` argv path, NTFS, CP 65001 +- Fixture: a TypeScript repo under a non-ASCII directory (`café_日本語_repo`), + created with the OS wide API so it genuinely exists; an ASCII control repo +- Expected: `codebase-memory-mcp cli index_repository '{"repo_path":"<non-ASCII dir>"}'` + indexes the directory (ASCII control proves the CLI path works) +- Actual: the ASCII control indexes; the non-ASCII invocation fails with + `repo_path is required` (the mangled, now-invalid-UTF-8 JSON argument is + rejected) and exits non-zero +- Command: `python tests/windows/test_cli_non_ascii_arg.py build\c\codebase-memory-mcp.exe` +- Minimal failure output: + + ``` + ASCII control: indexed OK + non-ASCII argv: rc=1 + stderr: ... repo_path is required + ``` + +- Suspected implementation area: `int main(int argc, char **argv)` in + `src/main.c` does not use `wmain` / `GetCommandLineW`, so on Windows the C + runtime delivers `argv` in the ANSI code page. The non-ASCII bytes in the JSON + argument are corrupted before `yyjson` parses them. Fix direction: read the + wide command line on Windows (`GetCommandLineW` + `CommandLineToArgvW`, or a + `wmain` entrypoint) and convert each argument to UTF-8. 
+ +Real MCP clients pass `repo_path` inside a JSON-RPC message over stdio (which is +byte-clean), so this affects the documented `cli` entrypoint and the hook/install +flows that shell out to it, not the stdio server path. Holds on Linux/macOS +(argv is UTF-8 bytes). + +--- + +## Seed areas revisited and ruled out (green on native Windows) + +Each was reproduced as a concrete attempt against the production binary and +behaved correctly — recorded as green and **not** included as a red test: + +| Area | Seed | Result on Windows | +|---|---|---| +| stdio `initialize` returns before stdin EOF | #513, #635 | green | +| `tools/list` non-empty; all 14 tools return valid JSON-RPC | #530 | green | +| Client exit terminates the server process (no residual `.exe`) | #185, #406 | green | +| `--help` / `--version` exit 0 in PowerShell, cmd, Git Bash | — | green | +| `search_code` works without bash/GNU grep (PowerShell `Select-String`) | #422, #348 | green | +| `.gitignore` and `.cbmignore` honored | #274 | green | +| `detect_changes` reports real changed files across commits | #371, #137 | green | +| `query_graph` shapes (counts, paths, labels) — no crash/disconnect | #627 | green | +| Paths with spaces, `&`, `()`, `[]`, `#`, `%`, `!`, apostrophe | #272 | green | +| Mixed slash/backslash and lower-case drive letters | #133 | green | +| Non-UTF-8 (CP949) source file emits valid UTF-8 JSON; no crash | #511 | green | +| Re-index is idempotent (counts stable, single project) | #140 | green | +| Index never escapes the selected root | #331 | green | +| Every JSON-RPC response decodes as strict UTF-8 | invariant | green | + +## Observed but intentionally out of scope for this PR + +- **Project-name collision for non-ASCII paths (#571/#20).** Two distinct repos + (`проект`, `日本語`) under the same parent derive the *same* project name, + because `cbm_project_name_from_path` (`src/pipeline/fqn.c`) maps every + non-`[A-Za-z0-9._-]` byte to `-` and then trims. 
This is a real bug but it is + **not Windows-specific** — `cbm_project_name_from_path` is platform-independent + and collides identically on Linux. Per the campaign rules it is recorded here + and left for a cross-platform PR. +- **Paths longer than 260 characters.** This machine has + `HKLM\SYSTEM\CurrentControlSet\Control\FileSystem\LongPathsEnabled = 0`, so + paths over `MAX_PATH` are unreachable by every application, not just CBM. + CBM could opt in via the `\\?\` prefix + wide APIs, but the failure is gated by + a machine-wide policy rather than a clean CBM-only defect, so it is excluded. +- **C `test-runner` failures on Windows.** The in-process C suite reports many + extraction-count failures concentrated in `test_grammar_probe_*`, + `test_node_creation_probe`, `test_edge_*`, `test_matrix_*`, and + `test_integration.c` (e.g. `integ_index_has_files` finds 0 files even for an + **ASCII** fixture). The production binary indexes those same ASCII/CRLF cases + correctly (CRLF vs LF source files were verified to extract identically), so + these look like in-process test-harness issues rather than user-facing product + regressions. Distinguishing genuine Windows-only product regressions from + fixture/harness sensitivity requires a Linux baseline of the same commit and is + left as a follow-up; they are deliberately **not** converted into red tests + here to avoid shipping undiagnosed assertions. + +## Stop-condition coverage + +- Shells/launchers covered: PowerShell 5.1, `cmd.exe`, Git Bash, direct Win32, + Python `subprocess`, Python stdio JSON-RPC (>= 3 required). +- Classes covered in the green streak: smoke, integration, unit (the passing + `build/c/test-runner` cases), invariant. +- Seed areas (Unicode paths, mapped-drive/UNC, stdio, `search_code`, + install/update, watcher/ignore, query, memory/process lifecycle) were each + revisited or explicitly ruled out above. 
diff --git a/tests/windows/mcp_stdio.py b/tests/windows/mcp_stdio.py new file mode 100644 index 000000000..251cff8dd --- /dev/null +++ b/tests/windows/mcp_stdio.py @@ -0,0 +1,146 @@ +"""Minimal MCP stdio client for the Windows red-test suite. + +Drives a real codebase-memory-mcp(.exe) over a line-delimited JSON-RPC stdio +pipe. The pipe carries UTF-8 bytes, so a non-ASCII repo_path reaches the server +without passing through the Windows ANSI command-line code page (which mangles +argv for a binary whose main() is not wmain/GetCommandLineW). This isolates the +server's real path handling from CLI-argv encoding artifacts. + +No third-party dependencies — standard library only. +""" +import json +import os +import subprocess +import threading +import time + + +class McpError(Exception): + pass + + +class McpServer: + def __init__(self, binary, cache_dir=None, extra_env=None, cwd=None): + self.binary = binary + self._id = 0 + self.proc = None + self._stderr = [] + env = dict(os.environ) + if cache_dir: + env["CBM_CACHE_DIR"] = cache_dir # isolate the graph DB location + if extra_env: + env.update(extra_env) + self.env = env + self.cwd = cwd + + def __enter__(self): + self.start() + return self + + def __exit__(self, *a): + self.close() + + def start(self): + self.proc = subprocess.Popen( + [self.binary], + stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, + env=self.env, cwd=self.cwd, bufsize=0) + threading.Thread(target=self._drain_stderr, daemon=True).start() + + def _drain_stderr(self): + try: + for line in self.proc.stderr: + self._stderr.append(line.decode("utf-8", "replace")) + except Exception: + pass + + def stderr_text(self): + return "".join(self._stderr) + + def _send(self, obj): + data = json.dumps(obj, ensure_ascii=False).encode("utf-8") + self.proc.stdin.write(data + b"\n") + self.proc.stdin.flush() + + def _read_message(self, timeout=60): + result = {} + + def reader(): + try: + result["line"] = self.proc.stdout.readline() + except 
Exception as ex: + result["exc"] = ex + + th = threading.Thread(target=reader, daemon=True) + th.start() + th.join(timeout) + if th.is_alive(): + raise McpError("timeout after %ss (hang)" % timeout) + if "exc" in result: + raise McpError("read error: %r" % result["exc"]) + line = result.get("line", b"") + if not line: + raise McpError("EOF / server closed stdout") + # strict: an invalid-UTF-8 JSON-RPC response is itself a failure. + return json.loads(line.decode("utf-8", "strict")) + + def request(self, method, params=None, timeout=60): + self._id += 1 + rid = self._id + self._send({"jsonrpc": "2.0", "id": rid, "method": method, + "params": params or {}}) + deadline = time.time() + timeout + while True: + msg = self._read_message(timeout=max(1, deadline - time.time())) + if msg.get("id") == rid: + return msg + if time.time() > deadline: + raise McpError("timeout waiting for id=%d" % rid) + + def notify(self, method, params=None): + self._send({"jsonrpc": "2.0", "method": method, "params": params or {}}) + + def initialize(self, timeout=60): + resp = self.request("initialize", { + "protocolVersion": "2024-11-05", "capabilities": {}, + "clientInfo": {"name": "windows-red-test", "version": "1.0"}}, timeout) + if "error" in resp: + raise McpError("initialize error: %r" % resp["error"]) + try: + self.notify("notifications/initialized") + except Exception: + pass + return resp + + def tools_list(self, timeout=60): + resp = self.request("tools/list", {}, timeout=timeout) + if "error" in resp: + raise McpError("tools/list error: %r" % resp["error"]) + return resp["result"]["tools"] + + def call_tool(self, name, arguments, timeout=180): + return self.request("tools/call", + {"name": name, "arguments": arguments}, timeout=timeout) + + @staticmethod + def tool_text(resp): + if "error" in resp: + return None, resp["error"] + parts = [c.get("text", "") for c in resp.get("result", {}).get("content", []) + if c.get("type") == "text"] + return "".join(parts), None + + def 
close(self): + if not self.proc: + return + try: + self.proc.stdin.close() + except Exception: + pass + try: + self.proc.wait(timeout=10) + except Exception: + try: + self.proc.kill() + except Exception: + pass diff --git a/tests/windows/test_cli_non_ascii_arg.py b/tests/windows/test_cli_non_ascii_arg.py new file mode 100644 index 000000000..1aafad183 --- /dev/null +++ b/tests/windows/test_cli_non_ascii_arg.py @@ -0,0 +1,104 @@ +"""RED integration test — `cli index_repository` rejects a non-ASCII repo_path. + +Reproduces the CLI-argv half of issue #636 / #423 / #20 on native Windows. + +The documented entrypoint `codebase-memory-mcp cli index_repository ''` +receives its JSON argument through argv. main() is declared as +`int main(int argc, char **argv)` (src/main.c) — it does not use wmain / +GetCommandLineW — so on Windows the C runtime hands it argv in the active ANSI +code page. A repo_path containing non-ASCII characters is therefore mangled (or, +when yyjson rejects the now-invalid UTF-8, the whole argument is discarded), and +the command fails with "repo_path is required" / "Pipeline failed" instead of +indexing the real directory. + +The directory itself is created with the Windows wide API (Python uses +CreateFileW/_wmkdir under the hood), so it genuinely exists on disk; only the +argv path delivery is lossy. + +Passes on Linux/macOS (argv is UTF-8 bytes). Fails on native Windows until the +CLI reads the wide command line (GetCommandLineW + CommandLineToArgvW, or a +wmain entrypoint) and converts to UTF-8. + +Exit code: 0 == honored (green), 1 == rejected/mangled (red), 2 == setup error. 
+ +Usage: + python test_cli_non_ascii_arg.py +""" +import json +import os +import shutil +import subprocess +import sys +import tempfile + +MATH_TS = ( + "export function add(a: number, b: number): number { return a + b; }\n" + "export class Calc { total = 0; push(x: number): void { this.total = " + "add(this.total, x); } }\n" +) + + +def make_fixture(root): + src = os.path.join(root, "src") + os.makedirs(src, exist_ok=True) + with open(os.path.join(src, "math.ts"), "wb") as f: + f.write(MATH_TS.encode("utf-8")) + + +def main(): + if len(sys.argv) < 2: + print("usage: python test_cli_non_ascii_arg.py ") + return 2 + binary = os.path.abspath(sys.argv[1]) + if not os.path.exists(binary): + print("FAIL: binary not found: %s" % binary) + return 2 + + work = tempfile.mkdtemp(prefix="cbm_win_cliarg_") + try: + # Non-ASCII repo directory (created via the OS wide API → really exists). + repo = os.path.join(work, "café_日本語_repo") + make_fixture(repo) + cache = os.path.join(work, "cache") + os.makedirs(cache, exist_ok=True) + + # Sanity: an ASCII control path must index through the CLI, proving the + # CLI path itself works and isolating the failure to argv encoding. 
+ ascii_repo = os.path.join(work, "ascii_repo") + make_fixture(ascii_repo) + env = dict(os.environ) + env["CBM_CACHE_DIR"] = os.path.join(work, "cache_ascii") + ctrl = subprocess.run( + [binary, "cli", "index_repository", + json.dumps({"repo_path": ascii_repo})], + capture_output=True, timeout=120, env=env) + ctrl_out = (ctrl.stdout or b"").decode("utf-8", "replace") + if '"nodes"' not in ctrl_out: + print("SETUP FAIL: ASCII control did not index via CLI:\n%s" % + ctrl_out[:300]) + return 2 + + env2 = dict(os.environ) + env2["CBM_CACHE_DIR"] = cache + arg = json.dumps({"repo_path": repo}, ensure_ascii=False) + p = subprocess.run([binary, "cli", "index_repository", arg], + capture_output=True, timeout=120, env=env2) + out = (p.stdout or b"").decode("utf-8", "replace") + err = (p.stderr or b"").decode("utf-8", "replace") + honored = '"nodes"' in out and '"nodes":0' not in out.replace(" ", "") + print("ASCII control: indexed OK") + print("non-ASCII argv: rc=%d" % p.returncode) + print(" stdout: %s" % out[:200].replace("\n", " ")) + print(" stderr: %s" % err[-200:].replace("\n", " ")) + if honored: + print("\nGREEN: CLI honored the non-ASCII repo_path.") + return 0 + print("\nRED: CLI did not index the non-ASCII repo_path (argv delivered " + "in the ANSI code page; main() does not read the wide command line).") + return 1 + finally: + shutil.rmtree(work, ignore_errors=True) + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/tests/windows/test_non_ascii_path.py b/tests/windows/test_non_ascii_path.py new file mode 100644 index 000000000..fb5bea26c --- /dev/null +++ b/tests/windows/test_non_ascii_path.py @@ -0,0 +1,166 @@ +"""RED integration test — Windows non-ASCII repo path drops all definitions. + +Reproduces issue #636 / #357 at the product surface (real codebase-memory-mcp +process, real SQLite DB, real stdio). Two byte-identical TypeScript fixtures are +indexed: one under an ASCII parent path, one under a non-ASCII parent path. 
The +invariant under test: + + A byte-identical fixture must produce equivalent graph counts regardless of + whether its absolute path contains non-ASCII characters. + +Observed on native Windows: the ASCII copy extracts functions/classes/methods +(12 nodes / 20 edges); every non-ASCII copy (Latin-1 accents, Cyrillic, CJK, +Greek) extracts only File/Folder nodes (5 nodes / 4 edges) — zero definitions. + +Root cause: each pipeline pass reads source bytes with plain fopen(path, "rb") +(src/pipeline/pass_definitions.c, pass_calls.c, pass_parallel.c, pass_semantic.c, +…). On Windows fopen() interprets the UTF-8 path in the active ANSI code page, +so a path with non-ASCII bytes cannot be opened and the parser receives nothing. +Directory discovery already uses the wide API (cbm_utf8_to_wide + FindFirstFileW +in src/foundation/compat_fs.c), which is why File/Folder nodes still appear. + +This test passes on Linux/macOS (byte-transparent UTF-8 filesystem) and fails on +native Windows. It turns green once the per-pass read_file helpers convert the +UTF-8 path to wide (_wfopen) the way compat_fs.c / platform.c already do. + +Exit code: 0 == invariant holds (green), 1 == invariant violated (red), +2 == environment/setup error. 
+ +Usage: + python test_non_ascii_path.py +""" +import json +import os +import shutil +import sys +import tempfile + +sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) +from mcp_stdio import McpServer # noqa: E402 + +MATH_TS = ( + "export function add(a: number, b: number): number { return a + b; }\n" + "export function mul(a: number, b: number): number { return add(a, a); }\n" + "export class Calc {\n" + " total: number = 0;\n" + " push(x: number): void { this.total = add(this.total, x); }\n" + "}\n" +) +MAIN_TS = ( + 'import { add, mul, Calc } from "./math";\n' + "function run(): number {\n" + " const c = new Calc();\n" + " c.push(add(1, 2));\n" + " return mul(3, 4);\n" + "}\n" + "run();\n" +) + +# Distinct non-ASCII scripts — each must behave like the ASCII baseline. +NON_ASCII_SEGMENTS = { + "latin1_accents": "café_repo", + "cyrillic": "проект_repo", + "cjk": "日本語_repo", + "greek": "Ωμέγα_repo", +} + + +def make_fixture(root): + src = os.path.join(root, "src") + os.makedirs(src, exist_ok=True) + for name, text in (("math.ts", MATH_TS), ("main.ts", MAIN_TS)): + with open(os.path.join(src, name), "wb") as f: + f.write(text.encode("utf-8")) # exact bytes, identical across copies + + +def index_and_count(binary, repo, cache): + """Index `repo` into an isolated cache and return label-resolved counts.""" + os.makedirs(cache, exist_ok=True) + with McpServer(binary, cache_dir=cache) as s: + s.initialize() + resp = s.call_tool("index_repository", {"repo_path": repo}, timeout=180) + _, err = s.tool_text(resp) + if err: + return {"error": "index tools/call error: %r" % err} + lp = s.call_tool("list_projects", {}, timeout=60) + lp_txt, _ = s.tool_text(lp) + projects = json.loads(lp_txt).get("projects") or [] + if not projects: + return {"error": "no project listed after index"} + p = projects[0] + out = {"name": p.get("name"), "nodes": p.get("nodes"), + "edges": p.get("edges")} + # Definition-level counts prove the parser ran (not just discovery). 
+ # query_graph returns {"columns":[...],"rows":[[""]],...}. + name = p.get("name") + defs = 0 + for label in ("Function", "Class", "Method"): + q = "MATCH (n:%s) RETURN count(n)" % label + r = s.call_tool("query_graph", {"query": q, "project": name}, + timeout=60) + t, _ = s.tool_text(r) + try: + rows = json.loads(t).get("rows") or [] + if rows and rows[0]: + defs += int(rows[0][0]) + except Exception: + pass + out["definition_nodes"] = defs + return out + + +def main(): + if len(sys.argv) < 2: + print("usage: python test_non_ascii_path.py ") + return 2 + binary = os.path.abspath(sys.argv[1]) + if not os.path.exists(binary): + print("FAIL: binary not found: %s" % binary) + return 2 + + work = tempfile.mkdtemp(prefix="cbm_win_nonascii_") + failures = [] + try: + ascii_repo = os.path.join(work, "ascii_repo") + make_fixture(ascii_repo) + base = index_and_count(binary, ascii_repo, os.path.join(work, "c_ascii")) + if base.get("error") or not base.get("nodes"): + print("SETUP FAIL: ASCII baseline did not index: %r" % base) + return 2 + print("baseline (ASCII): nodes=%s edges=%s definitions=%s" % + (base["nodes"], base["edges"], base["definition_nodes"])) + if base["definition_nodes"] < 1: + print("SETUP FAIL: ASCII baseline produced no definitions: %r" % base) + return 2 + + for key, seg in NON_ASCII_SEGMENTS.items(): + repo = os.path.join(work, seg) + make_fixture(repo) + got = index_and_count(binary, repo, os.path.join(work, "c_" + key)) + ok = (not got.get("error") + and got.get("nodes") == base["nodes"] + and got.get("edges") == base["edges"] + and got.get("definition_nodes") == base["definition_nodes"]) + status = "PASS" if ok else "FAIL" + print("[%s] non-ascii/%-14s nodes=%s edges=%s definitions=%s " + "(baseline %s/%s/%s) name=%r" % + (status, key, got.get("nodes"), got.get("edges"), + got.get("definition_nodes"), base["nodes"], base["edges"], + base["definition_nodes"], got.get("name"))) + if not ok: + failures.append(key) + finally: + shutil.rmtree(work, 
ignore_errors=True) + + if failures: + print("\nRED: %d/%d non-ASCII path variants lost definitions: %s" % + (len(failures), len(NON_ASCII_SEGMENTS), ", ".join(failures))) + print("Invariant violated: byte-identical fixtures under non-ASCII paths " + "must extract the same definitions as the ASCII baseline.") + return 1 + print("\nGREEN: all non-ASCII path variants matched the ASCII baseline.") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) From a4dbe89941e31d38c3109977d462a729eb2ed131 Mon Sep 17 00:00:00 2001 From: Flipper Date: Sat, 27 Jun 2026 09:45:07 +0200 Subject: [PATCH 2/4] test(windows): add red regressions for hook augmenter and UI drive picker Extends the Windows red-test suite with two more reproduced, Windows-specific failures. No production fixes. - windows_hook_augment_emits_context (integration, #618): the PreToolUse Grep/Glob augmenter `hook-augment` emits empty stdout for every payload on Windows. src/cli/hook_augment.c gates on POSIX-style absolute paths (cwd[0] == '/' and the walk-up loop's dir[0] == '/'), which a Windows drive-letter cwd never satisfies, so the graph augmentation never fires. A control search_graph confirms the symbol is indexed. - windows_ui_picker_reaches_all_drives (integration, #548): the UI directory picker's GET /api/browse?path=/ returns no entries and never enumerates logical drives, so drives other than the system drive (D:\, E:\) cannot be selected. handle_browse in src/ui/http_server.c uses opendir without a GetLogicalDriveStrings root case. Needs a UI build (cbm-with-ui) and >1 drive; otherwise it reports a precondition (exit 2). Also records additional ruled-out seed areas (get_code_snippet sanitizes non-UTF-8 to U+FFFD #530.3; stdio handshake/flush works #513/#530.1/#635; mapped subst-drive indexing keeps the DB #227/#367) and cross-platform items left out of this Windows-only PR (#530.2 nested gitignore, #530.5 .git/info/exclude, #530.4 libgit2 build, #581 memory soak). 
Signed-off-by: Flipper --- scripts/test-windows.ps1 | 29 ++-- tests/windows/RED_TEST_ANALYSIS.md | 87 +++++++++++- tests/windows/test_hook_augment.py | 112 ++++++++++++++++ tests/windows/test_ui_drive_listing.py | 176 +++++++++++++++++++++++++ 4 files changed, 394 insertions(+), 10 deletions(-) create mode 100644 tests/windows/test_hook_augment.py create mode 100644 tests/windows/test_ui_drive_listing.py diff --git a/scripts/test-windows.ps1 b/scripts/test-windows.ps1 index b426fa350..620692a4a 100644 --- a/scripts/test-windows.ps1 +++ b/scripts/test-windows.ps1 @@ -63,12 +63,19 @@ Write-Host "Binary: $bin" -ForegroundColor Green $env:PYTHONUTF8 = "1" # ensure the harness encodes argv/stdio as UTF-8 +# test_ui_drive_listing.py reproduces the UI directory-picker bug (#548) and +# therefore needs a UI build (make -f Makefile.cbm cbm-with-ui) plus a machine +# with more than one drive. Against a non-UI binary it reports a precondition +# (exit 2), which is treated as a skip-with-reason, not a failure. 
$tests = @( "tests\windows\test_non_ascii_path.py", - "tests\windows\test_cli_non_ascii_arg.py" + "tests\windows\test_cli_non_ascii_arg.py", + "tests\windows\test_hook_augment.py", + "tests\windows\test_ui_drive_listing.py" ) -$failed = @() +$reds = @() +$precond = @() foreach ($t in $tests) { Write-Host "`n=== $t ===" -ForegroundColor Cyan & $py $t $bin @@ -77,18 +84,22 @@ foreach ($t in $tests) { Write-Host "GREEN ($t)" -ForegroundColor Green } elseif ($code -eq 1) { Write-Host "RED ($t) - Windows-specific failure reproduced" -ForegroundColor Red - $failed += $t + $reds += $t } else { - Write-Host "SETUP ERROR ($t) exit=$code" -ForegroundColor Yellow - $failed += $t + Write-Host "PRECONDITION ($t) exit=$code - skipped (see message above)" -ForegroundColor Yellow + $precond += $t } } Write-Host "" -if ($failed.Count -gt 0) { - Write-Host ("RED suite: {0}/{1} Windows red tests failed (expected until the " -f $failed.Count, $tests.Count) -ForegroundColor Red - Write-Host "platform issues are fixed). See tests/windows/RED_TEST_ANALYSIS.md." -ForegroundColor Red +if ($precond.Count -gt 0) { + Write-Host ("Precondition-skipped: {0} (e.g. test_ui_drive_listing needs a UI " -f $precond.Count) -ForegroundColor Yellow + Write-Host "build: make -f Makefile.cbm cbm-with-ui, and >1 drive)." -ForegroundColor Yellow +} +if ($reds.Count -gt 0) { + Write-Host ("RED suite: {0} Windows red tests reproduced platform failures " -f $reds.Count) -ForegroundColor Red + Write-Host "(expected until fixed). See tests/windows/RED_TEST_ANALYSIS.md." -ForegroundColor Red exit 1 } -Write-Host "All Windows red tests are GREEN." -ForegroundColor Green +Write-Host "All runnable Windows red tests are GREEN." 
-ForegroundColor Green exit 0 diff --git a/tests/windows/RED_TEST_ANALYSIS.md b/tests/windows/RED_TEST_ANALYSIS.md index 2c13db967..a24251c29 100644 --- a/tests/windows/RED_TEST_ANALYSIS.md +++ b/tests/windows/RED_TEST_ANALYSIS.md @@ -121,6 +121,77 @@ flows that shell out to it, not the stdio server path. Holds on Linux/macOS --- +## windows_hook_augment_emits_context + +- Class: integration +- Test: `tests/windows/test_hook_augment.py` +- Related issues: #618 +- Environment: Windows 11 26200, `hook-augment` CLI subcommand +- Fixture: a repo with a known function `someIndexedSymbol`, indexed; a realistic + Claude Code PreToolUse Grep payload with a Windows drive-letter `cwd` +- Expected: `codebase-memory-mcp hook-augment` emits a `hookSpecificOutput` with + `additionalContext` listing the matching graph symbol (the control + `search_graph` finds the symbol, so the index and project name are fine) +- Actual: `hook-augment` emits **empty stdout** for every payload +- Command: `python tests/windows/test_hook_augment.py build\c\codebase-memory-mcp.exe` +- Minimal failure output: + + ``` + control: search_graph finds someIndexedSymbol in project C-...-repo + hook-augment rc=0 stdout='' + ``` + +- Suspected implementation area: `src/cli/hook_augment.c` has two POSIX-only path + guards. `cbm_cmd_hook_augment` (`_WIN32` branch, ~L330): + `if (!cwd || cwd[0] != '/') { ...; return 0; }` and the `ha_resolve_and_query` + walk-up loop (~L254): `for (...; dir[0] == '/'; ...)`. A Windows `cwd` is a + drive-letter path (`C:\...` / `C:/...`), so `cwd[0]` is never `'/'`; the + augmenter bails before it queries the graph. The PreToolUse Grep/Glob graph + augmentation therefore never fires on Windows. Fix direction: accept + drive-letter absolute paths (and climb them in the walk-up loop). + +Holds on Linux/macOS (`cwd` starts with `/`). 
+ +--- + +## windows_ui_picker_reaches_all_drives + +- Class: integration +- Test: `tests/windows/test_ui_drive_listing.py` +- Related issues: #548 +- Environment: Windows 11 26200 with drives `C:\`, `D:\`, `E:\`; UI build + (`make -f Makefile.cbm cbm-with-ui`); embedded HTTP server on a local port +- Fixture: none — exercises the live `GET /api/browse` endpoint +- Expected: browsing the filesystem root (`/api/browse?path=/`) lets the user + reach every fixed drive (`D:\`, `E:\`), so a project on a non-system drive can + be selected +- Actual: the control browse of an explicit directory returns entries (endpoint + works), but `browse('/')` returns **0 entries** and no drive letters — `D:\` + and `E:\` are unreachable from the picker root +- Command: `python tests/windows/test_ui_drive_listing.py build\c\codebase-memory-mcp.exe` +- Minimal failure output: + + ``` + control browse('C:/Users/jacob') -> dirs(23) + browse('/') -> path='/' dirs(0)=[] + RED: drives ['D:\\', 'E:\\'] are not reachable from the UI root picker + ``` + +- Suspected implementation area: `handle_browse` in `src/ui/http_server.c` does + `opendir(path)` for the requested path. For the root it lists only the current + drive's contents and never enumerates the logical drives + (`GetLogicalDriveStrings`). Fix direction: when the path is the filesystem root + on Windows, return the available drive letters as the directory list so the + picker can descend into any drive. + +This test requires a UI build because the HTTP server only starts when the +frontend is embedded (`CBM_EMBEDDED_FILE_COUNT > 0`); against a non-UI binary it +reports a precondition (exit 2), and on a single-drive machine it is not +meaningful (exit 2). Holds on Linux/macOS (a single `/` root with no drive +letters). 
+ +--- + ## Seed areas revisited and ruled out (green on native Windows) Each was reproduced as a concrete attempt against the production binary and @@ -128,8 +199,10 @@ behaved correctly — recorded as green and **not** included as a red test: | Area | Seed | Result on Windows | |---|---|---| -| stdio `initialize` returns before stdin EOF | #513, #635 | green | +| stdio `initialize` returns before stdin EOF; stdout flushes before EOF | #513, #530.1, #635 | green | | `tools/list` non-empty; all 14 tools return valid JSON-RPC | #530 | green | +| `get_code_snippet` on a CP949 file emits valid UTF-8 (invalid bytes → U+FFFD) | #530.3 | green | +| Indexing a mapped (subst) drive `W:\` — no `bad_root_path`/`store.corrupt`, DB kept | #227, #367 | green (subst; real SMB not testable here) | | Client exit terminates the server process (no residual `.exe`) | #185, #406 | green | | `--help` / `--version` exit 0 in PowerShell, cmd, Git Bash | — | green | | `search_code` works without bash/GNU grep (PowerShell `Select-String`) | #422, #348 | green | @@ -157,6 +230,18 @@ behaved correctly — recorded as green and **not** included as a red test: paths over `MAX_PATH` are unreachable by every application, not just CBM. CBM could opt in via the `\\?\` prefix + wide APIs, but the failure is gated by a machine-wide policy rather than a clean CBM-only defect, so it is excluded. +- **Cascading nested `.gitignore` (#530.2) and `.git/info/exclude` (#530.5).** + `try_load_nested_gitignore` in `src/discover/discover.c` skips nested + `.gitignore` files once a parent ignore is loaded, and discovery never reads + `.git/info/exclude`. Both are real, but the discovery logic is + platform-independent and reproduces identically on Linux, so they are out of + scope for a Windows-only PR. +- **libgit2 1.8+ build break (#530.4).** `git_allocator` moved to + ``; cross-platform compile issue, not a Windows runtime bug. 
+- **Memory growth over hours (#581).** Requires a multi-hour soak to surface and + is not deterministic in a unit/integration test; the existing + `scripts/soak-test.sh` RSS-trend harness is the right vehicle and is not + reproduced as a red test here. - **C `test-runner` failures on Windows.** The in-process C suite reports many extraction-count failures concentrated in `test_grammar_probe_*`, `test_node_creation_probe`, `test_edge_*`, `test_matrix_*`, and diff --git a/tests/windows/test_hook_augment.py b/tests/windows/test_hook_augment.py new file mode 100644 index 000000000..8f0365502 --- /dev/null +++ b/tests/windows/test_hook_augment.py @@ -0,0 +1,112 @@ +r"""RED integration test — the PreToolUse hook augmenter is a no-op on Windows. + +Reproduces issue #618 at the product surface. + +`codebase-memory-mcp hook-augment` is the non-blocking Claude Code PreToolUse +Grep/Glob augmenter: given a hook payload it should emit a `hookSpecificOutput` +with `additionalContext` listing graph symbols that match the searched token. + +On Windows it emits nothing for every payload. `src/cli/hook_augment.c` gates on +POSIX-style absolute paths in two places: + + cbm_cmd_hook_augment (_WIN32 branch): if (!cwd || cwd[0] != '/') return 0; + ha_resolve_and_query walk-up loop: for (... ; dir[0] == '/'; ...) + +A Windows `cwd` is a drive-letter path (`C:\...` / `C:/...`), so `cwd[0]` is +never `'/'`; the augmenter bails before it ever queries the graph. + +This test indexes a repo with a known symbol, confirms `search_graph` finds it +(control — proves the index and project name are fine), then invokes +`hook-augment` exactly as the installed PreToolUse hook does and asserts a +`hookSpecificOutput` payload is produced. + +Passes on Linux/macOS (`cwd` starts with `/`). Fails on native Windows until the +path guards accept drive-letter absolute paths (and the walk-up loop climbs them). + +Exit code: 0 == augmenter fired (green), 1 == no-op (red), 2 == setup error. 
+
+Usage:
+    python test_hook_augment.py <binary>
+"""
+import json
+import os
+import shutil
+import subprocess
+import sys
+import tempfile
+
+SYMBOL = "someIndexedSymbol"
+SRC = "export function %s(a: number): number { return a + 1; }\n" % SYMBOL
+
+
+def _text(raw):
+    """Decode captured process output (bytes or None) as UTF-8, lossily."""
+    return (raw or b"").decode("utf-8", "replace")
+
+
+def run_cli(binary, cache, args, stdin=None, timeout=120):
+    """Run `binary` with `args` and return the subprocess.CompletedProcess.
+
+    The child inherits the current environment with CBM_CACHE_DIR set to
+    `cache`. stdout and stderr are captured as bytes; `stdin`, when given, is
+    the bytes written to the child's standard input.
+
+    Raises subprocess.TimeoutExpired if the child runs longer than `timeout`
+    seconds.
+    """
+    env = dict(os.environ)
+    env["CBM_CACHE_DIR"] = cache
+    return subprocess.run([binary] + args, capture_output=True, timeout=timeout,
+                          env=env, input=stdin)
+
+
+def main():
+    """Index a one-file repo, then check that hook-augment emits context.
+
+    Returns the process exit code: 0 when hook-augment printed a
+    hookSpecificOutput/additionalContext payload (green), 1 when it did not
+    (red), 2 when the fixture or one of the control steps could not be set up.
+    """
+    if len(sys.argv) < 2:
+        print("usage: python test_hook_augment.py <binary>")
+        return 2
+    binary = os.path.abspath(sys.argv[1])
+    if not os.path.exists(binary):
+        print("FAIL: binary not found: %s" % binary)
+        return 2
+
+    work = tempfile.mkdtemp(prefix="cbm_win_hook_")
+    try:
+        repo = os.path.join(work, "repo")
+        os.makedirs(os.path.join(repo, "src"), exist_ok=True)
+        with open(os.path.join(repo, "src", "m.ts"), "wb") as f:
+            f.write(SRC.encode("utf-8"))
+        cache = os.path.join(work, "cache")
+        os.makedirs(cache, exist_ok=True)
+
+        # repo_path / cwd in the forward-slash drive form Claude Code passes.
+        repo_fwd = repo.replace("\\", "/")
+        idx = run_cli(binary, cache, ["cli", "index_repository",
+                                      json.dumps({"repo_path": repo_fwd})])
+        idx_out = _text(idx.stdout)
+        if '"nodes"' not in idx_out:
+            print("SETUP FAIL: index did not run:\n%s" % idx_out[:300])
+            return 2
+
+        # Control: prove the symbol is indexed and queryable.
+        lp = run_cli(binary, cache, ["cli", "list_projects", "{}"])
+        lp_out = _text(lp.stdout)
+        try:
+            name = json.loads(lp_out)["projects"][0]["name"]
+        except (ValueError, LookupError, TypeError):
+            # An empty or malformed listing is a broken setup (exit 2). Left
+            # uncaught, the traceback would exit 1, which callers of this
+            # script read as RED (the hook-augment failure itself).
+            print("SETUP FAIL: list_projects returned no usable project:\n%s"
+                  % lp_out[:300])
+            return 2
+        sg = run_cli(binary, cache, ["cli", "search_graph",
+                                     json.dumps({"label": "Function",
+                                                 "name_pattern": ".*%s.*" % SYMBOL,
+                                                 "project": name})])
+        if SYMBOL not in _text(sg.stdout):
+            print("SETUP FAIL: control search_graph did not find %s" % SYMBOL)
+            return 2
+        print("control: search_graph finds %s in project %s" % (SYMBOL, name))
+
+        # Invoke hook-augment exactly as the installed PreToolUse hook does.
+        payload = json.dumps({
+            "hook_event_name": "PreToolUse",
+            "tool_name": "Grep",
+            "cwd": repo_fwd,
+            "tool_input": {"pattern": SYMBOL},
+        }).encode("utf-8")
+        ha = run_cli(binary, cache, ["hook-augment"], stdin=payload, timeout=60)
+        out = _text(ha.stdout).strip()
+        print("hook-augment rc=%d stdout=%r" % (ha.returncode, out[:200]))
+
+        fired = ("hookSpecificOutput" in out) and ("additionalContext" in out)
+        if fired:
+            print("\nGREEN: PreToolUse augmenter emitted additionalContext.")
+            return 0
+        print("\nRED: hook-augment produced no hookSpecificOutput on Windows "
+              "(drive-letter cwd fails the cwd[0]=='/' guards in hook_augment.c).")
+        return 1
+    finally:
+        shutil.rmtree(work, ignore_errors=True)
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/tests/windows/test_ui_drive_listing.py b/tests/windows/test_ui_drive_listing.py
new file mode 100644
index 000000000..9457bb4e9
--- /dev/null
+++ b/tests/windows/test_ui_drive_listing.py
@@ -0,0 +1,176 @@
+r"""RED integration test — the UI directory picker cannot reach non-system drives.
+
+Reproduces issue #548 at the product surface (the embedded HTTP UI).
+
+The UI directory picker calls `GET /api/browse?path=...` (handle_browse in
+src/ui/http_server.c).
For the filesystem root it does `opendir("/")`, which on
+Windows can at best resolve to the *current* drive's root; on the reference
+machine the root listing came back empty. There is no `GetLogicalDriveStrings`
+drive enumeration, so when a user opens the picker at root, drives other than the
+system drive (e.g. `D:\`, `E:\`) never appear and cannot be selected.
+
+This test requires a UI build (`make -f Makefile.cbm cbm-with-ui`) because the
+HTTP server only starts when the frontend is embedded. It launches the server,
+queries `/api/browse?path=/`, and asserts that every fixed drive on the machine
+is reachable from the root listing. It is meaningful only on a machine with more
+than one drive; with a single drive it reports a precondition error (exit 2).
+
+Passes on a correct picker that enumerates drives; fails on native Windows until
+handle_browse enumerates logical drives for the root path.
+
+Exit code: 0 == all drives reachable (green), 1 == non-system drives missing
+(red), 2 == precondition not met (single drive / no UI build / server down).
+
+Usage:
+    python test_ui_drive_listing.py <binary> [port]
+"""
+import json
+import os
+import shutil
+import socket
+import subprocess
+import sys
+import tempfile
+import time
+import urllib.parse
+import urllib.request
+
+
+def list_fixed_drives():
+    r"""Return the drive roots (e.g. `C:\`) that are currently usable.
+
+    Candidates come from os.listdrives() (Python 3.12+) or, when that is missing
+    or fails, from probing C:..Z:. Either way only roots that os.path.isdir()
+    accepts are returned, so a drive letter with no media behind it (an empty
+    optical or card-reader drive) is not reported as a drive the picker must
+    reach.
+    """
+    candidates = None
+    listdrives = getattr(os, "listdrives", None)
+    if listdrives:
+        try:
+            candidates = list(listdrives())
+        except OSError:
+            candidates = None  # fall through to the letter scan
+    if candidates is None:
+        candidates = ["%s:\\" % ch for ch in "CDEFGHIJKLMNOPQRSTUVWXYZ"]
+    return [root for root in candidates if os.path.isdir(root)]
+
+
+def free_port():
+    """Return a TCP port on 127.0.0.1 that was unused at the time of the call.
+
+    The probe socket is closed before returning, so the port is not reserved.
+    """
+    with socket.socket() as s:
+        s.bind(("127.0.0.1", 0))
+        return s.getsockname()[1]
+
+
+def http_get_json(url, timeout=5):
+    """GET `url` and return the response body parsed as JSON."""
+    with urllib.request.urlopen(url, timeout=timeout) as r:
+        return json.loads(r.read().decode("utf-8", "replace"))
+
+
+def wait_for_server(port, timeout=20):
+    """Poll 127.0.0.1:`port` until a TCP connect succeeds.
+
+    Returns True on the first successful connect, False once `timeout` seconds
+    have elapsed without one.
+    """
+    deadline = time.time() + timeout
+    while time.time() < deadline:
+        try:
+            with socket.create_connection(("127.0.0.1", port), timeout=1):
+                return True
+        except OSError:
+            time.sleep(0.3)
+    return False
+
+
+def main():
+    """Start the UI server and check that the root browse reaches every drive.
+
+    Returns the process exit code: 0 when every non-system drive is reachable
+    from `/api/browse?path=/` (green), 1 when at least one is missing (red),
+    2 when a precondition is not met (bad arguments, single drive, server not
+    reachable, control browse failed).
+    """
+    if len(sys.argv) < 2:
+        print("usage: python test_ui_drive_listing.py <binary> [port]")
+        return 2
+    binary = os.path.abspath(sys.argv[1])
+    if not os.path.exists(binary):
+        print("FAIL: binary not found: %s" % binary)
+        return 2
+
+    drives = list_fixed_drives()
+    # Compare against the real system drive instead of assuming it is C:.
+    system_drive = (os.environ.get("SystemDrive") or "C:").upper()
+    extra = [d for d in drives if not d.upper().startswith(system_drive)]
+    print("fixed drives: %s" % drives)
+    if not extra:
+        print("PRECONDITION: only one drive present; cannot test multi-drive "
+              "picker. Re-run on a machine with a D:/E: drive.")
+        return 2
+
+    work = tempfile.mkdtemp(prefix="cbm_win_uidrv_")
+    proc = None
+    try:
+        # Everything after mkdtemp runs inside the try so the temp dir is
+        # removed even when argument parsing or the spawn itself fails.
+        try:
+            port = int(sys.argv[2]) if len(sys.argv) > 2 else free_port()
+        except ValueError:
+            print("FAIL: port must be an integer: %r" % sys.argv[2])
+            return 2
+        env = dict(os.environ)
+        env["CBM_CACHE_DIR"] = os.path.join(work, "cache")
+        os.makedirs(env["CBM_CACHE_DIR"], exist_ok=True)
+        # stdout/stderr are never read by this test, so they go to DEVNULL; an
+        # undrained PIPE can fill up and block the child. stdin stays a pipe
+        # and is closed during cleanup.
+        proc = subprocess.Popen([binary, "--ui=true", "--port=%d" % port],
+                                stdin=subprocess.PIPE, stdout=subprocess.DEVNULL,
+                                stderr=subprocess.DEVNULL, env=env)
+        if not wait_for_server(port, timeout=25):
+            print("PRECONDITION: HTTP server did not start on port %d. Is this a "
+                  "UI build (make cbm-with-ui)?" % port)
+            return 2
+
+        # Control: browsing an explicit existing directory must return entries,
+        # proving the endpoint works and isolating the bug to root enumeration.
+        home = os.environ.get("USERPROFILE") or os.path.expanduser("~")
+        home_fwd = home.replace("\\", "/")
+        try:
+            ctrl = http_get_json("http://127.0.0.1:%d/api/browse?path=%s" %
+                                 (port, urllib.parse.quote(home_fwd)))
+        except Exception as ex:
+            print("PRECONDITION: control /api/browse?path=%s failed: %r" %
+                  (home_fwd, ex))
+            return 2
+        print("control browse(%r) -> dirs(%d)" % (home_fwd, len(ctrl.get("dirs", []))))
+        if not ctrl.get("dirs"):
+            print("PRECONDITION: control browse returned no dirs; endpoint may be "
+                  "non-functional in this build.")
+            return 2
+
+        # Browse the filesystem root.
+        try:
+            root = http_get_json("http://127.0.0.1:%d/api/browse?path=/" % port)
+        except Exception as ex:
+            print("PRECONDITION: /api/browse?path=/ failed: %r" % ex)
+            return 2
+        root_dirs = root.get("dirs", [])
+        print("browse('/') -> path=%r dirs(%d)=%s" %
+              (root.get("path"), len(root_dirs), root_dirs[:20]))
+
+        # A correct root listing must let the user reach every drive. Accept a
+        # match whether the API returns "D:", "D", or "D:\\"/"D:/". The listing
+        # is normalised once (separators stripped, upper-cased) and reused.
+        listed = {str(d).rstrip("\\/").upper() for d in root_dirs}
+
+        def reachable(drive_root):
+            letter = drive_root[0].upper()
+            cands = {letter, letter + ":", drive_root.rstrip("\\/").upper()}
+            return not listed.isdisjoint(cands)
+
+        missing = [d for d in extra if not reachable(d)]
+        if not missing:
+            print("\nGREEN: all non-system drives reachable from the root picker.")
+            return 0
+        print("\nRED: drives %s are not reachable from the UI root picker "
+              "(/api/browse?path=/ does not list them; handle_browse "
+              "does not enumerate logical drives)." % missing)
+        return 1
+    finally:
+        if proc is not None:
+            try:
+                proc.stdin.close()
+            except Exception:
+                pass
+            try:
+                proc.terminate()
+                proc.wait(timeout=5)
+            except Exception:
+                try:
+                    proc.kill()
+                except Exception:
+                    pass
+        shutil.rmtree(work, ignore_errors=True)
+
+
+if __name__ == "__main__":
+    sys.exit(main())
From 2b4fa2ded4112e8391d4bd05244a3214380d2862 Mon Sep 17 00:00:00 2001
From: Flipper
Date: Sat, 27 Jun 2026 10:29:38 +0200
Subject: [PATCH 3/4] test(windows): note umbrella tracker #394 in red-test analysis

Reference the Windows umbrella issue #394 in the analysis so every open
Windows-relevant issue is accounted for: its open children (#227/#367, the
mapped/SMB-drive class) are in the ruled-out table and its other children are
already fixed upstream. No test or production change.

Signed-off-by: Flipper
---
 tests/windows/RED_TEST_ANALYSIS.md | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tests/windows/RED_TEST_ANALYSIS.md b/tests/windows/RED_TEST_ANALYSIS.md
index a24251c29..037afc504 100644
--- a/tests/windows/RED_TEST_ANALYSIS.md
+++ b/tests/windows/RED_TEST_ANALYSIS.md
@@ -238,6 +238,11 @@ behaved correctly — recorded as green and **not** included as a red test:
   scope for a Windows-only PR.
 - **libgit2 1.8+ build break (#530.4).** `git_allocator` moved to
   ``; cross-platform compile issue, not a Windows runtime bug.
+- **Windows umbrella tracker (#394).** This is a meta-issue ("8 bugs"); its
+  remaining open children are the mapped/SMB-drive class (#227, #367), covered in
+  the ruled-out table above (a `subst` mapped drive indexes and keeps its DB; a
+  real SMB share is not available here). Its other children (#221, #266, #274,
+  #331, #347, #348) are already marked fixed upstream, so no new test is shipped.
- **Memory growth over hours (#581).** Requires a multi-hour soak to surface and is not deterministic in a unit/integration test; the existing `scripts/soak-test.sh` RSS-trend harness is the right vehicle and is not From 0eb2c5891381eb50ad85d9c05b8e60b9cd930ab1 Mon Sep 17 00:00:00 2001 From: Flipper Date: Sat, 4 Jul 2026 13:22:41 +0200 Subject: [PATCH 4/4] test(windows): convert fixed reds to green guards, wire CI, rewrite drive test Rebased onto current main and reworked in response to review. Three of the four Windows reds were fixed upstream since the branch was cut at b075f05, so they are now green regression guards; the fourth stays a genuine known-red. - test_non_ascii_path.py (#636/#357): green guard - fixed by #700 (per-pass readers now route through cbm_fopen -> _wfopen). Re-verified green on main. - test_hook_augment.py (#618): green guard - fixed by #619 (cbm_is_walkable_abs_path accepts drive-letter X:/ cwd). Re-verified green on main. - test_ui_drive_listing.py (#548): rewritten. The fix exposes drives via a new roots[] field, not the dirs[] array the old test asserted (which would stay red against fixed code). Now asserts every fixed drive is in roots and browsable. Re-verified green on main (drives C:/D:/E:). - test_cli_non_ascii_arg.py (#423/#20): unchanged - main() is still narrow-argv with no wide command line, so this remains genuinely red (the keeper). - scripts/test-windows.ps1: split green guards (gate CI) from opt-in known-reds; add -GuardsOnly; run indexing in-process (CBM_INDEX_SUPERVISOR=0) so a guard reflects the path/hook/drive fix under test, not the index-worker spawn path. - .github/workflows/_test.yml: new test-windows-guards job builds the product+UI binary (scripts/build.sh --with-ui) and runs the guards with -GuardsOnly so #700/#619/#548 stay enforced on Windows CI. - RED_TEST_ANALYSIS.md: refreshed to record the landed fixes and current status. 
Signed-off-by: Flipper Co-Authored-By: Claude Opus 4.8 (1M context) --- .github/workflows/_test.yml | 40 +++++++ scripts/test-windows.ps1 | 131 ++++++++++++++++------ tests/windows/RED_TEST_ANALYSIS.md | 126 ++++++++++++++------- tests/windows/test_hook_augment.py | 31 +++--- tests/windows/test_non_ascii_path.py | 41 ++++--- tests/windows/test_ui_drive_listing.py | 146 +++++++++++++------------ 6 files changed, 337 insertions(+), 178 deletions(-) diff --git a/.github/workflows/_test.yml b/.github/workflows/_test.yml index 200586d50..29b6b100f 100644 --- a/.github/workflows/_test.yml +++ b/.github/workflows/_test.yml @@ -124,3 +124,43 @@ jobs: run: scripts/test.sh CC=clang CXX=clang++ ${{ matrix.os == 'windows-11-arm' && 'SANITIZE=' || '' }} env: CBM_SKIP_PERF: ${{ inputs.skip_perf && '1' || '' }} + + # Windows product-surface regression guards. Distinct from test-windows above + # (the sanitizer C suite): these drive a real product binary + embedded HTTP UI + # over stdio / CLI / HTTP and fail if a Windows bug already fixed on main comes + # back -- non-ASCII repo paths dropping definitions (#636/#357, fixed by #700), + # the PreToolUse hook augmenter no-op on drive-letter cwd (#618, fixed by #619), + # and the UI directory picker not enumerating drives (#548, roots field). The + # still-open narrow-argv repro (#423/#20) is opt-in and excluded via -GuardsOnly. 
+ test-windows-guards: + runs-on: windows-latest + timeout-minutes: 60 + steps: + - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + + - uses: msys2/setup-msys2@66cd2cce69caa17b53920067426061ca1de3a884 # v2 + with: + msystem: CLANG64 + path-type: inherit + install: >- + mingw-w64-clang-x86_64-clang + mingw-w64-clang-x86_64-zlib + make + git + + - uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0 + with: + node-version: "22" + + - name: Build product binary with embedded UI + shell: msys2 {0} + # --with-ui builds the frontend (npm) and embeds it, so the drive-picker + # guard's HTTP UI is available. Functional gate only (no sanitizers). + run: scripts/build.sh --with-ui CC=clang CXX=clang++ + + - name: Windows regression guards (#636/#357, #618, #548) + shell: pwsh + # -GuardsOnly runs the three green guards and gates on them; the runner + # runs indexing in-process (CBM_INDEX_SUPERVISOR=0) so a guard reflects the + # path/hook/drive fix under test, not the orthogonal index-worker spawn. + run: ./scripts/test-windows.ps1 -GuardsOnly -Binary build/c/codebase-memory-mcp.exe diff --git a/scripts/test-windows.ps1 b/scripts/test-windows.ps1 index 620692a4a..896009215 100644 --- a/scripts/test-windows.ps1 +++ b/scripts/test-windows.ps1 @@ -1,22 +1,50 @@ <# .SYNOPSIS - Run the native-Windows red-test suite for codebase-memory-mcp. + Run the native-Windows product-surface test suite for codebase-memory-mcp. .DESCRIPTION - Builds the production binary (build/c/codebase-memory-mcp.exe) if it is not - already present, then runs the deterministic Windows red tests under - tests/windows/. These tests reproduce platform-specific failures at the - product surface (real MCP process, real stdio, real SQLite DB). 
+ Builds the product binary (build/c/codebase-memory-mcp.exe) if it is not + already present, then runs the deterministic Windows integration tests under + tests/windows/ against a real codebase-memory-mcp.exe (real stdio / CLI / + HTTP UI, real SQLite DB). - The unit/invariant C suite is built and run via Makefile.cbm. On native - Windows the MinGW/LLVM toolchain ships no libasan/libubsan, so the sanitizer - flags must be disabled for the local build (SANITIZE=). Where the toolchain - *does* provide AddressSanitizer/UBSan (Linux containers, WSL), prefer - scripts/test.sh which keeps the sanitizers on. + Two categories of test: + + GUARDS - regression guards for Windows bugs already fixed on main. + They must stay GREEN (exit 0); a RED (exit 1) means the fix + regressed and fails this runner. + * test_non_ascii_path.py guards #636/#357 (fixed by #700) + * test_hook_augment.py guards #618 (fixed by #619) + * test_ui_drive_listing.py guards #548 (roots field) + + KNOWN REDS - genuine, still-open Windows bugs reproduced at the product + surface. They are EXPECTED to be RED (exit 1) and are opt-in + (never gate CI). If one turns GREEN the underlying bug was + fixed and it should be promoted to a guard. + * test_cli_non_ascii_arg.py reproduces #423/#20 (narrow + argv main() - no wide command line) + + Determinism: indexing runs in-process (CBM_INDEX_SUPERVISOR=0). These tests + exercise path / hook / drive handling, not the index-supervisor subprocess + path; the pass-level readers (#700's cbm_fopen routing) run in-process either + way, so the guard coverage is identical while results stay independent of the + local toolchain's worker-spawn behavior. + + On native Windows the MinGW/LLVM toolchain ships no libasan/libubsan, so the + build disables sanitizers (SANITIZE=). Where the toolchain provides + AddressSanitizer/UBSan (Linux containers, WSL), prefer scripts/test.sh. .PARAMETER Binary - Path to an existing codebase-memory-mcp.exe. 
If omitted, the script looks for - build/c/codebase-memory-mcp.exe and builds it when missing. + Path to an existing codebase-memory-mcp.exe. If omitted, the script builds it + (target selected by -Target) into build/c/. + +.PARAMETER Target + Makefile.cbm target used when building: 'cbm-with-ui' (default; needed for the + drive-picker guard's embedded HTTP UI) or 'cbm' (no UI - the drive guard then + reports a precondition and is skipped). + +.PARAMETER GuardsOnly + Run only the green guards (the CI gate). Skips the opt-in known-red repros. .PARAMETER Make Path to GNU make (default: 'make' on PATH; MSYS2 ships it at @@ -24,10 +52,15 @@ .EXAMPLE pwsh -File scripts/test-windows.ps1 +.EXAMPLE + pwsh -File scripts/test-windows.ps1 -GuardsOnly -Binary build\c\codebase-memory-mcp.exe #> [CmdletBinding()] param( [string]$Binary, + [ValidateSet("cbm-with-ui", "cbm")] + [string]$Target = "cbm-with-ui", + [switch]$GuardsOnly, [string]$Make = "make" ) @@ -37,7 +70,7 @@ Set-Location $repoRoot $python = (Get-Command python -ErrorAction SilentlyContinue) if (-not $python) { $python = (Get-Command py -ErrorAction SilentlyContinue) } -if (-not $python) { throw "Python 3 is required to run the Windows red tests." } +if (-not $python) { throw "Python 3 is required to run the Windows tests." } $py = $python.Source # A writable Windows temp dir that GNU make forwards to the native gcc. MSYS2 @@ -51,8 +84,8 @@ function Resolve-Binary { if ($Explicit) { return (Resolve-Path $Explicit).Path } $built = Join-Path $repoRoot "build\c\codebase-memory-mcp.exe" if (Test-Path $built) { return $built } - Write-Host "Building production binary via Makefile.cbm ..." -ForegroundColor Cyan - & $Make "-j" "-f" "Makefile.cbm" "cbm" "TMP=$tmp" "TEMP=$tmp" "TMPDIR=$tmp" + Write-Host "Building $Target via Makefile.cbm ..." 
-ForegroundColor Cyan + & $Make "-j" "-f" "Makefile.cbm" $Target "SANITIZE=" "TMP=$tmp" "TEMP=$tmp" "TMPDIR=$tmp" if ($LASTEXITCODE -ne 0) { throw "build failed (exit $LASTEXITCODE)" } if (-not (Test-Path $built)) { throw "binary not produced at $built" } return $built @@ -61,45 +94,73 @@ function Resolve-Binary { $bin = Resolve-Binary -Explicit $Binary Write-Host "Binary: $bin" -ForegroundColor Green -$env:PYTHONUTF8 = "1" # ensure the harness encodes argv/stdio as UTF-8 +$env:PYTHONUTF8 = "1" # encode argv/stdio as UTF-8 +$env:CBM_INDEX_SUPERVISOR = "0" # in-process indexing (see .DESCRIPTION) -# test_ui_drive_listing.py reproduces the UI directory-picker bug (#548) and -# therefore needs a UI build (make -f Makefile.cbm cbm-with-ui) plus a machine -# with more than one drive. Against a non-UI binary it reports a precondition -# (exit 2), which is treated as a skip-with-reason, not a failure. -$tests = @( +# Green regression guards - must stay GREEN (exit 0). RED (exit 1) = the fix for +# the referenced issue regressed. The drive-picker guard needs the embedded HTTP +# UI (build target cbm-with-ui); against a non-UI binary it reports a precondition +# (exit 2) and is skipped rather than failed. +$guards = @( "tests\windows\test_non_ascii_path.py", - "tests\windows\test_cli_non_ascii_arg.py", "tests\windows\test_hook_augment.py", "tests\windows\test_ui_drive_listing.py" ) -$reds = @() -$precond = @() -foreach ($t in $tests) { +# Opt-in known-red repros - EXPECTED red (exit 1); never gate CI. 
+$knownReds = @( + "tests\windows\test_cli_non_ascii_arg.py" +) + +$guardFailures = @() +$guardSkips = @() +$fixedKeepers = @() + +Write-Host "`n--- Green guards ---" -ForegroundColor Cyan +foreach ($t in $guards) { Write-Host "`n=== $t ===" -ForegroundColor Cyan & $py $t $bin $code = $LASTEXITCODE if ($code -eq 0) { Write-Host "GREEN ($t)" -ForegroundColor Green } elseif ($code -eq 1) { - Write-Host "RED ($t) - Windows-specific failure reproduced" -ForegroundColor Red - $reds += $t + Write-Host "RED ($t) - REGRESSION: a fixed Windows bug is broken again" -ForegroundColor Red + $guardFailures += $t } else { Write-Host "PRECONDITION ($t) exit=$code - skipped (see message above)" -ForegroundColor Yellow - $precond += $t + $guardSkips += $t + } +} + +if (-not $GuardsOnly) { + Write-Host "`n--- Known reds (opt-in, expected red) ---" -ForegroundColor Cyan + foreach ($t in $knownReds) { + Write-Host "`n=== $t ===" -ForegroundColor Cyan + & $py $t $bin + $code = $LASTEXITCODE + if ($code -eq 1) { + Write-Host "RED ($t) - expected; the underlying Windows bug is still open" -ForegroundColor DarkYellow + } elseif ($code -eq 0) { + Write-Host "GREEN ($t) - the bug appears FIXED; promote this to a guard" -ForegroundColor Green + $fixedKeepers += $t + } else { + Write-Host "PRECONDITION ($t) exit=$code - skipped (see message above)" -ForegroundColor Yellow + } } } Write-Host "" -if ($precond.Count -gt 0) { - Write-Host ("Precondition-skipped: {0} (e.g. test_ui_drive_listing needs a UI " -f $precond.Count) -ForegroundColor Yellow - Write-Host "build: make -f Makefile.cbm cbm-with-ui, and >1 drive)." -ForegroundColor Yellow +if ($guardSkips.Count -gt 0) { + Write-Host ("Guards skipped (precondition): {0} - e.g. the drive-picker guard " -f $guardSkips.Count) -ForegroundColor Yellow + Write-Host "needs a UI build (-Target cbm-with-ui, the default)." 
-ForegroundColor Yellow +} +if ($fixedKeepers.Count -gt 0) { + Write-Host ("Known-red repros that are now GREEN (promote to guards): {0}" -f ($fixedKeepers -join ", ")) -ForegroundColor Green } -if ($reds.Count -gt 0) { - Write-Host ("RED suite: {0} Windows red tests reproduced platform failures " -f $reds.Count) -ForegroundColor Red - Write-Host "(expected until fixed). See tests/windows/RED_TEST_ANALYSIS.md." -ForegroundColor Red +if ($guardFailures.Count -gt 0) { + Write-Host ("REGRESSION: {0} green guard(s) went red: {1}" -f $guardFailures.Count, ($guardFailures -join ", ")) -ForegroundColor Red + Write-Host "A previously-fixed Windows bug is broken again. See tests/windows/RED_TEST_ANALYSIS.md." -ForegroundColor Red exit 1 } -Write-Host "All runnable Windows red tests are GREEN." -ForegroundColor Green +Write-Host "All Windows green guards passed." -ForegroundColor Green exit 0 diff --git a/tests/windows/RED_TEST_ANALYSIS.md b/tests/windows/RED_TEST_ANALYSIS.md index 037afc504..8c06f6550 100644 --- a/tests/windows/RED_TEST_ANALYSIS.md +++ b/tests/windows/RED_TEST_ANALYSIS.md @@ -1,48 +1,76 @@ -# Windows Red-Test Analysis +# Windows Test Analysis -Deterministic, Windows-only red tests found during a native-Windows red-test -campaign. They reproduce platform-specific failures at the product surface and -are intended as regression guards while the underlying issues are fixed in -separate maintainer PRs. **This PR contains no production fixes.** +Deterministic, Windows-only integration tests found during a native-Windows +red-test campaign. They drive the product surface (a real `codebase-memory-mcp.exe` +over stdio / CLI / HTTP UI, real SQLite DB) and pass on Linux/macOS. + +Three of the four originally-red findings have since been fixed on `main` and are +kept here as **green regression guards** (they go red again only if the fix +regresses). The fourth is a genuine, still-open Windows bug kept as a **known red**. 
+ +| Test | Issue | Status | +|---|---|---| +| `test_non_ascii_path.py` | #636 / #357 | GREEN guard - fixed by #700 (`cbm_fopen` routing in the pass readers) | +| `test_hook_augment.py` | #618 | GREEN guard - fixed by #619 (`cbm_is_walkable_abs_path` accepts `X:/`) | +| `test_ui_drive_listing.py` | #548 | GREEN guard - fixed (drives exposed via the `roots` field) | +| `test_cli_non_ascii_arg.py` | #423 / #20 | RED (open) - `main()` is still narrow-argv, no wide command line | + +The three green guards are wired into CI via the `test-windows-guards` job in +`.github/workflows/_test.yml` (build the product+UI binary, run the guards with +`-GuardsOnly`) so #700 / #619 / #548 stay enforced. The known red is opt-in and +never gates CI. **This PR contains no production fixes.** ## Environment - OS: Microsoft Windows 11 Pro, build 10.0.26200 -- Source build: MinGW-w64 GCC 15.2.0 (MSYS2), `make -f Makefile.cbm cbm` -- Filesystem: NTFS, code page 65001 (UTF-8 console) +- Source build: MinGW-w64 GCC 15.2.0 (MSYS2), `make -f Makefile.cbm cbm-with-ui` +- Filesystem: NTFS, code page 65001 (UTF-8 console); drives `C:`, `D:`, `E:` - Shells/launchers exercised: PowerShell 5.1 (5.1.26100), `cmd.exe`, Git Bash (MSYS2), direct Win32 process launch, Python `subprocess.Popen`, Python stdio (line-delimited JSON-RPC) transport -- CBM source commit under test: `b075f05` -- Binary: `build/c/codebase-memory-mcp.exe` (production build) +- Findings first captured at `b075f05`; re-verified after rebasing onto current + `main` (this is where the three now-fixed cases were confirmed green) +- Binary: `build/c/codebase-memory-mcp.exe` (product build, with embedded UI) + +### Determinism note (index supervisor) + +The guards drive indexing in-process via the `CBM_INDEX_SUPERVISOR=0` kill switch +(set by `scripts/test-windows.ps1`). 
The passes under test (`#700`'s `cbm_fopen` +routing) run in-process either way, so guard coverage is identical, while results +stay independent of the index-supervisor's separate worker process (whose spawn +behavior varies by local toolchain). The drive-picker guard does not index at all. ### Sanitizer note The MinGW/LLVM toolchain available on this machine ships **no** `libasan` / -`libubsan`, so an AddressSanitizer/UBSan build is not possible natively (the plan -anticipates this). The C unit/invariant suite (`build/c/test-runner`) was built -with `SANITIZE=` and runs; the two red tests below are product-level integration -tests that drive a real `codebase-memory-mcp.exe` over stdio. On a host where the -toolchain provides sanitizers (Linux container, WSL), the same fixtures should be -run through an ASan/UBSan binary via `scripts/test.sh`. +`libubsan`, so an AddressSanitizer/UBSan build is not possible natively. These are +product-level integration tests that drive a real `codebase-memory-mcp.exe`; the +sanitizer C suite is a separate concern (the `test-windows` CI job / `scripts/test.sh`). ## How to run ```powershell -# Builds build/c/codebase-memory-mcp.exe if missing, then runs the red suite. +# Builds build/c/codebase-memory-mcp.exe (with UI) if missing, then runs the suite. pwsh -File scripts/test-windows.ps1 +# only the green guards (the CI gate): +pwsh -File scripts/test-windows.ps1 -GuardsOnly # or, against an installed/relocated binary: pwsh -File scripts/test-windows.ps1 -Binary "C:\path\to\codebase-memory-mcp.exe" ``` -Each test exits `0` (green / invariant holds), `1` (red / Windows failure -reproduced), or `2` (environment/setup error). Standard-library Python 3 only. +Each test exits `0` (green), `1` (red), or `2` (precondition/setup). A guard that +exits `1` fails the runner (regression); a known red that exits `0` is flagged for +promotion. Standard-library Python 3 only. 
--- ## windows_non_ascii_repo_path_preserves_definitions -- Class: integration +**Status: GREEN guard - fixed by #700.** The text below describes the original +red (at `b075f05`); the closing note records the landed fix. The "Actual" counts +are the pre-fix observation. + +- Class: integration (green regression guard) - Test: `tests/windows/test_non_ascii_path.py` - Related issues: #636, #357, #571 (naming), #530 - Environment: Windows 11 26200, PowerShell 5.1 / Python stdio, NTFS, CP 65001 @@ -80,14 +108,21 @@ reproduced), or `2` (environment/setup error). Standard-library Python 3 only. Verified with `_wfopen` vs `fopen` on a non-ASCII path: `fopen(utf8, "rb")` returns `NULL`, `_wfopen(cbm_utf8_to_wide(utf8), L"rb")` opens the same file. -This invariant holds on Linux/macOS (byte-transparent UTF-8 filesystem); the test -turns green once the pass readers convert to wide. +**Fix landed (#700):** the per-pass readers now go through `cbm_fopen`, which on +Windows converts the UTF-8 path to wide and calls `_wfopen` (`src/foundation/compat_fs.c`). +Re-verified green on current `main`: every non-ASCII variant now matches the ASCII +baseline (12 nodes / 22 edges / 5 definitions). This invariant also holds on +Linux/macOS (byte-transparent UTF-8 filesystem). --- ## windows_cli_non_ascii_repo_path_is_honored -- Class: integration +**Status: RED (still open) - the keeper.** Re-verified on current `main`: +`main()` (`src/main.c`) is still `int main(int argc, char **argv)` with no +`wmain` / `GetCommandLineW`, so this remains genuinely red. Opt-in; not a CI gate. + +- Class: integration (known red) - Test: `tests/windows/test_cli_non_ascii_arg.py` - Related issues: #636, #423, #20 - Environment: Windows 11 26200, `cli` argv path, NTFS, CP 65001 @@ -123,7 +158,10 @@ flows that shell out to it, not the stdio server path. 
Holds on Linux/macOS ## windows_hook_augment_emits_context -- Class: integration +**Status: GREEN guard - fixed by #619.** The text below describes the original +red (at `b075f05`); the closing note records the landed fix. + +- Class: integration (green regression guard) - Test: `tests/windows/test_hook_augment.py` - Related issues: #618 - Environment: Windows 11 26200, `hook-augment` CLI subcommand @@ -147,16 +185,26 @@ flows that shell out to it, not the stdio server path. Holds on Linux/macOS walk-up loop (~L254): `for (...; dir[0] == '/'; ...)`. A Windows `cwd` is a drive-letter path (`C:\...` / `C:/...`), so `cwd[0]` is never `'/'`; the augmenter bails before it queries the graph. The PreToolUse Grep/Glob graph - augmentation therefore never fires on Windows. Fix direction: accept - drive-letter absolute paths (and climb them in the walk-up loop). + augmentation therefore never fires on Windows. -Holds on Linux/macOS (`cwd` starts with `/`). +**Fix landed (#619):** `hook_augment.c` now uses `cbm_is_walkable_abs_path`, which +accepts a drive-letter root (`X:/`) in addition to POSIX `/`, and the walk-up loop +climbs it. Re-verified green on current `main`: `hook-augment` emits the +`hookSpecificOutput` / `additionalContext` payload for a drive-letter `cwd`. Also +holds on Linux/macOS (`cwd` starts with `/`). --- ## windows_ui_picker_reaches_all_drives -- Class: integration +**Status: GREEN guard - fixed.** The original red asserted drives appear in the +`dirs` array; the landed fix intentionally exposes them via a separate `roots` +field, so that assertion would stay red against fixed code. The test was +**rewritten** to guard the real invariant (every fixed drive is advertised in +`roots` and is browsable). The text below describes the original red; the closing +note records the fix and the rewrite. 
+ +- Class: integration (green regression guard) - Test: `tests/windows/test_ui_drive_listing.py` - Related issues: #548 - Environment: Windows 11 26200 with drives `C:\`, `D:\`, `E:\`; UI build @@ -177,18 +225,22 @@ Holds on Linux/macOS (`cwd` starts with `/`). RED: drives ['D:\\', 'E:\\'] are not reachable from the UI root picker ``` -- Suspected implementation area: `handle_browse` in `src/ui/http_server.c` does - `opendir(path)` for the requested path. For the root it lists only the current - drive's contents and never enumerates the logical drives - (`GetLogicalDriveStrings`). Fix direction: when the path is the filesystem root - on Windows, return the available drive letters as the directory list so the - picker can descend into any drive. +- Suspected implementation area: `handle_browse` in `src/ui/http_server.c` did + `opendir(path)` for the requested path and, for the root, listed only the + current drive's contents with no logical-drive enumeration. + +**Fix landed (#548):** `handle_browse` now calls `append_roots_json`, which on +Windows enumerates `GetLogicalDrives()` into a `"roots":["C:/","D:/",...]` array +appended to every `/api/browse` response (POSIX emits `"/"`). The rewritten guard +asserts every fixed drive is present in `roots` and that `GET /api/browse?path=X:/` +returns for it. Re-verified green on current `main` (drives `C:`/`D:`/`E:`): +`roots=['C:/','D:/','E:/']`, all reachable. This test requires a UI build because the HTTP server only starts when the -frontend is embedded (`CBM_EMBEDDED_FILE_COUNT > 0`); against a non-UI binary it -reports a precondition (exit 2), and on a single-drive machine it is not -meaningful (exit 2). Holds on Linux/macOS (a single `/` root with no drive -letters). +frontend is embedded; against a non-UI binary it reports a precondition (exit 2). +The `roots` check is meaningful even on a single-drive machine (the system drive +must be advertised and browsable), so it also gates on single-drive CI runners. 
+Holds on Linux/macOS (a single `/` root). --- diff --git a/tests/windows/test_hook_augment.py b/tests/windows/test_hook_augment.py index 8f0365502..be809f368 100644 --- a/tests/windows/test_hook_augment.py +++ b/tests/windows/test_hook_augment.py @@ -1,29 +1,25 @@ -r"""RED integration test — the PreToolUse hook augmenter is a no-op on Windows. +r"""GREEN regression guard — the PreToolUse hook augmenter fires on Windows. -Reproduces issue #618 at the product surface. +Guards the fix for issue #618 (landed on main via #619) at the product surface. `codebase-memory-mcp hook-augment` is the non-blocking Claude Code PreToolUse Grep/Glob augmenter: given a hook payload it should emit a `hookSpecificOutput` with `additionalContext` listing graph symbols that match the searched token. -On Windows it emits nothing for every payload. `src/cli/hook_augment.c` gates on -POSIX-style absolute paths in two places: - - cbm_cmd_hook_augment (_WIN32 branch): if (!cwd || cwd[0] != '/') return 0; - ha_resolve_and_query walk-up loop: for (... ; dir[0] == '/'; ...) - -A Windows `cwd` is a drive-letter path (`C:\...` / `C:/...`), so `cwd[0]` is -never `'/'`; the augmenter bails before it ever queries the graph. +Before #619 it emitted nothing for every payload on Windows: `src/cli/hook_augment.c` +gated on POSIX-style absolute paths (`cwd[0] == '/'` and a walk-up loop over +`dir[0] == '/'`). A Windows `cwd` is a drive-letter path (`C:\...` / `C:/...`), +so `cwd[0]` was never `'/'` and the augmenter bailed before querying the graph. +#619 added `cbm_is_walkable_abs_path` (accepts `X:/` drive-letter roots), so the +augmenter now fires for a drive-letter cwd. This test indexes a repo with a known symbol, confirms `search_graph` finds it (control — proves the index and project name are fine), then invokes `hook-augment` exactly as the installed PreToolUse hook does and asserts a -`hookSpecificOutput` payload is produced. - -Passes on Linux/macOS (`cwd` starts with `/`). 
Fails on native Windows until the -path guards accept drive-letter absolute paths (and the walk-up loop climbs them). +`hookSpecificOutput` payload is produced. It fails (red) if that fix regresses. +Also passes on Linux/macOS (`cwd` starts with `/`). -Exit code: 0 == augmenter fired (green), 1 == no-op (red), 2 == setup error. +Exit code: 0 == augmenter fired (green), 1 == no-op (regression), 2 == setup error. Usage: python test_hook_augment.py @@ -101,8 +97,9 @@ def main(): if fired: print("\nGREEN: PreToolUse augmenter emitted additionalContext.") return 0 - print("\nRED: hook-augment produced no hookSpecificOutput on Windows " - "(drive-letter cwd fails the cwd[0]=='/' guards in hook_augment.c).") + print("\nREGRESSION (red): hook-augment produced no hookSpecificOutput on " + "Windows (drive-letter cwd rejected — has the #619 " + "cbm_is_walkable_abs_path handling in hook_augment.c regressed?).") return 1 finally: shutil.rmtree(work, ignore_errors=True) diff --git a/tests/windows/test_non_ascii_path.py b/tests/windows/test_non_ascii_path.py index fb5bea26c..896209f80 100644 --- a/tests/windows/test_non_ascii_path.py +++ b/tests/windows/test_non_ascii_path.py @@ -1,29 +1,26 @@ -"""RED integration test — Windows non-ASCII repo path drops all definitions. +"""GREEN regression guard — non-ASCII repo paths keep all definitions on Windows. -Reproduces issue #636 / #357 at the product surface (real codebase-memory-mcp -process, real SQLite DB, real stdio). Two byte-identical TypeScript fixtures are -indexed: one under an ASCII parent path, one under a non-ASCII parent path. The -invariant under test: +Guards the fix for issue #636 / #357 (landed on main via #700) at the product +surface (real codebase-memory-mcp process, real SQLite DB, real stdio). Two +byte-identical TypeScript fixtures are indexed: one under an ASCII parent path, +one under a non-ASCII parent path. 
The invariant under test: A byte-identical fixture must produce equivalent graph counts regardless of whether its absolute path contains non-ASCII characters. -Observed on native Windows: the ASCII copy extracts functions/classes/methods -(12 nodes / 20 edges); every non-ASCII copy (Latin-1 accents, Cyrillic, CJK, -Greek) extracts only File/Folder nodes (5 nodes / 4 edges) — zero definitions. +Before #700 native Windows extracted only File/Folder nodes for every non-ASCII +copy (Latin-1 accents, Cyrillic, CJK, Greek) — zero definitions — while the ASCII +copy extracted functions/classes/methods. Root cause: each pipeline pass read +source bytes with plain fopen(path, "rb") (src/pipeline/pass_definitions.c, +pass_calls.c, …); on Windows fopen() interprets the UTF-8 path in the active ANSI +code page, so a non-ASCII path could not be opened and the parser received +nothing. #700 routed the per-pass reads through cbm_fopen (→ _wfopen with a wide +path, src/foundation/compat_fs.c), so non-ASCII paths now parse identically. -Root cause: each pipeline pass reads source bytes with plain fopen(path, "rb") -(src/pipeline/pass_definitions.c, pass_calls.c, pass_parallel.c, pass_semantic.c, -…). On Windows fopen() interprets the UTF-8 path in the active ANSI code page, -so a path with non-ASCII bytes cannot be opened and the parser receives nothing. -Directory discovery already uses the wide API (cbm_utf8_to_wide + FindFirstFileW -in src/foundation/compat_fs.c), which is why File/Folder nodes still appear. +This guard fails (red) if that fix regresses. It also passes on Linux/macOS +(byte-transparent UTF-8 filesystem). -This test passes on Linux/macOS (byte-transparent UTF-8 filesystem) and fails on -native Windows. It turns green once the per-pass read_file helpers convert the -UTF-8 path to wide (_wfopen) the way compat_fs.c / platform.c already do. 
- -Exit code: 0 == invariant holds (green), 1 == invariant violated (red), +Exit code: 0 == invariant holds (green), 1 == invariant violated (regression), 2 == environment/setup error. Usage: @@ -153,10 +150,12 @@ def main(): shutil.rmtree(work, ignore_errors=True) if failures: - print("\nRED: %d/%d non-ASCII path variants lost definitions: %s" % + print("\nREGRESSION (red): %d/%d non-ASCII path variants lost " + "definitions: %s" % (len(failures), len(NON_ASCII_SEGMENTS), ", ".join(failures))) print("Invariant violated: byte-identical fixtures under non-ASCII paths " - "must extract the same definitions as the ASCII baseline.") + "must extract the same definitions as the ASCII baseline (fixed by " + "#700 — has the cbm_fopen routing in the pass readers regressed?).") return 1 print("\nGREEN: all non-ASCII path variants matched the ASCII baseline.") return 0 diff --git a/tests/windows/test_ui_drive_listing.py b/tests/windows/test_ui_drive_listing.py index 9457bb4e9..2252c63cc 100644 --- a/tests/windows/test_ui_drive_listing.py +++ b/tests/windows/test_ui_drive_listing.py @@ -1,25 +1,26 @@ -r"""RED integration test — the UI directory picker cannot reach non-system drives. - -Reproduces issue #548 at the product surface (the embedded HTTP UI). - -The UI directory picker calls `GET /api/browse?path=...` (handle_browse in -src/ui/http_server.c). For the filesystem root it does `opendir("/")`, which on -Windows resolves to the *current* drive's root and lists only that drive's -subdirectories. There is no `GetLogicalDriveStrings` drive enumeration, so when a -user opens the picker at root, drives other than the system drive (e.g. `D:\`, -`E:\`) never appear and cannot be selected. - -This test requires a UI build (`make -f Makefile.cbm cbm-with-ui`) because the -HTTP server only starts when the frontend is embedded. It launches the server, -queries `/api/browse?path=/`, and asserts that every fixed drive on the machine -is reachable from the root listing. 
It is meaningful only on a machine with more -than one drive; with a single drive it reports a precondition error (exit 2). - -Passes on a correct picker that enumerates drives; fails on native Windows until -handle_browse enumerates logical drives for the root path. - -Exit code: 0 == all drives reachable (green), 1 == non-system drives missing -(red), 2 == precondition not met (single drive / no UI build / server down). +r"""GREEN regression guard — the UI directory picker enumerates all logical drives. + +Guards the fix for issue #548 (landed on main). `handle_browse` +(src/ui/http_server.c) appends a `"roots"` array to every `/api/browse` +response; on Windows `append_roots_json` fills it from `GetLogicalDrives()` as +`"C:/"`, `"D:/"`, … so the directory picker can reach every drive (POSIX emits +a single `"/"`). This test asserts that user-level invariant against the running +embedded HTTP UI. + +Before #548 the picker did `opendir("/")`, listing only the current drive's +subdirectories under `dirs` with no drive enumeration — non-system drives were +unreachable. The original red test asserted drives appeared in `dirs`; the fix +intentionally exposes them via the separate `roots` field, so this guard checks +`roots` (and that each advertised drive is actually browsable). + +Requires a UI build (`make -f Makefile.cbm cbm-with-ui`) because the HTTP server +only starts when the frontend is embedded. Runs green with a single drive (C:/ +must be advertised and browsable); a machine with D:/E: exercises the multi-drive +reach more fully. + +Exit code: 0 == all drives advertised in roots and reachable (green), +1 == a drive is missing from roots or not browsable (regression), +2 == precondition not met (no UI build / server down). 
Usage: python test_ui_drive_listing.py [port] @@ -32,6 +33,7 @@ import sys import tempfile import time +import urllib.parse import urllib.request @@ -64,6 +66,11 @@ def http_get_json(url, timeout=5): return json.loads(r.read().decode("utf-8", "replace")) +def browse(port, path): + return http_get_json("http://127.0.0.1:%d/api/browse?path=%s" % + (port, urllib.parse.quote(path))) + + def wait_for_server(port, timeout=20): deadline = time.time() + timeout while time.time() < deadline: @@ -75,6 +82,12 @@ def wait_for_server(port, timeout=20): return False +def norm(s): + """Canonical drive key: 'D:\\', 'D:/', 'D:', 'D' -> 'D:'.""" + s = str(s).rstrip("\\/").upper() + return s if s.endswith(":") else (s + ":" if len(s) == 1 else s) + + def main(): if len(sys.argv) < 2: print("usage: python test_ui_drive_listing.py [port]") @@ -85,11 +98,9 @@ def main(): return 2 drives = list_fixed_drives() - extra = [d for d in drives if not d.upper().startswith("C:")] print("fixed drives: %s" % drives) - if not extra: - print("PRECONDITION: only one drive present; cannot test multi-drive " - "picker. Re-run on a machine with a D:/E: drive.") + if not drives: + print("PRECONDITION: no fixed drives detected.") return 2 work = tempfile.mkdtemp(prefix="cbm_win_uidrv_") @@ -102,60 +113,59 @@ def main(): stderr=subprocess.PIPE, env=env) try: if not wait_for_server(port, timeout=25): - err = b"" - try: - proc.stderr.settimeout = None - except Exception: - pass print("PRECONDITION: HTTP server did not start on port %d. Is this a " - "UI build (make cbm-with-ui)?" % port) + "UI build (make -f Makefile.cbm cbm-with-ui)?" % port) return 2 - # Control: browsing an explicit existing directory must return entries, - # proving the endpoint works and isolating the bug to root enumeration. - import urllib.parse + # Control: browsing an explicit existing directory must return a payload + # carrying the roots field, proving the endpoint works and isolating any + # failure to drive enumeration itself. 
roots is appended to *every* + # browse response, so any valid directory surfaces it. home = os.environ.get("USERPROFILE") or os.path.expanduser("~") home_fwd = home.replace("\\", "/") try: - ctrl = http_get_json("http://127.0.0.1:%d/api/browse?path=%s" % - (port, urllib.parse.quote(home_fwd))) + ctrl = browse(port, home_fwd) except Exception as ex: print("PRECONDITION: control /api/browse?path=%s failed: %r" % (home_fwd, ex)) return 2 - print("control browse(%r) -> dirs(%d)" % (home_fwd, len(ctrl.get("dirs", [])))) - if not ctrl.get("dirs"): - print("PRECONDITION: control browse returned no dirs; endpoint may be " - "non-functional in this build.") + roots = ctrl.get("roots") + print("control browse(%r) -> dirs(%d) roots=%s" % + (home_fwd, len(ctrl.get("dirs", [])), roots)) + if roots is None: + print("PRECONDITION: response has no 'roots' field; build predates " + "#548 or endpoint non-functional.") return 2 - # Browse the filesystem root. - try: - root = http_get_json("http://127.0.0.1:%d/api/browse?path=/" % port) - except Exception as ex: - print("PRECONDITION: /api/browse?path=/ failed: %r" % ex) - return 2 - root_dirs = root.get("dirs", []) - print("browse('/') -> path=%r dirs(%d)=%s" % - (root.get("path"), len(root_dirs), root_dirs[:20])) - - # A correct root listing must let the user reach every drive. Accept a - # match whether the API returns "D:", "D", or "D:\\"/"D:/". 
- def reachable(drive_root): - letter = drive_root[0].upper() - cands = {letter, letter + ":", letter + ":\\", letter + ":/", - drive_root, drive_root.rstrip("\\/")} - return any(str(d).rstrip("\\/").upper() in - {x.rstrip("\\/").upper() for x in cands} for d in root_dirs) - - missing = [d for d in extra if not reachable(d)] - if not missing: - print("\nGREEN: all non-system drives reachable from the root picker.") - return 0 - print("\nRED: drives %s are not reachable from the UI root picker " - "(/api/browse?path=/ lists only the current drive; handle_browse " - "does not enumerate logical drives)." % missing) - return 1 + # Invariant 1: every fixed drive is advertised in roots. + adv = {norm(r) for r in roots} + missing = [d for d in drives if norm(d) not in adv] + if missing: + print("\nRED: drives %s are not advertised in the picker's roots " + "array %s (handle_browse/append_roots_json did not enumerate " + "them)." % (missing, roots)) + return 1 + + # Invariant 2: every advertised drive is actually browsable (a user can + # select it). This is the user-level reach the fix promises. + unreachable = [] + for d in drives: + drive_root = norm(d) + "/" # "D:/" + try: + resp = browse(port, drive_root) + if resp.get("roots") is None and "dirs" not in resp: + unreachable.append(d) + except Exception as ex: + print(" browse(%r) failed: %r" % (drive_root, ex)) + unreachable.append(d) + if unreachable: + print("\nRED: drives advertised in roots but not browsable: %s" % + unreachable) + return 1 + + print("\nGREEN: all %d fixed drive(s) advertised in roots and reachable " + "from the UI picker." % len(drives)) + return 0 finally: try: proc.stdin.close()