diff --git a/src/db/connection.js b/src/db/connection.js
index b16828a0..75ee4a6d 100644
--- a/src/db/connection.js
+++ b/src/db/connection.js
@@ -1,11 +1,61 @@
+import { execFileSync } from 'node:child_process';
 import fs from 'node:fs';
 import path from 'node:path';
 import Database from 'better-sqlite3';
-import { warn } from '../infrastructure/logger.js';
+import { debug, warn } from '../infrastructure/logger.js';
 import { DbError } from '../shared/errors.js';
 import { Repository } from './repository/base.js';
 import { SqliteRepository } from './repository/sqlite-repository.js';
 
+let _cachedRepoRoot; // undefined = not computed, null = not a git repo
+let _cachedRepoRootCwd; // cwd at the time the cache was populated
+
+/**
+ * Return the git worktree/repo root for the given directory (or cwd).
+ * Uses `git rev-parse --show-toplevel` which returns the correct root
+ * for both regular repos and git worktrees.
+ * Results are cached per-process when called without arguments.
+ * The cache is keyed on cwd so it invalidates if the working directory changes
+ * (e.g. MCP server serving multiple sessions).
+ * @param {string} [fromDir] - Directory to resolve from (defaults to cwd)
+ * @returns {string | null} Absolute path to repo root, or null if not in a git repo
+ */
+export function findRepoRoot(fromDir) {
+  const dir = fromDir || process.cwd();
+  if (!fromDir && _cachedRepoRoot !== undefined && _cachedRepoRootCwd === dir) {
+    return _cachedRepoRoot;
+  }
+  let root = null;
+  try {
+    const raw = execFileSync('git', ['rev-parse', '--show-toplevel'], {
+      cwd: dir,
+      encoding: 'utf-8',
+      stdio: ['pipe', 'pipe', 'pipe'],
+    }).trim();
+    // Use realpathSync to resolve symlinks (macOS /var → /private/var) and
+    // 8.3 short names (Windows RUNNER~1 → runneradmin) so the ceiling path
+    // matches the realpathSync'd dir in findDbPath.
+    try {
+      root = fs.realpathSync(raw);
+    } catch {
+      root = path.resolve(raw);
+    }
+  } catch {
+    root = null;
+  }
+  if (!fromDir) {
+    _cachedRepoRoot = root;
+    _cachedRepoRootCwd = dir;
+  }
+  return root;
+}
+
+/** Reset the cached repo root (for testing). */
+export function _resetRepoRootCache() {
+  _cachedRepoRoot = undefined;
+  _cachedRepoRootCwd = undefined;
+}
+
 function isProcessAlive(pid) {
   try {
     process.kill(pid, 0);
@@ -46,6 +96,22 @@ function releaseAdvisoryLock(lockPath) {
   }
 }
 
+/**
+ * Check if two paths refer to the same directory.
+ * Handles Windows 8.3 short names (RUNNER~1 vs runneradmin) and macOS
+ * symlinks (/tmp vs /private/tmp) where string comparison fails.
+ */
+function isSameDirectory(a, b) {
+  if (path.resolve(a) === path.resolve(b)) return true;
+  try {
+    const sa = fs.statSync(a);
+    const sb = fs.statSync(b);
+    return sa.dev === sb.dev && sa.ino === sb.ino;
+  } catch {
+    return false;
+  }
+}
+
 export function openDb(dbPath) {
   const dir = path.dirname(dbPath);
   if (!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true });
@@ -64,15 +130,41 @@ export function closeDb(db) {
 
 export function findDbPath(customPath) {
   if (customPath) return path.resolve(customPath);
-  let dir = process.cwd();
+  const rawCeiling = findRepoRoot();
+  // Normalize ceiling with realpathSync to resolve 8.3 short names (Windows
+  // RUNNER~1 → runneradmin) and symlinks (macOS /var → /private/var).
+  // findRepoRoot already applies realpathSync internally, but the git output
+  // may still contain short names on some Windows CI environments.
+  let ceiling;
+  if (rawCeiling) {
+    try {
+      ceiling = fs.realpathSync(rawCeiling);
+    } catch {
+      ceiling = rawCeiling;
+    }
+  } else {
+    ceiling = null;
+  }
+  // Resolve symlinks (e.g. macOS /var → /private/var) so dir matches ceiling from git
+  let dir;
+  try {
+    dir = fs.realpathSync(process.cwd());
+  } catch {
+    dir = process.cwd();
+  }
   while (true) {
     const candidate = path.join(dir, '.codegraph', 'graph.db');
     if (fs.existsSync(candidate)) return candidate;
+    if (ceiling && isSameDirectory(dir, ceiling)) {
+      debug(`findDbPath: stopped at git ceiling ${ceiling}`);
+      break;
+    }
     const parent = path.dirname(dir);
     if (parent === dir) break;
     dir = parent;
   }
-  return path.join(process.cwd(), '.codegraph', 'graph.db');
+  const base = ceiling || process.cwd();
+  return path.join(base, '.codegraph', 'graph.db');
 }
 
 /**
diff --git a/src/db/index.js b/src/db/index.js
index 59a42808..82ffe2d2 100644
--- a/src/db/index.js
+++ b/src/db/index.js
@@ -1,5 +1,12 @@
 // Barrel re-export — keeps all existing `import { ... } from '…/db/index.js'` working.
-export { closeDb, findDbPath, openDb, openReadonlyOrFail, openRepo } from './connection.js';
+export {
+  closeDb,
+  findDbPath,
+  findRepoRoot,
+  openDb,
+  openReadonlyOrFail,
+  openRepo,
+} from './connection.js';
 export { getBuildMeta, initSchema, MIGRATIONS, setBuildMeta } from './migrations.js';
 export {
   fanInJoinSQL,
diff --git a/tests/unit/db.test.js b/tests/unit/db.test.js
index 7d338971..a1be6c4b 100644
--- a/tests/unit/db.test.js
+++ b/tests/unit/db.test.js
@@ -2,14 +2,28 @@
  * Unit tests for src/db.js — build_meta helpers included
  */
 
+// Note: due to vi.mock hoisting, this resolves to the spy (which delegates
+// to the real impl by default). Safe for setup calls before mockImplementationOnce.
+import { execFileSync as execFileSyncForSetup } from 'node:child_process';
 import fs from 'node:fs';
 import os from 'node:os';
 import path from 'node:path';
 import Database from 'better-sqlite3';
-import { afterAll, beforeAll, describe, expect, it } from 'vitest';
+import { afterAll, afterEach, beforeAll, beforeEach, describe, expect, it, vi } from 'vitest';
+
+const execFileSyncSpy = vi.hoisted(() => vi.fn());
+
+vi.mock('node:child_process', async (importOriginal) => {
+  const mod = await importOriginal();
+  execFileSyncSpy.mockImplementation(mod.execFileSync);
+  return { ...mod, execFileSync: execFileSyncSpy };
+});
+
+import { _resetRepoRootCache } from '../../src/db/connection.js';
 import {
   closeDb,
   findDbPath,
+  findRepoRoot,
   getBuildMeta,
   initSchema,
   MIGRATIONS,
@@ -131,11 +145,13 @@ describe('findDbPath', () => {
     const origCwd = process.cwd;
     process.cwd = () => deepDir;
     try {
+      _resetRepoRootCache();
       const result = findDbPath();
       expect(result).toContain('.codegraph');
       expect(result).toContain('graph.db');
     } finally {
       process.cwd = origCwd;
+      _resetRepoRootCache();
     }
   });
 
@@ -143,12 +159,16 @@ describe('findDbPath', () => {
     const emptyDir = fs.mkdtempSync(path.join(tmpDir, 'empty-'));
     const origCwd = process.cwd;
     process.cwd = () => emptyDir;
+    _resetRepoRootCache();
+    execFileSyncSpy.mockImplementationOnce(() => {
+      throw new Error('not a git repo');
+    });
     try {
       const result = findDbPath();
-      expect(result).toContain('.codegraph');
-      expect(result).toContain('graph.db');
+      expect(result).toBe(path.join(emptyDir, '.codegraph', 'graph.db'));
     } finally {
       process.cwd = origCwd;
+      _resetRepoRootCache();
     }
   });
 });
@@ -194,6 +214,143 @@ describe('build_meta', () => {
   });
 });
 
+describe('findRepoRoot', () => {
+  beforeEach(() => {
+    _resetRepoRootCache();
+  });
+
+  afterEach(() => {
+    _resetRepoRootCache();
+  });
+
+  it('returns normalized git toplevel for the current repo', () => {
+    _resetRepoRootCache();
+    const root = findRepoRoot();
+    expect(root).toBeTruthy();
+    expect(path.isAbsolute(root)).toBe(true);
+    // Should contain a .git entry at the root
+    expect(fs.existsSync(path.join(root, '.git'))).toBe(true);
+  });
+
+  it('returns null when not in a git repo', () => {
+    execFileSyncSpy.mockImplementationOnce(() => {
+      throw new Error('not a git repo');
+    });
+    const root = findRepoRoot(os.tmpdir());
+    expect(root).toBeNull();
+  });
+
+  it('caches results when called without arguments', () => {
+    _resetRepoRootCache();
+    execFileSyncSpy.mockClear();
+    const first = findRepoRoot();
+    const second = findRepoRoot();
+    expect(first).toBe(second);
+    expect(execFileSyncSpy).toHaveBeenCalledTimes(1);
+  });
+
+  it('bypasses cache when called with explicit dir', () => {
+    _resetRepoRootCache();
+    execFileSyncSpy.mockClear();
+    const fromCwd = findRepoRoot();
+    const fromExplicit = findRepoRoot(process.cwd());
+    expect(fromExplicit).toBe(fromCwd);
+    // First call populates cache, second call with explicit dir must call again
+    expect(execFileSyncSpy).toHaveBeenCalledTimes(2);
+  });
+});
+
+describe('findDbPath with git ceiling', () => {
+  let outerDir;
+  let worktreeRoot;
+  let innerDir;
+
+  beforeAll(() => {
+    // Simulate a worktree-inside-repo layout:
+    //   outerDir/.codegraph/graph.db  (parent repo DB — should NOT be found)
+    //   outerDir/worktree/            (git init here — acts as ceiling)
+    //   outerDir/worktree/sub/        (cwd inside worktree)
+    outerDir = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-ceiling-'));
+    worktreeRoot = path.join(outerDir, 'worktree');
+    fs.mkdirSync(path.join(outerDir, '.codegraph'), { recursive: true });
+    fs.writeFileSync(path.join(outerDir, '.codegraph', 'graph.db'), '');
+    fs.mkdirSync(path.join(worktreeRoot, 'sub'), { recursive: true });
+    // Initialize a real git repo at the worktree root so findRepoRoot returns it
+    execFileSyncForSetup('git', ['init'], { cwd: worktreeRoot, stdio: 'pipe' });
+    // Resolve symlinks (macOS /var → /private/var) and 8.3 short names
+    // (Windows RUNNER~1 → runneradmin) so test paths match findRepoRoot output.
+    outerDir = fs.realpathSync(outerDir);
+    worktreeRoot = fs.realpathSync(worktreeRoot);
+    innerDir = path.join(worktreeRoot, 'sub');
+  });
+
+  afterAll(() => {
+    fs.rmSync(outerDir, { recursive: true, force: true });
+  });
+
+  afterEach(() => {
+    _resetRepoRootCache();
+  });
+
+  it('stops at git ceiling and does not find parent DB', () => {
+    // No DB inside the worktree — the only DB is in outerDir (beyond the ceiling).
+    // Without the ceiling fix, findDbPath would walk up and find outerDir's DB.
+    const origCwd = process.cwd;
+    process.cwd = () => innerDir;
+    try {
+      _resetRepoRootCache();
+      // Use findRepoRoot() for the expected ceiling — git may resolve 8.3 short
+      // names (Windows RUNNER~1 → runneradmin) or symlinks (macOS /tmp → /private/tmp)
+      // differently than fs.realpathSync on the test's worktreeRoot.
+      const ceiling = findRepoRoot();
+      const result = findDbPath();
+      // Should return default path at the ceiling root, NOT the outer DB
+      expect(result).toBe(path.join(ceiling, '.codegraph', 'graph.db'));
+      expect(result).not.toContain(`${path.basename(outerDir)}${path.sep}.codegraph`);
+    } finally {
+      process.cwd = origCwd;
+    }
+  });
+
+  it('finds DB within the ceiling boundary', () => {
+    // Create a DB inside the worktree — should be found normally
+    fs.mkdirSync(path.join(worktreeRoot, '.codegraph'), { recursive: true });
+    fs.writeFileSync(path.join(worktreeRoot, '.codegraph', 'graph.db'), '');
+    const origCwd = process.cwd;
+    process.cwd = () => innerDir;
+    try {
+      _resetRepoRootCache();
+      const result = findDbPath();
+      // Verify the DB was found (file exists) and is the worktree DB, not the outer one
+      expect(fs.existsSync(result)).toBe(true);
+      expect(result).toMatch(/\.codegraph[/\\]graph\.db$/);
+      // The outer DB is at outerDir/.codegraph — verify we didn't find that one
+      expect(result).not.toContain(`${path.basename(outerDir)}${path.sep}.codegraph`);
+    } finally {
+      process.cwd = origCwd;
+      fs.rmSync(path.join(worktreeRoot, '.codegraph'), { recursive: true, force: true });
+    }
+  });
+
+  it('falls back gracefully when not in a git repo', () => {
+    const emptyDir = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-nogit-'));
+    const origCwd = process.cwd;
+    process.cwd = () => emptyDir;
+    _resetRepoRootCache();
+    execFileSyncSpy.mockImplementationOnce(() => {
+      throw new Error('not a git repo');
+    });
+    try {
+      const result = findDbPath();
+      // Should return default path at cwd since there's no git ceiling
+      expect(result).toBe(path.join(emptyDir, '.codegraph', 'graph.db'));
+    } finally {
+      process.cwd = origCwd;
+      fs.rmSync(emptyDir, { recursive: true, force: true });
+    }
+  });
+});
+
 describe('openReadonlyOrFail', () => {
   it('throws DbError when DB does not exist', () => {
     expect.assertions(4);