diff --git a/src/context_engine/cli.py b/src/context_engine/cli.py index 1e3e3ef..b481ccc 100644 --- a/src/context_engine/cli.py +++ b/src/context_engine/cli.py @@ -20,6 +20,7 @@ sys.stderr.reconfigure(encoding="utf-8", errors="replace") from context_engine.config import load_config, resolve_ollama_url, PROJECT_CONFIG_NAME +from context_engine.utils import project_storage_dir def _safe_cwd() -> Path: @@ -330,7 +331,7 @@ def _check_memory_capture_reachable(config, project_dir: Path) -> None: """ import socket project_name = project_dir.name - storage_base = Path(config.storage_path) / project_name + storage_base = project_storage_dir(config, project_dir) # Try the storage-local file first (authoritative), then fall back to # the default-path rendezvous file `cce serve` writes for the hook # shell script. Either is sufficient for the probe. @@ -443,7 +444,7 @@ def _show_welcome_banner(config) -> None: project_dir = _safe_cwd() project_name = project_dir.name - storage_dir = Path(config.storage_path) / project_name + storage_dir = project_storage_dir(config, project_dir) # Gather stats chunks = 0 @@ -873,8 +874,7 @@ def init(ctx: click.Context, agent: str) -> None: click.echo("") # 2. Storage - project_name = project_dir.name - storage_dir = Path(config.storage_path) / project_name + storage_dir = project_storage_dir(config, project_dir) storage_dir.mkdir(parents=True, exist_ok=True) meta_path = storage_dir / "meta.json" meta_path.write_text(json.dumps({"project_dir": str(project_dir.resolve())})) @@ -993,7 +993,7 @@ def status(ctx: click.Context, output_json: bool, oneline: bool) -> None: except Exception: ver = "?" 
project_name = _safe_cwd().name - storage = Path(config.storage_path) / project_name + storage = project_storage_dir(config, _safe_cwd()) stats_path = storage / "stats.json" chunks = 0 savings = "" @@ -1068,8 +1068,7 @@ def status(ctx: click.Context, output_json: bool, oneline: bool) -> None: lines.append(f" {BULLET} {label('Compress')} {value(compression_mode)}") # Token savings - project_name = _safe_cwd().name - stats_path = Path(config.storage_path) / project_name / "stats.json" + stats_path = project_storage_dir(config, _safe_cwd()) / "stats.json" lines.append("") lines.append(section("Token Savings")) lines.append("") @@ -1090,7 +1089,7 @@ def status(ctx: click.Context, output_json: bool, oneline: bool) -> None: except (KeyError, _json.JSONDecodeError): lines.append(f" {DOT} {dim('Error reading stats')}") else: - storage_dir = Path(config.storage_path) / _safe_cwd().name + storage_dir = project_storage_dir(config, _safe_cwd()) vectors_dir = storage_dir / "vectors" if not vectors_dir.exists(): lines.append(f" {DOT} {dim('Project not indexed yet')} {label('cce init')}") @@ -1098,7 +1097,7 @@ def status(ctx: click.Context, output_json: bool, oneline: bool) -> None: lines.append(f" {DOT} {dim('No usage recorded yet')} {dim('run context_search via MCP')}") # Embedding cache stats — surfaces how much the cache is actually saving. 
- cache_db = Path(config.storage_path) / _safe_cwd().name / "embedding_cache.db" + cache_db = project_storage_dir(config, _safe_cwd()) / "embedding_cache.db" if cache_db.exists(): try: from context_engine.indexer.embedding_cache import EmbeddingCache @@ -1764,8 +1763,7 @@ def _json_entry(name: str, stats: dict, buckets: dict, levels: dict) -> dict: key=lambda d: d.name, ) else: - project_name = _safe_cwd().name - project_dirs = [storage_root / project_name] + project_dirs = [project_storage_dir(config, _safe_cwd())] # Each report carries its bucket totals and level histogram alongside # the legacy stats.json so downstream renderers/JSON emitters can @@ -1872,7 +1870,7 @@ def clear(ctx: click.Context, yes: bool) -> None: config = ctx.obj["config"] project_name = _safe_cwd().name - storage_dir = Path(config.storage_path) / project_name + storage_dir = project_storage_dir(config, _safe_cwd()) if not storage_dir.exists(): animate(["", f" {DOT} {dim('No index data found for')} {value(project_name)}", ""]) @@ -1975,14 +1973,13 @@ def search(ctx: click.Context, query: str, top_k: int) -> None: config = ctx.obj["config"] project_dir = str(_safe_cwd()) - project_name = _safe_cwd().name async def _search(): from context_engine.storage.local_backend import LocalBackend from context_engine.indexer.embedder import Embedder from context_engine.retrieval.retriever import HybridRetriever - storage_dir = Path(config.storage_path) / project_name + storage_dir = project_storage_dir(config, _safe_cwd()) if not (storage_dir / "vectors").exists(): animate(["", f" {DOT} {dim('Not indexed yet. 
Run:')} {label('cce init')}", ""]) return @@ -2216,7 +2213,7 @@ def uninstall(yes: bool) -> None: # Remove index data from ~/.cce/projects/ config = load_config() - index_dir = Path(config.storage_path) / project_name + index_dir = project_storage_dir(config, project_dir) if index_dir.exists(): import shutil shutil.rmtree(index_dir) @@ -2573,7 +2570,7 @@ def sessions_status(ctx: click.Context) -> None: config = ctx.obj["config"] project_name = _safe_cwd().name - storage_base = Path(config.storage_path) / project_name + storage_base = project_storage_dir(config, _safe_cwd()) db_path = memory_db.memory_db_path(storage_base) click.echo(f" project: {project_name}") @@ -2708,8 +2705,7 @@ def sessions_prune( from context_engine.memory import db as memory_db config = ctx.obj["config"] - project_name = _safe_cwd().name - storage_base = Path(config.storage_path) / project_name + storage_base = project_storage_dir(config, _safe_cwd()) sessions_dir = storage_base / "sessions" if sessions_dir.exists(): @@ -2792,7 +2788,7 @@ def sessions_export( config = ctx.obj["config"] project_name = _safe_cwd().name - storage_base = Path(config.storage_path) / project_name + storage_base = project_storage_dir(config, _safe_cwd()) db_path = memory_db.memory_db_path(storage_base) if not db_path.exists(): click.echo(" No memory.db for this project — nothing to export.") @@ -2905,7 +2901,7 @@ def sessions_migrate(ctx: click.Context, no_archive: bool) -> None: config = ctx.obj["config"] project_name = _safe_cwd().name - storage_base = Path(config.storage_path) / project_name + storage_base = project_storage_dir(config, _safe_cwd()) db_path = memory_db.memory_db_path(storage_base) conn = memory_db.connect(db_path) @@ -3028,8 +3024,8 @@ def phase_fn(msg: str) -> None: ) # Update full_file_tokens baseline so cce savings shows codebase size - project_name = Path(project_dir).name - stats_path = Path(config.storage_path) / project_name / "stats.json" + _storage_dir = project_storage_dir(config, 
Path(project_dir)) + stats_path = _storage_dir / "stats.json" try: stats = json.loads(stats_path.read_text()) if stats_path.exists() else {} except (json.JSONDecodeError, OSError): @@ -3037,7 +3033,7 @@ def phase_fn(msg: str) -> None: total_tokens = 0 project_root = Path(project_dir) from context_engine.storage.local_backend import LocalBackend - backend = LocalBackend(base_path=str(Path(config.storage_path) / project_name)) + backend = LocalBackend(base_path=str(_storage_dir)) for rel_path in backend._vector_store.file_chunk_counts(): fp = project_root / rel_path if fp.exists(): @@ -3075,7 +3071,7 @@ async def _run_serve(config) -> None: project_dir = str(_safe_cwd()) project_name = _safe_cwd().name - storage_base = Path(config.storage_path) / project_name + storage_base = project_storage_dir(config, _safe_cwd()) backend = LocalBackend(base_path=str(storage_base)) embedder = Embedder(model_name=config.embedding_model) retriever = HybridRetriever(backend=backend, embedder=embedder) diff --git a/src/context_engine/dashboard/server.py b/src/context_engine/dashboard/server.py index fb52e2e..d555b3c 100644 --- a/src/context_engine/dashboard/server.py +++ b/src/context_engine/dashboard/server.py @@ -18,6 +18,7 @@ from context_engine.dashboard._page import PAGE_HTML from context_engine.indexer.pipeline import PathOutsideProjectError, run_indexing from context_engine.storage.local_backend import LocalBackend +from context_engine.utils import project_storage_dir # Mutating HTTP methods require a same-origin browser request OR a non-browser # client (Sec-Fetch-Site absent). This blocks CSRF from a malicious local page @@ -46,7 +47,7 @@ def create_app(config: Config, project_dir: Path) -> FastAPI: app is self-contained and trivial to test with TestClient. 
""" project_name = project_dir.name - storage_base = Path(config.storage_path) / project_name + storage_base = project_storage_dir(config, project_dir) app = FastAPI(title="CCE Dashboard", docs_url=None, redoc_url=None) diff --git a/src/context_engine/indexer/pipeline.py b/src/context_engine/indexer/pipeline.py index ecd8618..04d446d 100644 --- a/src/context_engine/indexer/pipeline.py +++ b/src/context_engine/indexer/pipeline.py @@ -22,6 +22,7 @@ from context_engine.indexer.manifest import Manifest from context_engine.models import ChunkType, GraphNode, GraphEdge, NodeType, EdgeType from context_engine.storage.local_backend import LocalBackend +from context_engine.utils import project_storage_dir # Map a chunk's semantic type to its graph node type. Without this every @@ -289,8 +290,7 @@ async def run_indexing( progress_fn is per-batch. """ project_dir = Path(project_dir) - project_name = project_dir.name - storage_base = Path(config.storage_path) / project_name + storage_base = project_storage_dir(config, project_dir) storage_base.mkdir(parents=True, exist_ok=True) async with _pipeline_lock(str(storage_base)): diff --git a/src/context_engine/integration/mcp_server.py b/src/context_engine/integration/mcp_server.py index 8a14897..6d49ba4 100644 --- a/src/context_engine/integration/mcp_server.py +++ b/src/context_engine/integration/mcp_server.py @@ -7,7 +7,7 @@ import threading from pathlib import Path -from context_engine.utils import atomic_write_text as _atomic_write_text +from context_engine.utils import atomic_write_text as _atomic_write_text, project_storage_dir from mcp.server import Server from mcp.types import Tool, TextContent @@ -398,7 +398,7 @@ def __init__(self, retriever, backend, compressor, embedder, config) -> None: project_name = Path.cwd().name self._project_name = project_name self._project_dir = str(Path.cwd()) - self._storage_base = Path(config.storage_path) / project_name + self._storage_base = project_storage_dir(config, Path.cwd()) 
self._storage_base.mkdir(parents=True, exist_ok=True) self._stats_path = self._storage_base / "stats.json" self._state_path = self._storage_base / "state.json" diff --git a/src/context_engine/serve_http.py b/src/context_engine/serve_http.py index 08fa9a4..7618604 100644 --- a/src/context_engine/serve_http.py +++ b/src/context_engine/serve_http.py @@ -12,6 +12,7 @@ from context_engine.config import load_config, resolve_ollama_url, PROJECT_CONFIG_NAME from context_engine.storage.local_backend import LocalBackend +from context_engine.utils import project_storage_dir from context_engine.indexer.embedder import Embedder from context_engine.compression.compressor import Compressor from context_engine.models import Chunk, ChunkType, GraphNode, GraphEdge, NodeType, EdgeType @@ -186,8 +187,7 @@ def run_http_server(config=None, host: str = "127.0.0.1", port: int = 8765) -> N project_path = Path.cwd() / PROJECT_CONFIG_NAME config = load_config(project_path=project_path if project_path.exists() else None) - project_name = Path.cwd().name - storage_base = Path(config.storage_path) / project_name + storage_base = project_storage_dir(config, Path.cwd()) storage_base.mkdir(parents=True, exist_ok=True) backend = LocalBackend(base_path=str(storage_base)) diff --git a/src/context_engine/utils.py b/src/context_engine/utils.py index e11b80c..7ee5e2e 100644 --- a/src/context_engine/utils.py +++ b/src/context_engine/utils.py @@ -1,4 +1,6 @@ """Shared utilities for CCE.""" +import hashlib +import logging import os import shutil import sys @@ -47,6 +49,58 @@ def atomic_write_text(path: Path, data: str) -> None: raise +_log = logging.getLogger(__name__) + + +def _project_slug(project_dir: Path) -> str: + """Stable per-directory slug: ``<basename>-<6hex>``. + + Same algorithm as ``editors._project_slug`` so two projects sharing a + basename (``api``, ``web``) get distinct storage directories. 
+ Symlinks are resolved before hashing so two paths pointing at the + same on-disk directory produce the same slug. + """ + resolved = project_dir.resolve() + abs_path = str(resolved) + h = hashlib.sha256(abs_path.encode()).hexdigest()[:6] + safe = "".join( + c if (c.isascii() and (c.isalnum() or c in "-_")) else "-" + for c in resolved.name + ) + return f"{safe or 'project'}-{h}" + + +def project_storage_dir(config: object, project_dir: Path) -> Path: + """Return the per-project storage directory under ``config.storage_path``. + + Uses a ``<basename>-<6hex>`` slug so two projects sharing the same + basename (e.g. ``~/work/api`` and ``~/scratch/api``) get separate + storage directories instead of silently colliding. + + On first call, if the legacy directory (bare basename, no hash suffix) + exists but the new slug directory does not, the legacy directory is + renamed in place to preserve existing users' data. + """ + slug = _project_slug(project_dir) + storage_root = Path(config.storage_path) # type: ignore[union-attr] + slug_path = storage_root / slug + legacy_path = storage_root / project_dir.resolve().name + + if not slug_path.exists() and legacy_path.exists(): + try: + legacy_path.rename(slug_path) + _log.info("Migrated storage %s -> %s", legacy_path, slug_path) + except OSError: + _log.warning( + "Could not migrate legacy storage %s to %s; " + "using slug path (may re-index)", + legacy_path, + slug_path, + ) + + return slug_path + + def resolve_cce_binary() -> str: """Find the globally installed cce binary path. 
diff --git a/tests/dashboard/test_dashboard_smoke.py b/tests/dashboard/test_dashboard_smoke.py index 2034236..7b00274 100644 --- a/tests/dashboard/test_dashboard_smoke.py +++ b/tests/dashboard/test_dashboard_smoke.py @@ -11,6 +11,7 @@ from context_engine.config import Config from context_engine.dashboard.server import create_app from context_engine.memory import db as memory_db +from context_engine.utils import project_storage_dir def _setup(tmp_path: Path, *, with_stats: bool = False, with_memory: bool = False): @@ -18,8 +19,9 @@ def _setup(tmp_path: Path, *, with_stats: bool = False, with_memory: bool = Fals project_name = "smoke-project" project_dir = tmp_path / "workspace" / project_name project_dir.mkdir(parents=True) - storage_base = tmp_path / "storage" / project_name - storage_base.mkdir(parents=True) + config = Config(storage_path=str(tmp_path / "storage")) + storage_base = project_storage_dir(config, project_dir) + storage_base.mkdir(parents=True, exist_ok=True) if with_stats: (storage_base / "stats.json").write_text(json.dumps({ diff --git a/tests/dashboard/test_memory_endpoints.py b/tests/dashboard/test_memory_endpoints.py index d90efbd..5edfa29 100644 --- a/tests/dashboard/test_memory_endpoints.py +++ b/tests/dashboard/test_memory_endpoints.py @@ -10,6 +10,7 @@ from context_engine.config import Config from context_engine.dashboard.server import create_app from context_engine.memory import db as memory_db +from context_engine.utils import project_storage_dir @pytest.fixture @@ -18,18 +19,19 @@ def client(tmp_path: Path): project_dir.mkdir() storage_path = tmp_path / "storage" storage_path.mkdir() - project_storage = storage_path / "demo" - project_storage.mkdir() + + config = Config( + storage_path=str(storage_path), + embedding_model="BAAI/bge-small-en-v1.5", + ) + project_storage = project_storage_dir(config, project_dir) + project_storage.mkdir(parents=True, exist_ok=True) # Minimal manifest so /api/files / /api/status work. 
(project_storage / "manifest.json").write_text( json.dumps({"__schema_version": 2, "files": {}, "last_git_sha": None}) ) - config = Config( - storage_path=str(storage_path), - embedding_model="BAAI/bge-small-en-v1.5", - ) app = create_app(config, project_dir) return TestClient(app), project_storage diff --git a/tests/dashboard/test_server.py b/tests/dashboard/test_server.py index c918b12..17c2e32 100644 --- a/tests/dashboard/test_server.py +++ b/tests/dashboard/test_server.py @@ -8,14 +8,16 @@ from context_engine.config import Config from context_engine.dashboard.server import create_app +from context_engine.utils import project_storage_dir def _setup_storage(tmp_path: Path, project_name: str = "my-project") -> tuple[Path, Path]: """Create storage dir with stats.json and manifest.json; return (storage_root, project_dir).""" project_dir = tmp_path / "workspace" / project_name project_dir.mkdir(parents=True) - storage_base = tmp_path / "storage" / project_name - storage_base.mkdir(parents=True) + config = Config(storage_path=str(tmp_path / "storage")) + storage_base = project_storage_dir(config, project_dir) + storage_base.mkdir(parents=True, exist_ok=True) return storage_base, project_dir diff --git a/tests/indexer/test_pipeline_durability.py b/tests/indexer/test_pipeline_durability.py index 0f3ada9..a2533f3 100644 --- a/tests/indexer/test_pipeline_durability.py +++ b/tests/indexer/test_pipeline_durability.py @@ -36,7 +36,8 @@ def project_with_existing_index(tmp_path): def _project_storage(config, project_dir: Path) -> Path: - return Path(config.storage_path) / project_dir.name + from context_engine.utils import project_storage_dir + return project_storage_dir(config, project_dir) def _count_chunks(config, project_dir: Path) -> int: diff --git a/tests/indexer/test_pipeline_graph_types.py b/tests/indexer/test_pipeline_graph_types.py index e54c8b9..8821386 100644 --- a/tests/indexer/test_pipeline_graph_types.py +++ b/tests/indexer/test_pipeline_graph_types.py @@ -31,7 
+31,8 @@ async def test_markdown_fallback_chunk_is_module_not_class(tmp_path): result = await run_indexing(config, str(project_dir), full=True) assert "README.md" in result.indexed_files - graph = GraphStore(db_path=str(storage_base / project_dir.name / "graph")) + from context_engine.utils import project_storage_dir + graph = GraphStore(db_path=str(project_storage_dir(config, project_dir) / "graph")) classes = await graph.get_nodes_by_type(NodeType.CLASS) # No real classes in a markdown file — anything here is a misclassified # fallback chunk from the regression. diff --git a/tests/indexer/test_pipeline_streaming.py b/tests/indexer/test_pipeline_streaming.py index 4bab905..97af667 100644 --- a/tests/indexer/test_pipeline_streaming.py +++ b/tests/indexer/test_pipeline_streaming.py @@ -9,7 +9,6 @@ """ from __future__ import annotations -from pathlib import Path from unittest.mock import patch import pytest @@ -81,7 +80,8 @@ async def test_streaming_preserves_total_chunk_count(many_file_project): result = await run_indexing(config, str(project_dir), full=True) - storage = Path(config.storage_path) / project_dir.name + from context_engine.utils import project_storage_dir + storage = project_storage_dir(config, project_dir) backend = LocalBackend(base_path=str(storage)) assert backend.count_chunks() == result.total_chunks # 120 single-function files → 120 function chunks (plus possibly module diff --git a/tests/integration/test_memory_loop.py b/tests/integration/test_memory_loop.py index 3c3bd87..55be149 100644 --- a/tests/integration/test_memory_loop.py +++ b/tests/integration/test_memory_loop.py @@ -23,6 +23,7 @@ from context_engine.integration.session_capture import SessionCapture from context_engine.retrieval.retriever import HybridRetriever from context_engine.storage.local_backend import LocalBackend +from context_engine.utils import project_storage_dir def _build_server(project_dir: Path, storage_root: Path) -> ContextEngineMCP: @@ -34,7 +35,7 @@ def 
_build_server(project_dir: Path, storage_root: Path) -> ContextEngineMCP: `__new__` bypass like the older test in test_mcp_server.py). """ config = Config(storage_path=str(storage_root)) - backend = LocalBackend(base_path=str(storage_root / project_dir.name)) + backend = LocalBackend(base_path=str(project_storage_dir(config, project_dir))) embedder = Embedder(model_name=config.embedding_model) retriever = HybridRetriever(backend=backend, embedder=embedder) compressor = Compressor(model=config.compression_model, cache=backend) @@ -75,7 +76,7 @@ async def test_decision_recorded_then_recalled_across_restart(project_and_storag assert "Decision recorded" in result[0].text # The decision should have been persisted to the per-session JSON. Locate it. - sessions_dir = storage_root / project_dir.name / "sessions" + sessions_dir = project_storage_dir(Config(storage_path=str(storage_root)), project_dir) / "sessions" session_files = [ f for f in sessions_dir.glob("*.json") if f.name != "decisions_log.json" ] @@ -207,7 +208,7 @@ async def test_session_recall_searches_decisions_log_archive(project_and_storage "Decisions remain searchable via session_recall after pruning".""" project_dir, storage_root = project_and_storage - sessions_dir = storage_root / project_dir.name / "sessions" + sessions_dir = project_storage_dir(Config(storage_path=str(storage_root)), project_dir) / "sessions" sessions_dir.mkdir(parents=True) # No on-disk per-session files — only the consolidated archive — so we diff --git a/tests/test_cli_savings_e2e.py b/tests/test_cli_savings_e2e.py index fde3e84..b32adec 100644 --- a/tests/test_cli_savings_e2e.py +++ b/tests/test_cli_savings_e2e.py @@ -13,6 +13,7 @@ from context_engine.cli import main from context_engine.config import Config +from context_engine.utils import project_storage_dir from context_engine.indexer.chunker import Chunker from context_engine.indexer.embedder import Embedder from context_engine.indexer.manifest import Manifest @@ -97,9 +98,13 @@ 
def runner(): def indexed_project(tmp_path): """Create a temp project, index it, return (storage_dir, project_dir).""" project_dir = tmp_path / "project" - storage_dir = tmp_path / "storage" + storage_root = tmp_path / "storage" project_dir.mkdir() - storage_dir.mkdir() + storage_root.mkdir() + + config = Config(storage_path=str(storage_root)) + storage_dir = project_storage_dir(config, project_dir) + storage_dir.mkdir(parents=True, exist_ok=True) # Write sample files for rel_path, content in SAMPLE_FILES.items(): @@ -201,14 +206,10 @@ async def test_query_and_savings(runner, indexed_project): # Run cce savings and verify output config = Config(storage_path=str(storage_dir.parent)) - # storage_dir is tmp_path/storage, project name = "storage" - project_name = storage_dir.name - with runner.isolated_filesystem(): - cwd = Path.cwd() / project_name - cwd.mkdir(parents=True, exist_ok=True) - with patch("context_engine.cli.load_config", return_value=config), \ - patch("context_engine.cli.Path.cwd", return_value=cwd): - result = runner.invoke(main, ["savings"]) + project_name = project_dir.name + with patch("context_engine.cli.load_config", return_value=config), \ + patch("context_engine.cli._safe_cwd", return_value=project_dir): + result = runner.invoke(main, ["savings"]) assert result.exit_code == 0, f"savings failed:\n{result.output}" out = result.output @@ -233,12 +234,9 @@ async def test_query_and_savings(runner, indexed_project): assert "Total saved" in out # JSON output also works - with runner.isolated_filesystem(): - cwd = Path.cwd() / project_name - cwd.mkdir(parents=True, exist_ok=True) - with patch("context_engine.cli.load_config", return_value=config), \ - patch("context_engine.cli.Path.cwd", return_value=cwd): - json_result = runner.invoke(main, ["savings", "--json"]) + with patch("context_engine.cli.load_config", return_value=config), \ + patch("context_engine.cli._safe_cwd", return_value=project_dir): + json_result = runner.invoke(main, ["savings", 
"--json"]) assert json_result.exit_code == 0 data = json.loads(json_result.output) diff --git a/tests/test_project_storage_dir.py b/tests/test_project_storage_dir.py new file mode 100644 index 0000000..2e85470 --- /dev/null +++ b/tests/test_project_storage_dir.py @@ -0,0 +1,91 @@ +"""Tests for project_storage_dir — the slug-based storage path helper.""" +from pathlib import Path +from types import SimpleNamespace + +from context_engine.utils import project_storage_dir + + +def _config(tmp_path: Path) -> SimpleNamespace: + return SimpleNamespace(storage_path=str(tmp_path)) + + +def test_same_basename_different_parents_produce_different_dirs(tmp_path): + """Two projects with the same basename but different parents must NOT collide.""" + cfg = _config(tmp_path) + dir_a = Path("/home/user/work/api") + dir_b = Path("/home/user/scratch/api") + + result_a = project_storage_dir(cfg, dir_a) + result_b = project_storage_dir(cfg, dir_b) + + assert result_a != result_b + assert result_a.parent == result_b.parent == tmp_path + + +def test_same_path_is_deterministic(tmp_path): + """Calling with the same project_dir always returns the same storage dir.""" + cfg = _config(tmp_path) + project = Path("/home/user/myproject") + + assert project_storage_dir(cfg, project) == project_storage_dir(cfg, project) + + +def test_slug_contains_basename_and_hex(tmp_path): + """The slug directory name should be <basename>-<6hex>.""" + cfg = _config(tmp_path) + project = Path("/home/user/my-app") + + result = project_storage_dir(cfg, project) + name = result.name + + assert name.startswith("my-app-") + hex_suffix = name.split("-")[-1] + assert len(hex_suffix) == 6 + # Must be valid hex + int(hex_suffix, 16) + + +def test_legacy_dir_is_migrated(tmp_path): + """If the legacy (bare basename) dir exists and slug dir does not, rename it.""" + cfg = _config(tmp_path) + project = tmp_path / "subdir" / "api" + project.mkdir(parents=True) + + # The legacy dir uses the resolved basename + legacy = tmp_path / "api" + 
legacy.mkdir() + (legacy / "vectors").mkdir() + (legacy / "marker.txt").write_text("legacy-data") + + result = project_storage_dir(cfg, project) + + # Legacy dir should have been renamed to the slug dir + assert result.exists() + assert (result / "marker.txt").read_text() == "legacy-data" + assert (result / "vectors").is_dir() + # Legacy path should no longer exist + assert not legacy.exists() + + +def test_no_migration_if_slug_dir_already_exists(tmp_path): + """If the slug dir already exists, the legacy dir is left alone.""" + cfg = _config(tmp_path) + project = tmp_path / "subdir" / "api" + project.mkdir(parents=True) + + # Create both legacy and slug dirs + legacy = tmp_path / "api" + legacy.mkdir() + (legacy / "old.txt").write_text("old") + + slug_dir = project_storage_dir(cfg, project) + # First call migrated; now create a "new legacy" to verify no second migration + legacy.mkdir() + (legacy / "new.txt").write_text("new") + + result = project_storage_dir(cfg, project) + + assert result == slug_dir + # Legacy dir should still exist (not migrated again) + assert legacy.exists() + assert (legacy / "new.txt").read_text() == "new" diff --git a/tests/test_real_life.py b/tests/test_real_life.py index c429941..2a826b5 100644 --- a/tests/test_real_life.py +++ b/tests/test_real_life.py @@ -18,6 +18,7 @@ from context_engine.indexer.pipeline import run_indexing from context_engine.storage.local_backend import LocalBackend from context_engine.storage.vector_store import VectorStore, _to_list +from context_engine.utils import project_storage_dir from context_engine.retrieval.retriever import HybridRetriever from context_engine.compression.compressor import Compressor from context_engine.integration.mcp_server import ContextEngineMCP @@ -169,7 +170,7 @@ async def test_non_git_project_full_pipeline(self, non_git_project, tmp_path, em assert len(result.errors) == 0, f"Indexing errors: {result.errors}" # Search via retriever — use same path the pipeline wrote to - 
project_storage = storage_base / non_git_project.name + project_storage = project_storage_dir(config, non_git_project) backend = LocalBackend(base_path=str(project_storage)) retriever = HybridRetriever(backend=backend, embedder=embedder) chunks = await retriever.retrieve("hello world", top_k=5) @@ -187,7 +188,7 @@ async def test_git_project_full_pipeline(self, git_project, tmp_path, embedder): assert result.total_chunks > 0 assert len(result.errors) == 0 - project_storage = storage_base / git_project.name + project_storage = project_storage_dir(config, git_project) backend = LocalBackend(base_path=str(project_storage)) retriever = HybridRetriever(backend=backend, embedder=embedder) chunks = await retriever.retrieve("greet function", top_k=5) @@ -248,7 +249,7 @@ async def test_context_search_returns_results(self, git_project, tmp_path, embed await run_indexing(config, str(git_project), full=True) - project_storage = storage_base / git_project.name + project_storage = project_storage_dir(config, git_project) backend = LocalBackend(base_path=str(project_storage)) retriever = HybridRetriever(backend=backend, embedder=embedder) compressor = Compressor() @@ -277,7 +278,7 @@ async def test_context_search_updates_stats(self, git_project, tmp_path, embedde await run_indexing(config, str(git_project), full=True) - project_storage = storage_base / git_project.name + project_storage = project_storage_dir(config, git_project) backend = LocalBackend(base_path=str(project_storage)) retriever = HybridRetriever(backend=backend, embedder=embedder) compressor = Compressor() @@ -334,7 +335,7 @@ async def test_graph_expansion_pulls_related_file(self, linked_project, tmp_path await run_indexing(config, str(linked_project), full=True) - project_storage = storage_base / linked_project.name + project_storage = project_storage_dir(config, linked_project) backend = LocalBackend(base_path=str(project_storage)) retriever = HybridRetriever(backend=backend, embedder=embedder)