Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 20 additions & 24 deletions src/context_engine/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
sys.stderr.reconfigure(encoding="utf-8", errors="replace")

from context_engine.config import load_config, resolve_ollama_url, PROJECT_CONFIG_NAME
from context_engine.utils import project_storage_dir


def _safe_cwd() -> Path:
Expand Down Expand Up @@ -330,7 +331,7 @@ def _check_memory_capture_reachable(config, project_dir: Path) -> None:
"""
import socket
project_name = project_dir.name
storage_base = Path(config.storage_path) / project_name
storage_base = project_storage_dir(config, project_dir)
# Try the storage-local file first (authoritative), then fall back to
# the default-path rendezvous file `cce serve` writes for the hook
# shell script. Either is sufficient for the probe.
Expand Down Expand Up @@ -443,7 +444,7 @@ def _show_welcome_banner(config) -> None:

project_dir = _safe_cwd()
project_name = project_dir.name
storage_dir = Path(config.storage_path) / project_name
storage_dir = project_storage_dir(config, project_dir)

# Gather stats
chunks = 0
Expand Down Expand Up @@ -873,8 +874,7 @@ def init(ctx: click.Context, agent: str) -> None:
click.echo("")

# 2. Storage
project_name = project_dir.name
storage_dir = Path(config.storage_path) / project_name
storage_dir = project_storage_dir(config, project_dir)
storage_dir.mkdir(parents=True, exist_ok=True)
meta_path = storage_dir / "meta.json"
meta_path.write_text(json.dumps({"project_dir": str(project_dir.resolve())}))
Expand Down Expand Up @@ -993,7 +993,7 @@ def status(ctx: click.Context, output_json: bool, oneline: bool) -> None:
except Exception:
ver = "?"
project_name = _safe_cwd().name
storage = Path(config.storage_path) / project_name
storage = project_storage_dir(config, _safe_cwd())
stats_path = storage / "stats.json"
chunks = 0
savings = ""
Expand Down Expand Up @@ -1068,8 +1068,7 @@ def status(ctx: click.Context, output_json: bool, oneline: bool) -> None:
lines.append(f" {BULLET} {label('Compress')} {value(compression_mode)}")

# Token savings
project_name = _safe_cwd().name
stats_path = Path(config.storage_path) / project_name / "stats.json"
stats_path = project_storage_dir(config, _safe_cwd()) / "stats.json"
lines.append("")
lines.append(section("Token Savings"))
lines.append("")
Expand All @@ -1090,15 +1089,15 @@ def status(ctx: click.Context, output_json: bool, oneline: bool) -> None:
except (KeyError, _json.JSONDecodeError):
lines.append(f" {DOT} {dim('Error reading stats')}")
else:
storage_dir = Path(config.storage_path) / _safe_cwd().name
storage_dir = project_storage_dir(config, _safe_cwd())
vectors_dir = storage_dir / "vectors"
if not vectors_dir.exists():
lines.append(f" {DOT} {dim('Project not indexed yet')} {label('cce init')}")
else:
lines.append(f" {DOT} {dim('No usage recorded yet')} {dim('run context_search via MCP')}")

# Embedding cache stats — surfaces how much the cache is actually saving.
cache_db = Path(config.storage_path) / _safe_cwd().name / "embedding_cache.db"
cache_db = project_storage_dir(config, _safe_cwd()) / "embedding_cache.db"
if cache_db.exists():
try:
from context_engine.indexer.embedding_cache import EmbeddingCache
Expand Down Expand Up @@ -1764,8 +1763,7 @@ def _json_entry(name: str, stats: dict, buckets: dict, levels: dict) -> dict:
key=lambda d: d.name,
)
else:
project_name = _safe_cwd().name
project_dirs = [storage_root / project_name]
project_dirs = [project_storage_dir(config, _safe_cwd())]

# Each report carries its bucket totals and level histogram alongside
# the legacy stats.json so downstream renderers/JSON emitters can
Expand Down Expand Up @@ -1872,7 +1870,7 @@ def clear(ctx: click.Context, yes: bool) -> None:

config = ctx.obj["config"]
project_name = _safe_cwd().name
storage_dir = Path(config.storage_path) / project_name
storage_dir = project_storage_dir(config, _safe_cwd())

if not storage_dir.exists():
animate(["", f" {DOT} {dim('No index data found for')} {value(project_name)}", ""])
Expand Down Expand Up @@ -1975,14 +1973,13 @@ def search(ctx: click.Context, query: str, top_k: int) -> None:

config = ctx.obj["config"]
project_dir = str(_safe_cwd())
project_name = _safe_cwd().name

async def _search():
from context_engine.storage.local_backend import LocalBackend
from context_engine.indexer.embedder import Embedder
from context_engine.retrieval.retriever import HybridRetriever

storage_dir = Path(config.storage_path) / project_name
storage_dir = project_storage_dir(config, _safe_cwd())
if not (storage_dir / "vectors").exists():
animate(["", f" {DOT} {dim('Not indexed yet. Run:')} {label('cce init')}", ""])
return
Expand Down Expand Up @@ -2216,7 +2213,7 @@ def uninstall(yes: bool) -> None:

# Remove index data from ~/.cce/projects/<project>
config = load_config()
index_dir = Path(config.storage_path) / project_name
index_dir = project_storage_dir(config, project_dir)
if index_dir.exists():
import shutil
shutil.rmtree(index_dir)
Expand Down Expand Up @@ -2573,7 +2570,7 @@ def sessions_status(ctx: click.Context) -> None:

config = ctx.obj["config"]
project_name = _safe_cwd().name
storage_base = Path(config.storage_path) / project_name
storage_base = project_storage_dir(config, _safe_cwd())
db_path = memory_db.memory_db_path(storage_base)

click.echo(f" project: {project_name}")
Expand Down Expand Up @@ -2708,8 +2705,7 @@ def sessions_prune(
from context_engine.memory import db as memory_db

config = ctx.obj["config"]
project_name = _safe_cwd().name
storage_base = Path(config.storage_path) / project_name
storage_base = project_storage_dir(config, _safe_cwd())
sessions_dir = storage_base / "sessions"

if sessions_dir.exists():
Expand Down Expand Up @@ -2792,7 +2788,7 @@ def sessions_export(

config = ctx.obj["config"]
project_name = _safe_cwd().name
storage_base = Path(config.storage_path) / project_name
storage_base = project_storage_dir(config, _safe_cwd())
db_path = memory_db.memory_db_path(storage_base)
if not db_path.exists():
click.echo(" No memory.db for this project — nothing to export.")
Expand Down Expand Up @@ -2905,7 +2901,7 @@ def sessions_migrate(ctx: click.Context, no_archive: bool) -> None:

config = ctx.obj["config"]
project_name = _safe_cwd().name
storage_base = Path(config.storage_path) / project_name
storage_base = project_storage_dir(config, _safe_cwd())
db_path = memory_db.memory_db_path(storage_base)

conn = memory_db.connect(db_path)
Expand Down Expand Up @@ -3028,16 +3024,16 @@ def phase_fn(msg: str) -> None:
)

# Update full_file_tokens baseline so cce savings shows codebase size
project_name = Path(project_dir).name
stats_path = Path(config.storage_path) / project_name / "stats.json"
_storage_dir = project_storage_dir(config, Path(project_dir))
stats_path = _storage_dir / "stats.json"
try:
stats = json.loads(stats_path.read_text()) if stats_path.exists() else {}
except (json.JSONDecodeError, OSError):
stats = {}
total_tokens = 0
project_root = Path(project_dir)
from context_engine.storage.local_backend import LocalBackend
backend = LocalBackend(base_path=str(Path(config.storage_path) / project_name))
backend = LocalBackend(base_path=str(_storage_dir))
for rel_path in backend._vector_store.file_chunk_counts():
fp = project_root / rel_path
if fp.exists():
Expand Down Expand Up @@ -3075,7 +3071,7 @@ async def _run_serve(config) -> None:

project_dir = str(_safe_cwd())
project_name = _safe_cwd().name
storage_base = Path(config.storage_path) / project_name
storage_base = project_storage_dir(config, _safe_cwd())
backend = LocalBackend(base_path=str(storage_base))
embedder = Embedder(model_name=config.embedding_model)
retriever = HybridRetriever(backend=backend, embedder=embedder)
Expand Down
3 changes: 2 additions & 1 deletion src/context_engine/dashboard/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from context_engine.dashboard._page import PAGE_HTML
from context_engine.indexer.pipeline import PathOutsideProjectError, run_indexing
from context_engine.storage.local_backend import LocalBackend
from context_engine.utils import project_storage_dir

# Mutating HTTP methods require a same-origin browser request OR a non-browser
# client (Sec-Fetch-Site absent). This blocks CSRF from a malicious local page
Expand Down Expand Up @@ -46,7 +47,7 @@ def create_app(config: Config, project_dir: Path) -> FastAPI:
app is self-contained and trivial to test with TestClient.
"""
project_name = project_dir.name
storage_base = Path(config.storage_path) / project_name
storage_base = project_storage_dir(config, project_dir)

app = FastAPI(title="CCE Dashboard", docs_url=None, redoc_url=None)

Expand Down
4 changes: 2 additions & 2 deletions src/context_engine/indexer/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from context_engine.indexer.manifest import Manifest
from context_engine.models import ChunkType, GraphNode, GraphEdge, NodeType, EdgeType
from context_engine.storage.local_backend import LocalBackend
from context_engine.utils import project_storage_dir


# Map a chunk's semantic type to its graph node type. Without this every
Expand Down Expand Up @@ -289,8 +290,7 @@ async def run_indexing(
progress_fn is per-batch.
"""
project_dir = Path(project_dir)
project_name = project_dir.name
storage_base = Path(config.storage_path) / project_name
storage_base = project_storage_dir(config, project_dir)
storage_base.mkdir(parents=True, exist_ok=True)

async with _pipeline_lock(str(storage_base)):
Expand Down
4 changes: 2 additions & 2 deletions src/context_engine/integration/mcp_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import threading
from pathlib import Path

from context_engine.utils import atomic_write_text as _atomic_write_text
from context_engine.utils import atomic_write_text as _atomic_write_text, project_storage_dir

from mcp.server import Server
from mcp.types import Tool, TextContent
Expand Down Expand Up @@ -398,7 +398,7 @@ def __init__(self, retriever, backend, compressor, embedder, config) -> None:
project_name = Path.cwd().name
self._project_name = project_name
self._project_dir = str(Path.cwd())
self._storage_base = Path(config.storage_path) / project_name
self._storage_base = project_storage_dir(config, Path.cwd())
self._storage_base.mkdir(parents=True, exist_ok=True)
self._stats_path = self._storage_base / "stats.json"
self._state_path = self._storage_base / "state.json"
Expand Down
4 changes: 2 additions & 2 deletions src/context_engine/serve_http.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

from context_engine.config import load_config, resolve_ollama_url, PROJECT_CONFIG_NAME
from context_engine.storage.local_backend import LocalBackend
from context_engine.utils import project_storage_dir
from context_engine.indexer.embedder import Embedder
from context_engine.compression.compressor import Compressor
from context_engine.models import Chunk, ChunkType, GraphNode, GraphEdge, NodeType, EdgeType
Expand Down Expand Up @@ -186,8 +187,7 @@ def run_http_server(config=None, host: str = "127.0.0.1", port: int = 8765) -> N
project_path = Path.cwd() / PROJECT_CONFIG_NAME
config = load_config(project_path=project_path if project_path.exists() else None)

project_name = Path.cwd().name
storage_base = Path(config.storage_path) / project_name
storage_base = project_storage_dir(config, Path.cwd())
storage_base.mkdir(parents=True, exist_ok=True)

backend = LocalBackend(base_path=str(storage_base))
Expand Down
54 changes: 54 additions & 0 deletions src/context_engine/utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
"""Shared utilities for CCE."""
import hashlib
import logging
import os
import shutil
import sys
Expand Down Expand Up @@ -47,6 +49,58 @@ def atomic_write_text(path: Path, data: str) -> None:
raise


_log = logging.getLogger(__name__)


def _project_slug(project_dir: Path) -> str:
    """Build a stable storage slug of the form ``<sanitised-basename>-<6hex>``.

    The path is resolved first (so symlinked spellings of one directory
    agree), then:

    * the 6-hex suffix is the start of the SHA-256 digest of the resolved
      absolute path, which keeps projects that merely share a basename
      (``api``, ``web``) apart;
    * the basename keeps ASCII letters, ASCII digits, ``-`` and ``_``;
      every other character becomes ``-``;
    * an empty basename falls back to ``project``.

    NOTE(review): the original note says this matches
    ``editors._project_slug`` -- keep the two in sync if either changes.
    """
    target = project_dir.resolve()
    digest = hashlib.sha256(str(target).encode()).hexdigest()

    pieces = []
    for ch in target.name:
        keep = ch.isascii() and (ch.isalnum() or ch in "-_")
        pieces.append(ch if keep else "-")
    base = "".join(pieces)
    if not base:
        base = "project"

    return f"{base}-{digest[:6]}"


def _legacy_storage_claimed_elsewhere(legacy_path: Path, resolved_project: Path) -> bool:
    """Return True if *legacy_path* is recorded as owned by another live project.

    ``cce init`` writes ``meta.json`` into the storage directory with a
    ``project_dir`` entry holding the resolved project path.  When that
    entry names a *different* directory that still exists on disk, the
    legacy data belongs to that project and must not be adopted here.
    Missing, unreadable or malformed metadata is treated as "no claim".
    """
    import json

    try:
        meta = json.loads((legacy_path / "meta.json").read_text())
        owner = Path(meta["project_dir"])
        return owner != resolved_project and owner.exists()
    except (OSError, ValueError, KeyError, TypeError):
        # No usable ownership record: nothing contradicts migrating.
        return False


def project_storage_dir(config: object, project_dir: Path) -> Path:
    """Return the per-project storage directory under ``config.storage_path``.

    The directory name is the ``<basename>-<6hex>`` slug produced by
    ``_project_slug``, so two projects sharing a basename (e.g.
    ``~/work/api`` and ``~/scratch/api``) get separate storage directories
    instead of silently colliding.

    Legacy migration: if the slug directory does not exist yet but a legacy
    directory (bare basename, no hash suffix) does, the legacy directory is
    renamed to the slug path so existing users keep their data.  The
    migration is skipped when

    * the resolved project path has an empty basename (the legacy path
      would then be the storage root itself),
    * the legacy path is not a directory, or
    * the legacy directory's ``meta.json`` names a different project
      directory that still exists (the data belongs to that project).

    Args:
        config: Object exposing a ``storage_path`` attribute.
        project_dir: Project directory; resolved before use.

    Returns:
        The slug-based storage path.  It is not created here and may not
        exist; callers create it as needed.
    """
    resolved = project_dir.resolve()
    storage_root = Path(config.storage_path)  # type: ignore[union-attr]
    slug_path = storage_root / _project_slug(project_dir)

    # An empty basename would make the "legacy" path equal the storage root;
    # renaming the root into its own child can only fail, so skip migration.
    if not resolved.name:
        return slug_path
    legacy_path = storage_root / resolved.name

    if slug_path.exists() or not legacy_path.is_dir():
        return slug_path

    if _legacy_storage_claimed_elsewhere(legacy_path, resolved):
        _log.info(
            "Legacy storage %s belongs to another project; not migrating to %s",
            legacy_path,
            slug_path,
        )
        return slug_path

    try:
        legacy_path.rename(slug_path)
    except OSError as exc:
        # Best effort: a failed rename must not break the caller.
        _log.warning(
            "Could not migrate legacy storage %s to %s (%s); "
            "using slug path (may re-index)",
            legacy_path,
            slug_path,
            exc,
        )
    else:
        _log.info("Migrated storage %s -> %s", legacy_path, slug_path)

    return slug_path


def resolve_cce_binary() -> str:
"""Find the globally installed cce binary path.

Expand Down
6 changes: 4 additions & 2 deletions tests/dashboard/test_dashboard_smoke.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,17 @@
from context_engine.config import Config
from context_engine.dashboard.server import create_app
from context_engine.memory import db as memory_db
from context_engine.utils import project_storage_dir


def _setup(tmp_path: Path, *, with_stats: bool = False, with_memory: bool = False):
"""Create storage + project dirs, optionally seed data. Return TestClient."""
project_name = "smoke-project"
project_dir = tmp_path / "workspace" / project_name
project_dir.mkdir(parents=True)
storage_base = tmp_path / "storage" / project_name
storage_base.mkdir(parents=True)
config = Config(storage_path=str(tmp_path / "storage"))
storage_base = project_storage_dir(config, project_dir)
storage_base.mkdir(parents=True, exist_ok=True)

if with_stats:
(storage_base / "stats.json").write_text(json.dumps({
Expand Down
14 changes: 8 additions & 6 deletions tests/dashboard/test_memory_endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from context_engine.config import Config
from context_engine.dashboard.server import create_app
from context_engine.memory import db as memory_db
from context_engine.utils import project_storage_dir


@pytest.fixture
Expand All @@ -18,18 +19,19 @@ def client(tmp_path: Path):
project_dir.mkdir()
storage_path = tmp_path / "storage"
storage_path.mkdir()
project_storage = storage_path / "demo"
project_storage.mkdir()

config = Config(
storage_path=str(storage_path),
embedding_model="BAAI/bge-small-en-v1.5",
)
project_storage = project_storage_dir(config, project_dir)
project_storage.mkdir(parents=True, exist_ok=True)

# Minimal manifest so /api/files / /api/status work.
(project_storage / "manifest.json").write_text(
json.dumps({"__schema_version": 2, "files": {}, "last_git_sha": None})
)

config = Config(
storage_path=str(storage_path),
embedding_model="BAAI/bge-small-en-v1.5",
)
app = create_app(config, project_dir)
return TestClient(app), project_storage

Expand Down
6 changes: 4 additions & 2 deletions tests/dashboard/test_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,16 @@

from context_engine.config import Config
from context_engine.dashboard.server import create_app
from context_engine.utils import project_storage_dir


def _setup_storage(tmp_path: Path, project_name: str = "my-project") -> tuple[Path, Path]:
"""Create storage dir with stats.json and manifest.json; return (storage_root, project_dir)."""
project_dir = tmp_path / "workspace" / project_name
project_dir.mkdir(parents=True)
storage_base = tmp_path / "storage" / project_name
storage_base.mkdir(parents=True)
config = Config(storage_path=str(tmp_path / "storage"))
storage_base = project_storage_dir(config, project_dir)
storage_base.mkdir(parents=True, exist_ok=True)
return storage_base, project_dir


Expand Down
Loading
Loading