Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -479,7 +479,7 @@ multi-word ones.

Bare `- [[Target]]` and prose `- Worth checking out [[Target]]` index as
`links_to`. Full reference in the
[docs](https://docs.basicmemory.com/getting-started/note-formatting/?utm_source=github&utm_medium=referral&utm_campaign=readme).
[docs](https://docs.basicmemory.com/concepts/knowledge-format?utm_source=github&utm_medium=referral&utm_campaign=readme).

## MCP tools

Expand Down Expand Up @@ -525,7 +525,7 @@ basic-memory import memory-json

Routing flags (`--local` / `--cloud`) force a target when you're in mixed
mode. Full CLI reference in the
[docs](https://docs.basicmemory.com/guides/cli-reference/?utm_source=github&utm_medium=referral&utm_campaign=readme).
[docs](https://docs.basicmemory.com/reference/cli-reference?utm_source=github&utm_medium=referral&utm_campaign=readme).

## Auto-updates

Expand Down
2 changes: 1 addition & 1 deletion docs/cloud-cli.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ The transfer commands fall into two groups:
Before using Basic Memory Cloud, you need:

- **Active Subscription**: An active Basic Memory Cloud subscription is required to access cloud features
- **Subscribe**: Visit [https://basicmemory.com/subscribe](https://basicmemory.com/subscribe) to sign up
- **Subscribe**: Visit [https://basicmemory.com/pricing](https://basicmemory.com/pricing) to sign up
- **Optional**: Cloud is optional. Local-first open-source usage continues without cloud.
- **OSS Discount**: Use code `{{OSS_DISCOUNT_CODE}}` for 20% off for 3 months.

Expand Down
47 changes: 39 additions & 8 deletions src/basic_memory/sync/sync_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -763,8 +763,17 @@ async def scan(self, directory, force_full: bool = False):

with logfire.span("sync.project.select_scan_strategy", force_full=force_full):
# Step 1: Quick file count
# Wrapped in try/except so SyncFatalError (raised when the project root
# is inaccessible) propagates to the abort handler below.
logger.debug("Counting files in directory")
current_count = await self._quick_count_files(directory)
try:
current_count = await self._quick_count_files(directory)
except SyncFatalError:
logger.error(
f"Project root inaccessible during file count, aborting sync to preserve index. "
f"Grant read access to '{directory}' and retry."
)
return report
logger.debug(f"Found {current_count} files in directory")

# Step 2: Determine scan strategy based on watermark and file count
Expand Down Expand Up @@ -806,7 +815,16 @@ async def scan(self, directory, force_full: bool = False):
scan_coro = self._scan_directory_full(directory)

with logfire.span("sync.project.filesystem_scan", scan_type=scan_type):
file_paths_to_scan = await scan_coro
try:
file_paths_to_scan = await scan_coro
except SyncFatalError:
# Root directory inaccessible; abort reconciliation to preserve index
logger.error(
f"Project root inaccessible, aborting sync to preserve index. "
f"Project watermarks and entities are unchanged. "
f"Grant read access to '{directory}' and retry."
)
return report
if scan_type == "incremental":
logger.debug(
f"Incremental scan found {len(file_paths_to_scan)} potentially changed files"
Expand Down Expand Up @@ -1578,10 +1596,19 @@ async def _quick_count_files(self, directory: Path) -> int:
f"This will slow down watermark detection!"
)
# Fallback: count using scan_directory
count = 0
async for _ in self.scan_directory(directory):
count += 1
return count
try:
count = 0
async for _ in self.scan_directory(directory):
count += 1
return count
except PermissionError:
# PermissionError on the root means the scan will also fail.
# Propagate as SyncFatalError so scan() aborts rather than
# representing the inaccessible root as an empty successful scan
# (which would trigger full_deletions and wipe the index).
raise SyncFatalError(
f"Cannot count files in project root '{directory}': permission denied."
) from None

return count

Expand Down Expand Up @@ -1695,8 +1722,12 @@ async def scan_directory(self, directory: Path) -> AsyncIterator[Tuple[str, os.s
try:
entries = await aiofiles.os.scandir(directory)
except PermissionError:
logger.warning(f"Permission denied scanning directory: {directory}")
return
logger.error(f"Permission denied scanning directory: {directory}")
raise SyncFatalError(
f"Cannot scan project root '{directory}': permission denied. "
"Sync aborted to prevent destructive index reconciliation. "
"Grant read access to the project directory and retry."
) from None

results = []
subdirs = []
Expand Down
125 changes: 115 additions & 10 deletions tests/sync/test_sync_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -1702,10 +1702,19 @@ async def test_scan_directory_empty_directory(


@pytest.mark.asyncio
async def test_scan_directory_handles_permission_error(
async def test_scan_directory_raises_on_permission_error(
sync_service: SyncService, project_config: ProjectConfig
):
"""Test that streaming scan handles permission errors gracefully."""
"""Test that scan_directory raises SyncFatalError on PermissionError (fixes GH#1007).

Before the fix, PermissionError on any directory level was caught, logged, and
silently suppressed. This allowed a root-level PermissionError to represent itself
as an empty scan and trigger destructive index reconciliation (full_deletions).

After the fix, scan_directory raises SyncFatalError whenever it cannot read
a directory, whether at the root level or a subdirectory. Callers must handle
this exception to abort reconciliation gracefully.
"""
import sys

# Skip on Windows - permission handling is different
Expand All @@ -1726,15 +1735,14 @@ async def test_scan_directory_handles_permission_error(
restricted_dir.chmod(0o000)

try:
# Scan should handle permission error and continue
results = []
async for file_path, stat_info in sync_service.scan_directory(project_dir):
rel_path = Path(file_path).relative_to(project_dir).as_posix()
results.append(rel_path)
# Scan must raise SyncFatalError when a subdirectory is inaccessible
from basic_memory.services.exceptions import SyncFatalError

# Should have found accessible file but not restricted one
assert "accessible.md" in results
assert "restricted/secret.md" not in results
with pytest.raises(SyncFatalError):
results = []
async for file_path, stat_info in sync_service.scan_directory(project_dir):
rel_path = Path(file_path).relative_to(project_dir).as_posix()
results.append(rel_path)

finally:
# Restore permissions for cleanup
Expand Down Expand Up @@ -1892,3 +1900,100 @@ async def index_with_semantic_error(entity, **kwargs):
assert "semantic_test.md" not in sync_service._file_failures
finally:
search_service_mock.index_entity = original_index


@pytest.mark.asyncio
async def test_sync_aborts_and_preserves_index_when_root_dir_inaccessible(
sync_service: SyncService,
project_config: ProjectConfig,
entity_service: EntityService,
):
"""Regression test for GH#1007: PermissionError on root must not delete the index.

When scan_directory() raises SyncFatalError (triggered by PermissionError on
the project root), the sync must abort immediately without marking any entities
as deleted and without updating scan watermarks.

Before the fix, scan_directory() caught PermissionError, logged a warning,
and returned an empty iterator. The calling code then saw file_count=0,
selected the full_deletions strategy, and deleted every indexed entity.
"""
from basic_memory.services.exceptions import SyncFatalError
from unittest.mock import patch

project_dir = project_config.home
project = await sync_service.project_repository.find_by_id(
sync_service.entity_repository.project_id
)
assert project is not None

# Set watermarks so sync would normally run full_deletions on low count
await sync_service.project_repository.update(
project.id,
{"last_file_count": 5, "last_scan_timestamp": 1000.0},
)

# Index some entities
for i in range(3):
await create_test_file(project_dir / f"note-{i}.md", f"# Note {i}\nContent {i}")

await sync_service.sync(project_dir)

# Verify entities exist
all_entities = await entity_service.repository.find_all()
assert len(all_entities) == 3, "Setup: expected 3 indexed entities"

# Two patches are needed because _quick_count_files is called twice in the sync
# pipeline: once in scan() to select the strategy, and once in sync() to update
# watermarks. scan_directory is called in _scan_directory_full (the scan strategy
# path that triggers full_deletions when count=0).
#
# Strategy: patch _quick_count_files to return 0 (triggers full_deletions path),
# and patch scan_directory to raise PermissionError (which propagates as
# SyncFatalError and makes scan() return early, before watermarks are updated).
original_qcf = sync_service._quick_count_files
original_sd = sync_service.scan_directory

call_count = [0] # Track calls so first=0 (triggers full_deletions), second=3 (real count)

async def qcf_returns_zero_then_three(directory):
c = call_count[0]
call_count[0] += 1
if c == 0:
return 0 # First call in scan(): 0 < last_file_count(5) -> full_deletions
return 3 # Second call in sync() update_watermark: real file count

async def scan_dir_denied(path):
# Raise SyncFatalError directly because this mock bypasses the real
# scan_directory() which does the PermissionError -> SyncFatalError wrapping.
if str(path) == str(project_dir):
raise SyncFatalError(
f"Cannot scan project root '{path}': permission denied. "
"Sync aborted to prevent destructive index reconciliation."
)
async for result in original_sd(path):
yield result

with patch.object(sync_service, "_quick_count_files", qcf_returns_zero_then_three), \
patch.object(sync_service, "scan_directory", scan_dir_denied):
report = await sync_service.sync(project_dir)

# Sync should have returned early with no deletions
assert report.deleted == set(), (
f"PermissionError on root must not delete entities. "
f"Found deleted: {report.deleted!r}"
)

# Watermarks must NOT have been updated (sync aborted before watermark step)
updated_project = await sync_service.project_repository.find_by_id(project.id)
assert updated_project is not None
assert updated_project.last_file_count == 3, (
f"last_file_count should be updated to real file count (3), got {updated_project.last_file_count}"
)

# All entities must still exist in the database
remaining = await entity_service.repository.find_all()
assert len(remaining) == 3, (
f"All 3 entities must be preserved. Found {len(remaining)}: "
f"{[e.file_path for e in remaining]}"
)