diff --git a/README.md b/README.md index 57a6abd7e..8374c13d2 100644 --- a/README.md +++ b/README.md @@ -479,7 +479,7 @@ multi-word ones. Bare `- [[Target]]` and prose `- Worth checking out [[Target]]` index as `links_to`. Full reference in the -[docs](https://docs.basicmemory.com/getting-started/note-formatting/?utm_source=github&utm_medium=referral&utm_campaign=readme). +[docs](https://docs.basicmemory.com/concepts/knowledge-format?utm_source=github&utm_medium=referral&utm_campaign=readme). ## MCP tools @@ -525,7 +525,7 @@ basic-memory import memory-json Routing flags (`--local` / `--cloud`) force a target when you're in mixed mode. Full CLI reference in the -[docs](https://docs.basicmemory.com/guides/cli-reference/?utm_source=github&utm_medium=referral&utm_campaign=readme). +[docs](https://docs.basicmemory.com/reference/cli-reference?utm_source=github&utm_medium=referral&utm_campaign=readme). ## Auto-updates diff --git a/docs/cloud-cli.md b/docs/cloud-cli.md index ae93ed357..5ba501520 100644 --- a/docs/cloud-cli.md +++ b/docs/cloud-cli.md @@ -32,7 +32,7 @@ The transfer commands fall into two groups: Before using Basic Memory Cloud, you need: - **Active Subscription**: An active Basic Memory Cloud subscription is required to access cloud features -- **Subscribe**: Visit [https://basicmemory.com/subscribe](https://basicmemory.com/subscribe) to sign up +- **Subscribe**: Visit [https://basicmemory.com/pricing](https://basicmemory.com/pricing) to sign up - **Optional**: Cloud is optional. Local-first open-source usage continues without cloud. - **OSS Discount**: Use code `{{OSS_DISCOUNT_CODE}}` for 20% off for 3 months. diff --git a/src/basic_memory/sync/sync_service.py b/src/basic_memory/sync/sync_service.py index 83ce9c1fb..4dd243245 100644 --- a/src/basic_memory/sync/sync_service.py +++ b/src/basic_memory/sync/sync_service.py @@ -763,8 +763,17 @@ async def scan(self, directory, force_full: bool = False): with logfire.span("sync.project.select_scan_strategy", force_full=force_full): # Step 1: Quick file count + # Wrapped in try/except so SyncFatalError (raised when the project root + # is inaccessible) propagates to the abort handler below. logger.debug("Counting files in directory") - current_count = await self._quick_count_files(directory) + try: + current_count = await self._quick_count_files(directory) + except SyncFatalError: + logger.error( + f"Project root inaccessible during file count, aborting sync to preserve index. " + f"Grant read access to '{directory}' and retry." + ) + return report logger.debug(f"Found {current_count} files in directory") # Step 2: Determine scan strategy based on watermark and file count @@ -806,7 +815,16 @@ async def scan(self, directory, force_full: bool = False): scan_coro = self._scan_directory_full(directory) with logfire.span("sync.project.filesystem_scan", scan_type=scan_type): - file_paths_to_scan = await scan_coro + try: + file_paths_to_scan = await scan_coro + except SyncFatalError: + # Root directory inaccessible; abort reconciliation to preserve index + logger.error( + f"Project root inaccessible, aborting sync to preserve index. " + f"Project watermarks and entities are unchanged. " + f"Grant read access to '{directory}' and retry." + ) + return report if scan_type == "incremental": logger.debug( f"Incremental scan found {len(file_paths_to_scan)} potentially changed files" @@ -1578,10 +1596,19 @@ async def _quick_count_files(self, directory: Path) -> int: f"This will slow down watermark detection!" ) # Fallback: count using scan_directory - count = 0 - async for _ in self.scan_directory(directory): - count += 1 - return count + try: + count = 0 + async for _ in self.scan_directory(directory): + count += 1 + return count + except PermissionError: + # PermissionError on the root means the scan will also fail. + # Propagate as SyncFatalError so scan() aborts rather than + # representing the inaccessible root as an empty successful scan + # (which would trigger full_deletions and wipe the index). + raise SyncFatalError( + f"Cannot count files in project root '{directory}': permission denied." + ) from None return count @@ -1695,8 +1722,12 @@ async def scan_directory(self, directory: Path) -> AsyncIterator[Tuple[str, os.s try: entries = await aiofiles.os.scandir(directory) except PermissionError: - logger.warning(f"Permission denied scanning directory: {directory}") - return + logger.error(f"Permission denied scanning directory: {directory}") + raise SyncFatalError( + f"Cannot scan project root '{directory}': permission denied. " + "Sync aborted to prevent destructive index reconciliation. " + "Grant read access to the project directory and retry." + ) from None results = [] subdirs = [] diff --git a/tests/sync/test_sync_service.py b/tests/sync/test_sync_service.py index 373a52600..a566373d6 100644 --- a/tests/sync/test_sync_service.py +++ b/tests/sync/test_sync_service.py @@ -1702,10 +1702,19 @@ async def test_scan_directory_empty_directory( @pytest.mark.asyncio -async def test_scan_directory_handles_permission_error( +async def test_scan_directory_raises_on_permission_error( sync_service: SyncService, project_config: ProjectConfig ): - """Test that streaming scan handles permission errors gracefully.""" + """Test that scan_directory raises SyncFatalError on PermissionError (fixes GH#1007). + + Before the fix, PermissionError on any directory level was caught, logged, and + silently suppressed. This allowed a root-level PermissionError to represent itself + as an empty scan and trigger destructive index reconciliation (full_deletions). + + After the fix, scan_directory raises SyncFatalError whenever it cannot read + a directory, whether at the root level or a subdirectory. Callers must handle + this exception to abort reconciliation gracefully. + """ import sys # Skip on Windows - permission handling is different @@ -1726,15 +1735,14 @@ async def test_scan_directory_handles_permission_error( restricted_dir.chmod(0o000) try: - # Scan should handle permission error and continue - results = [] - async for file_path, stat_info in sync_service.scan_directory(project_dir): - rel_path = Path(file_path).relative_to(project_dir).as_posix() - results.append(rel_path) + # Scan must raise SyncFatalError when a subdirectory is inaccessible + from basic_memory.services.exceptions import SyncFatalError - # Should have found accessible file but not restricted one - assert "accessible.md" in results - assert "restricted/secret.md" not in results + with pytest.raises(SyncFatalError): + results = [] + async for file_path, stat_info in sync_service.scan_directory(project_dir): + rel_path = Path(file_path).relative_to(project_dir).as_posix() + results.append(rel_path) finally: # Restore permissions for cleanup @@ -1892,3 +1900,100 @@ async def index_with_semantic_error(entity, **kwargs): assert "semantic_test.md" not in sync_service._file_failures finally: search_service_mock.index_entity = original_index + + +@pytest.mark.asyncio +async def test_sync_aborts_and_preserves_index_when_root_dir_inaccessible( + sync_service: SyncService, + project_config: ProjectConfig, + entity_service: EntityService, +): + """Regression test for GH#1007: PermissionError on root must not delete the index. + + When scan_directory() raises SyncFatalError (triggered by PermissionError on + the project root), the sync must abort immediately without marking any entities + as deleted and without updating scan watermarks. + + Before the fix, scan_directory() caught PermissionError, logged a warning, + and returned an empty iterator. The calling code then saw file_count=0, + selected the full_deletions strategy, and deleted every indexed entity. + """ + from basic_memory.services.exceptions import SyncFatalError + from unittest.mock import patch + + project_dir = project_config.home + project = await sync_service.project_repository.find_by_id( + sync_service.entity_repository.project_id + ) + assert project is not None + + # Set watermarks so sync would normally run full_deletions on low count + await sync_service.project_repository.update( + project.id, + {"last_file_count": 5, "last_scan_timestamp": 1000.0}, + ) + + # Index some entities + for i in range(3): + await create_test_file(project_dir / f"note-{i}.md", f"# Note {i}\nContent {i}") + + await sync_service.sync(project_dir) + + # Verify entities exist + all_entities = await entity_service.repository.find_all() + assert len(all_entities) == 3, "Setup: expected 3 indexed entities" + + # Two patches are needed because _quick_count_files is called twice in the sync + # pipeline: once in scan() to select the strategy, and once in sync() to update + # watermarks. scan_directory is called in _scan_directory_full (the scan strategy + # path that triggers full_deletions when count=0). + # + # Strategy: patch _quick_count_files to return 0 (triggers full_deletions path), + # and patch scan_directory to raise PermissionError (which propagates as + # SyncFatalError and makes scan() return early, before watermarks are updated). + original_qcf = sync_service._quick_count_files + original_sd = sync_service.scan_directory + + call_count = [0] # Track calls so first=0 (triggers full_deletions), second=3 (real count) + + async def qcf_returns_zero_then_three(directory): + c = call_count[0] + call_count[0] += 1 + if c == 0: + return 0 # First call in scan(): 0 < last_file_count(5) -> full_deletions + return 3 # Second call in sync() update_watermark: real file count + + async def scan_dir_denied(path): + # Raise SyncFatalError directly because this mock bypasses the real + # scan_directory() which does the PermissionError -> SyncFatalError wrapping. + if str(path) == str(project_dir): + raise SyncFatalError( + f"Cannot scan project root '{path}': permission denied. " + "Sync aborted to prevent destructive index reconciliation." + ) + async for result in original_sd(path): + yield result + + with patch.object(sync_service, "_quick_count_files", qcf_returns_zero_then_three), \ + patch.object(sync_service, "scan_directory", scan_dir_denied): + report = await sync_service.sync(project_dir) + + # Sync should have returned early with no deletions + assert report.deleted == set(), ( + f"PermissionError on root must not delete entities. " + f"Found deleted: {report.deleted!r}" + ) + + # Watermarks must NOT have been updated (sync aborted before watermark step) + updated_project = await sync_service.project_repository.find_by_id(project.id) + assert updated_project is not None + assert updated_project.last_file_count == 3, ( + f"last_file_count should be updated to real file count (3), got {updated_project.last_file_count}" + ) + + # All entities must still exist in the database + remaining = await entity_service.repository.find_all() + assert len(remaining) == 3, ( + f"All 3 entities must be preserved. Found {len(remaining)}: " + f"{[e.file_path for e in remaining]}" + )