From 4c10490cd89a2a407351335f2fd5b31e24b9b11b Mon Sep 17 00:00:00 2001 From: westey <164392973+westey-m@users.noreply.github.com> Date: Thu, 11 Jun 2026 14:37:02 +0000 Subject: [PATCH 1/3] Align FileAccess tools with .Net; add directory discovery and recursive search --- python/packages/core/AGENTS.md | 4 +- .../agent_framework/_harness/_file_access.py | 193 +++++++++++++++--- .../tests/core/test_harness_file_access.py | 159 ++++++++++++++- .../data_processing.py | 6 +- .../console/observers/planning_models.py | 7 +- 5 files changed, 327 insertions(+), 42 deletions(-) diff --git a/python/packages/core/AGENTS.md b/python/packages/core/AGENTS.md index 92e83c5df4d..1a353ac3557 100644 --- a/python/packages/core/AGENTS.md +++ b/python/packages/core/AGENTS.md @@ -94,11 +94,11 @@ agent_framework/ ### File Access Harness (`_harness/_file_access.py`) -- **`AgentFileStore`** - Abstract async store backing the file-access harness. Implementations expose `write_file`, `read_file`, `delete_file`, `list_files`, `file_exists`, `search_files`, and `create_directory` over forward-slash relative paths. +- **`AgentFileStore`** - Abstract async store backing the file-access harness. Implementations expose `write_file`, `read_file`, `delete_file`, `list_files`, `list_directories`, `file_exists`, `search_files`, and `create_directory` over forward-slash relative paths. `list_files`/`list_directories` return only direct children; `search_files` accepts a keyword-only `recursive` flag (default `False`) and, when `recursive=True`, walks all descendants and returns `file_name` values relative to the search directory. - **`InMemoryAgentFileStore`** - Dict-backed store suitable for tests and lightweight scenarios. - **`FileSystemAgentFileStore`** - Disk-backed store rooted under a configurable directory. Enforces relative-path normalization, root containment, and rejects symlink/reparse-point segments to prevent escape. - **`FileSearchResult`** / **`FileSearchMatch`** - `SerializationMixin` DTOs returned by `search_files`, carrying the matching file name, a context snippet, and the matching lines with 1-based line numbers. -- **`FileAccessProvider`** - `ContextProvider` that adds shared file-access tools (`file_access_save_file`, `file_access_read_file`, `file_access_delete_file`, `file_access_list_files`, `file_access_search_files`) plus default usage instructions to each invocation. Unlike `MemoryContextProvider`, the store is intentionally shared across sessions and agents. +- **`FileAccessProvider`** - `ContextProvider` that adds shared file-access tools (`file_access_save_file`, `file_access_read_file`, `file_access_delete_file`, `file_access_list_files`, `file_access_list_subdirectories`, `file_access_search_files`) plus default usage instructions to each invocation. `file_access_list_files`/`file_access_list_subdirectories` enumerate direct children (files / subdirectories) so the agent can walk the tree level by level; `file_access_search_files` searches recursively from the store root and returns store-root-relative `file_name` paths, scoped via an `fnmatch` glob (where `*` crosses `/`, e.g. `*.md`, `reports/*`). Unlike `MemoryContextProvider`, the store is intentionally shared across sessions and agents. ### Workflows (`_workflows/`) diff --git a/python/packages/core/agent_framework/_harness/_file_access.py b/python/packages/core/agent_framework/_harness/_file_access.py index 08632827c91..fadcf4617d0 100644 --- a/python/packages/core/agent_framework/_harness/_file_access.py +++ b/python/packages/core/agent_framework/_harness/_file_access.py @@ -5,9 +5,10 @@ Unlike :class:`~agent_framework.MemoryContextProvider`, which provides session-scoped memory that may be isolated per session, :class:`FileAccessProvider` operates on a shared, persistent storage area whose contents are visible across -sessions and agents. The provider exposes five tools — ``file_access_save_file``, +sessions and agents. The provider exposes six tools — ``file_access_save_file``, ``file_access_read_file``, ``file_access_delete_file``, ``file_access_list_files``, -and ``file_access_search_files`` — by registering them on the per-invocation +``file_access_list_subdirectories``, and ``file_access_search_files`` — by +registering them on the per-invocation :class:`~agent_framework.SessionContext` in :meth:`FileAccessProvider.before_run`. The store abstraction is generic so callers can plug in in-memory, local-disk, or @@ -48,7 +49,11 @@ "Use these tools to read input data provided by the user, write output " "artifacts, and manage any files the user has asked you to work with.\n\n" "- Never delete or overwrite existing files unless the user has explicitly " - "asked you to do so." + "asked you to do so.\n" + "- Files may be organized into subdirectories. Use `file_access_list_files` " + "and `file_access_list_subdirectories` to explore the tree level by level, " + "or `file_access_search_files` to search file contents recursively across " + "the whole store." ) # Maximum number of characters of context to include on either side of the first @@ -178,10 +183,16 @@ def _normalize_relative_path(path: str, *, is_directory: bool = False) -> str: def _matches_glob(file_name: str, pattern: str | None) -> bool: """Return whether ``file_name`` matches the optional glob pattern (case-insensitive). - When ``pattern`` is ``None`` or blank this returns True so callers can skip - filtering by passing nothing. Matching uses :func:`fnmatch.fnmatchcase` over a - lowercased pattern/name pair to give consistent results across operating - systems (``fnmatch.fnmatch`` is case-sensitive on POSIX but not on Windows). + ``file_name`` is the forward-slash path of a file relative to the search + directory (for a direct child this is just its basename; for a recursive + search it may contain ``/`` separators). When ``pattern`` is ``None`` or blank + this returns True so callers can skip filtering by passing nothing. Matching + uses :func:`fnmatch.fnmatchcase` over a lowercased pattern/name pair to give + consistent results across operating systems (``fnmatch.fnmatch`` is + case-sensitive on POSIX but not on Windows). Note that with ``fnmatch`` a + ``*`` matches any characters **including** ``/``, so ``"*.md"`` matches + markdown files at any depth and ``"reports/*"`` matches everything under + ``reports``. """ if pattern is None or not pattern.strip(): return True @@ -418,6 +429,18 @@ async def list_files(self, directory: str = "") -> list[str]: The list of file names (not full paths) in the specified directory. """ + @abstractmethod + async def list_directories(self, directory: str = "") -> list[str]: + """List the direct child subdirectory names of ``directory``. + + Args: + directory: The relative directory path to list. Use ``""`` for the root. + + Returns: + The list of subdirectory names (not full paths) directly contained in + the specified directory. + """ + @abstractmethod async def file_exists(self, path: str) -> bool: """Return whether a file exists at ``path``. @@ -432,6 +455,8 @@ async def search_files( directory: str, regex_pattern: str, file_pattern: str | None = None, + *, + recursive: bool = False, ) -> list[FileSearchResult]: """Search files in ``directory`` for content matching ``regex_pattern``. @@ -441,12 +466,19 @@ async def search_files( (case-insensitive). For example, ``"error|warning"`` matches lines containing ``"error"`` or ``"warning"``. file_pattern: An optional glob pattern (case-insensitive) used to - filter which files are searched. When ``None`` or blank, every - file in the directory is searched. + filter which files are searched. The pattern is matched against + each file's path **relative to** ``directory`` (forward slashes). + When ``None`` or blank, every file in scope is searched. + + Keyword Args: + recursive: When ``False`` (default) only the direct children of + ``directory`` are searched. When ``True`` every descendant file is + searched. Returns: The list of files whose content matched, with snippet and matching - line metadata. + line metadata. Each result's ``file_name`` is the path relative to + ``directory`` (forward slashes). """ @abstractmethod @@ -530,6 +562,38 @@ async def list_files(self, directory: str = "") -> list[str]: results.append(display[len(prefix) :]) return results + async def list_directories(self, directory: str = "") -> list[str]: + """Return the direct child subdirectory names of ``directory``. + + A subdirectory is the first path segment of any stored key whose + remainder (after the directory prefix) still contains a ``/`` separator. + Distinct first segments are collected, preserving the *original-case* + display name and de-duplicating case-insensitively, mirroring the + case-preserving behaviour of :class:`FileSystemAgentFileStore`. + """ + prefix = _normalize_relative_path(directory, is_directory=True).lower() + if prefix and not prefix.endswith("/"): + prefix += "/" + async with self._lock: + entries = [(key, display) for key, (display, _) in self._files.items()] + results: list[str] = [] + seen: set[str] = set() + for key, display in entries: + if not key.startswith(prefix): + continue + remainder = key[len(prefix) :] + separator_index = remainder.find("/") + if separator_index <= 0: + continue + segment_key = remainder[:separator_index] + if segment_key in seen: + continue + seen.add(segment_key) + # ``display`` is the original-case normalized path; take the matching + # first segment after the (case-insensitive) prefix. + results.append(display[len(prefix) : len(prefix) + separator_index]) + return results + async def file_exists(self, path: str) -> bool: """Return whether the file exists.""" key = self._key(path) @@ -541,6 +605,8 @@ async def search_files( directory: str, regex_pattern: str, file_pattern: str | None = None, + *, + recursive: bool = False, ) -> list[FileSearchResult]: """Search file contents for ``regex_pattern`` matches. @@ -548,7 +614,10 @@ async def search_files( to a worker thread with a bounded timeout so a pathological pattern cannot stall the event loop. Returned :class:`FileSearchResult` instances use the *original-case* file names so the result mirrors - what :class:`FileSystemAgentFileStore` would produce. + what :class:`FileSystemAgentFileStore` would produce. The glob and each + result's ``file_name`` are relative to ``directory``; when ``recursive`` + is ``True`` all descendants are searched and the relative path may + contain ``/`` separators. """ prefix = _normalize_relative_path(directory, is_directory=True).lower() if prefix and not prefix.endswith("/"): @@ -564,7 +633,7 @@ def scan() -> list[FileSearchResult]: if not key.startswith(prefix): continue relative_key = key[len(prefix) :] - if "/" in relative_key: + if not recursive and "/" in relative_key: continue relative_display = display[len(prefix) :] if not _matches_glob(relative_display, file_pattern): @@ -795,6 +864,28 @@ def _list_files_sync(full_dir: Path) -> list[str]: names.append(entry.name) return names + async def list_directories(self, directory: str = "") -> list[str]: + """Return the direct child subdirectory names of ``directory``. + + Symlinked directories (and reparse points on Windows) are excluded so a + listing cannot surface a path that escapes the root. An empty list is + returned for a non-existent directory. + """ + full_dir = self._resolve_safe_directory_path(directory) + return await asyncio.to_thread(self._list_directories_sync, full_dir) + + @staticmethod + def _list_directories_sync(full_dir: Path) -> list[str]: + if not full_dir.is_dir(): + return [] + names: list[str] = [] + for entry in full_dir.iterdir(): + if entry.is_symlink(): + continue + if entry.is_dir(): + names.append(entry.name) + return names + async def file_exists(self, path: str) -> bool: """Return whether the file exists.""" full_path = self._resolve_safe_path(path) @@ -809,6 +900,8 @@ async def search_files( directory: str, regex_pattern: str, file_pattern: str | None = None, + *, + recursive: bool = False, ) -> list[FileSearchResult]: """Search file contents for ``regex_pattern`` matches. @@ -816,23 +909,50 @@ async def search_files( file does not abort the whole directory search). Each skip is logged at ``WARNING`` level and a summary is logged at ``INFO`` so operators can tell the difference between "no matches" and "the corpus was largely - not searchable". + not searchable". The glob and each result's ``file_name`` are the file's + path relative to ``directory`` (forward slashes); when ``recursive`` is + ``True`` all descendant files are searched, otherwise only the direct + children. """ full_dir = self._resolve_safe_directory_path(directory) regex = _compile_search_regex(regex_pattern) - return await _run_search_with_timeout(lambda: self._search_files_sync(full_dir, regex, file_pattern)) + return await _run_search_with_timeout(lambda: self._search_files_sync(full_dir, regex, file_pattern, recursive)) @staticmethod - def _search_files_sync(full_dir: Path, regex: re.Pattern[str], file_pattern: str | None) -> list[FileSearchResult]: + def _enumerate_search_files(full_dir: Path, recursive: bool) -> list[tuple[str, Path]]: + """Enumerate ``(relative_name, path)`` for files to search under ``full_dir``. + + Symlinked files and symlinked directories (reparse points on Windows) + are skipped so the search cannot read or descend outside the root. + ``relative_name`` is the file's path relative to ``full_dir`` using + forward slashes. + """ + found: list[tuple[str, Path]] = [] + directories: list[Path] = [full_dir] + while directories: + current = directories.pop() + for entry in current.iterdir(): + if entry.is_symlink(): + continue + if entry.is_dir(): + if recursive: + directories.append(entry) + continue + if entry.is_file(): + relative_name = entry.relative_to(full_dir).as_posix() + found.append((relative_name, entry)) + return found + + @staticmethod + def _search_files_sync( + full_dir: Path, regex: re.Pattern[str], file_pattern: str | None, recursive: bool + ) -> list[FileSearchResult]: if not full_dir.is_dir(): return [] results: list[FileSearchResult] = [] skipped: list[str] = [] - for entry in full_dir.iterdir(): - if entry.is_symlink() or not entry.is_file(): - continue - file_name = entry.name - if not _matches_glob(file_name, file_pattern): + for relative_name, entry in FileSystemAgentFileStore._enumerate_search_files(full_dir, recursive): + if not _matches_glob(relative_name, file_pattern): continue try: file_content = entry.read_text(encoding="utf-8") @@ -841,9 +961,9 @@ def _search_files_sync(full_dir: Path, regex: re.Pattern[str], file_pattern: str # un-decodable entry doesn't abort the whole directory search. # Log per file so operators can audit which files were skipped. logger.warning("Skipping non-UTF-8 file during search: %s", entry) - skipped.append(file_name) + skipped.append(relative_name) continue - result = _search_file_content(file_name, file_content, regex) + result = _search_file_content(relative_name, file_content, regex) if result is not None: results.append(result) if skipped: @@ -865,15 +985,18 @@ async def create_directory(self, path: str) -> None: class FileAccessProvider(ContextProvider): """Context provider that gives an agent CRUD/search access to a shared file store. - The provider exposes five tools to the agent via the per-invocation + The provider exposes six tools to the agent via the per-invocation :class:`~agent_framework.SessionContext`: - ``file_access_save_file`` — Save a file (refuses to overwrite by default). - ``file_access_read_file`` — Read the content of a file by name. - ``file_access_delete_file`` — Delete a file by name. - - ``file_access_list_files`` — List all file names at the store root. - - ``file_access_search_files`` — Search file contents using a case-insensitive - regex, optionally filtered by a glob pattern over file names. + - ``file_access_list_files`` — List the direct child file names of a directory. + - ``file_access_list_subdirectories`` — List the direct child subdirectory + names of a directory. + - ``file_access_search_files`` — Recursively search file contents from the + store root using a case-insensitive regex, optionally filtered by a glob + pattern over the store-root-relative file paths. Unlike :class:`~agent_framework.MemoryContextProvider`, which provides session-scoped memory that may be isolated per session, @@ -976,17 +1099,26 @@ async def file_access_list_files(directory: str | None = None) -> list[str] | st except OSError as exc: return f"Could not list directory '{directory or ''}': {exc.strerror or exc}" + @tool(name="file_access_list_subdirectories", approval_mode="never_require") + async def file_access_list_subdirectories(directory: str | None = None) -> list[str] | str: + """List the direct child subdirectory names of a directory. Omit ``directory`` (or pass an empty string) to list the root. To enumerate subdirectories of a subdirectory, pass its relative path, for example ``"reports"`` or ``"reports/2024"``. Use this together with file_access_list_files to explore the directory tree level by level.""" # noqa: E501 + target = directory if directory and directory.strip() else "" + try: + return await self.store.list_directories(target) + except ValueError as exc: + return f"Could not list directory '{directory or ''}': {exc}" + except OSError as exc: + return f"Could not list directory '{directory or ''}': {exc.strerror or exc}" + @tool(name="file_access_search_files", approval_mode="never_require") async def file_access_search_files( regex_pattern: str, file_pattern: str | None = None, - directory: str | None = None, ) -> list[dict[str, Any]] | str: - """Search file contents using a regular expression pattern (case-insensitive). Optionally filter which files to search using a glob pattern (e.g., "*.md", "research*"). Optionally scope the search to a subdirectory by passing its relative path; omit ``directory`` (or pass an empty string) to search the root. Returns matching file names, snippets, and matching lines with line numbers. The regex_pattern must be 256 characters or fewer.""" # noqa: E501 + """Search the contents of all files in the store (recursively, across all subdirectories) using a regular expression pattern (case-insensitive). Optionally filter which files to search using a glob pattern matched against each file's path relative to the store root. The glob uses fnmatch semantics where ``*`` matches any characters including ``/``: use ``"*.md"`` to match markdown files at any depth, or ``"reports/*"`` to restrict the search to the ``reports`` subtree. Leave empty or omit to search all files. Returns matching results whose file_name values are paths relative to the store root (usable with file_access_read_file), along with snippets and matching lines with line numbers. The regex_pattern must be 256 characters or fewer.""" # noqa: E501 pattern = file_pattern if file_pattern and file_pattern.strip() else None - target = directory if directory and directory.strip() else "" try: - results = await self.store.search_files(target, regex_pattern, pattern) + results = await self.store.search_files("", regex_pattern, pattern, recursive=True) except ValueError as exc: return f"Could not search files: {exc}" except OSError as exc: @@ -1001,6 +1133,7 @@ async def file_access_search_files( file_access_read_file, file_access_delete_file, file_access_list_files, + file_access_list_subdirectories, file_access_search_files, ], ) diff --git a/python/packages/core/tests/core/test_harness_file_access.py b/python/packages/core/tests/core/test_harness_file_access.py index c9599ccbf3c..a873d7c93df 100644 --- a/python/packages/core/tests/core/test_harness_file_access.py +++ b/python/packages/core/tests/core/test_harness_file_access.py @@ -158,6 +158,69 @@ async def test_in_memory_store_search_returns_matches_with_snippets() -> None: assert {result.file_name for result in results_all} == {"a.md", "notes.txt"} +async def test_in_memory_store_search_is_recursive_with_root_relative_names() -> None: + """Recursive search should find files at any depth and return root-relative names.""" + store = InMemoryAgentFileStore() + await store.write_file("top.md", "ERROR at top") + await store.write_file("reports/q1.md", "ERROR in q1") + await store.write_file("reports/2024/q2.md", "ERROR in q2") + await store.write_file("reports/2024/data.txt", "ERROR wrong extension") + + # Non-recursive (default) only sees the direct child. + direct = await store.search_files("", "error") + assert {result.file_name for result in direct} == {"top.md"} + + # Recursive sees every descendant, with store-root-relative file names. + recursive = await store.search_files("", "error", recursive=True) + assert {result.file_name for result in recursive} == { + "top.md", + "reports/q1.md", + "reports/2024/q2.md", + "reports/2024/data.txt", + } + + # Subtree scoping via the glob (``*`` crosses ``/`` with fnmatch). + scoped = await store.search_files("", "error", "reports/*", recursive=True) + assert {result.file_name for result in scoped} == { + "reports/q1.md", + "reports/2024/q2.md", + "reports/2024/data.txt", + } + + # Extension glob matches markdown at any depth but not other extensions. + markdown = await store.search_files("", "error", "*.md", recursive=True) + assert {result.file_name for result in markdown} == { + "top.md", + "reports/q1.md", + "reports/2024/q2.md", + } + + +async def test_in_memory_store_list_directories() -> None: + """``list_directories`` should return direct child subdirectories only, preserving casing.""" + store = InMemoryAgentFileStore() + await store.write_file("top.md", "x") + await store.write_file("Reports/q1.md", "x") + await store.write_file("Reports/2024/q2.md", "x") + await store.write_file("data/raw.csv", "x") + + assert sorted(await store.list_directories()) == ["Reports", "data"] + assert sorted(await store.list_directories("Reports")) == ["2024"] + # A directory with no subdirectories returns an empty list. + assert await store.list_directories("data") == [] + # A missing directory returns an empty list. + assert await store.list_directories("missing") == [] + + +async def test_in_memory_store_list_directories_rejects_traversal() -> None: + """``list_directories`` must reject traversal inputs the way ``list_files`` does.""" + store = InMemoryAgentFileStore() + await store.write_file("reports/q1.md", "x") + for bad in ("../escape", "/abs/path", ".."): + with pytest.raises(ValueError): + await store.list_directories(bad) + + async def test_in_memory_store_search_rejects_invalid_and_oversize_regex() -> None: """``search_files`` should surface clean errors for bad regex input.""" store = InMemoryAgentFileStore() @@ -267,6 +330,78 @@ async def test_filesystem_store_search_matches_lines_and_filters_globs(tmp_path: assert {result.file_name for result in results_all} == {"a.md", "b.txt"} +async def test_filesystem_store_search_is_recursive_with_root_relative_names(tmp_path: Path) -> None: + """Recursive filesystem search should walk the subtree and return root-relative names.""" + store = FileSystemAgentFileStore(tmp_path) + await store.write_file("top.md", "ERROR at top") + await store.write_file("reports/q1.md", "ERROR in q1") + await store.write_file("reports/2024/q2.md", "ERROR in q2") + + direct = await store.search_files("", "error") + assert {result.file_name for result in direct} == {"top.md"} + + recursive = await store.search_files("", "error", recursive=True) + assert {result.file_name for result in recursive} == { + "top.md", + "reports/q1.md", + "reports/2024/q2.md", + } + + scoped = await store.search_files("", "error", "reports/*", recursive=True) + assert {result.file_name for result in scoped} == { + "reports/q1.md", + "reports/2024/q2.md", + } + + +async def test_filesystem_store_list_directories(tmp_path: Path) -> None: + """``list_directories`` should list direct child subdirectories only.""" + store = FileSystemAgentFileStore(tmp_path) + await store.write_file("top.md", "x") + await store.write_file("reports/q1.md", "x") + await store.write_file("reports/2024/q2.md", "x") + await store.write_file("data/raw.csv", "x") + + assert sorted(await store.list_directories()) == ["data", "reports"] + assert sorted(await store.list_directories("reports")) == ["2024"] + assert await store.list_directories("data") == [] + assert await store.list_directories("missing") == [] + + +async def test_filesystem_store_list_directories_rejects_traversal(tmp_path: Path) -> None: + """``list_directories`` is security-critical and must reject paths that escape the root.""" + store = FileSystemAgentFileStore(tmp_path) + await store.write_file("reports/q1.md", "x") + for bad in ("../escape", "/etc", "C:/Windows", ".."): + with pytest.raises(ValueError): + await store.list_directories(bad) + + +async def test_filesystem_store_search_and_list_skip_symlinked_directories(tmp_path: Path) -> None: + """Recursive search must not descend into symlinked dirs and ``list_directories`` must exclude them.""" + target = tmp_path / "outside" + target.mkdir() + (target / "secret.md").write_text("ERROR outside the root", encoding="utf-8") + + root = tmp_path / "root" + root.mkdir() + (root / "inside.md").write_text("ERROR inside", encoding="utf-8") + link = root / "linked" + try: + link.symlink_to(target, target_is_directory=True) + except (OSError, NotImplementedError) as exc: + pytest.skip(f"Symbolic links are not supported in this environment: {exc!r}") + + store = FileSystemAgentFileStore(root) + + # ``list_directories`` excludes the symlinked directory. + assert await store.list_directories() == [] + + # Recursive search does not follow the symlink out of the root. + results = await store.search_files("", "error", recursive=True) + assert {result.file_name for result in results} == {"inside.md"} + + async def test_filesystem_store_search_skips_non_utf8_files(tmp_path: Path) -> None: """The filesystem store should silently skip non-UTF-8 files instead of aborting the search.""" store = FileSystemAgentFileStore(tmp_path) @@ -303,7 +438,7 @@ def test_filesystem_store_requires_non_empty_root() -> None: async def test_file_access_provider_registers_tools_and_instructions( chat_client_base: SupportsChatGetResponse, ) -> None: - """``FileAccessProvider.before_run`` should add the canonical instructions and five tools.""" + """``FileAccessProvider.before_run`` should add the canonical instructions and six tools.""" session = AgentSession(session_id="session-1") store = InMemoryAgentFileStore() provider = FileAccessProvider(store=store) @@ -321,6 +456,7 @@ async def test_file_access_provider_registers_tools_and_instructions( "file_access_read_file", "file_access_delete_file", "file_access_list_files", + "file_access_list_subdirectories", "file_access_search_files", } assert {getattr(tool, "name", None) for tool in tools} >= expected_names @@ -354,6 +490,7 @@ async def test_file_access_provider_delete_approval_defaults_to_always_require( "file_access_save_file", "file_access_read_file", "file_access_list_files", + "file_access_list_subdirectories", "file_access_search_files", ): assert _tool_by_name(tools, name).approval_mode == "never_require" @@ -396,6 +533,7 @@ async def test_file_access_provider_tools_round_trip_files( read_file = _tool_by_name(tools, "file_access_read_file") delete_file = _tool_by_name(tools, "file_access_delete_file") list_files = _tool_by_name(tools, "file_access_list_files") + list_subdirectories = _tool_by_name(tools, "file_access_list_subdirectories") search_files = _tool_by_name(tools, "file_access_search_files") saved = await save_file.invoke(arguments={"file_name": "plan.md", "content": "step 1\nERROR step 2"}) @@ -426,6 +564,15 @@ async def test_file_access_provider_tools_round_trip_files( listed_blank = await list_files.invoke(arguments={"directory": " "}) assert sorted(json.loads(listed_blank[0].text)) == ["plan.md"] + # The subdirectory-discovery tool surfaces child directories (not files). + listed_dirs = await list_subdirectories.invoke() + assert json.loads(listed_dirs[0].text) == ["reports"] + listed_dirs_blank = await list_subdirectories.invoke(arguments={"directory": " "}) + assert json.loads(listed_dirs_blank[0].text) == ["reports"] + # A leaf directory with no child directories returns an empty list. + listed_dirs_nested = await list_subdirectories.invoke(arguments={"directory": "reports"}) + assert json.loads(listed_dirs_nested[0].text) == [] + missing = await read_file.invoke(arguments={"file_name": "missing.md"}) assert "not found" in missing[0].text @@ -434,14 +581,12 @@ async def test_file_access_provider_tools_round_trip_files( assert parsed[0]["file_name"] == "plan.md" assert parsed[0]["matching_lines"][0]["line"] == "ERROR replaced" - # The search tool should likewise accept an optional directory argument so - # agents can scope a search to a subfolder. + # The search tool is recursive from the store root; scope to a subtree using + # the glob (``*`` crosses ``/`` with fnmatch). Results use root-relative names. await save_file.invoke(arguments={"file_name": "reports/issues.md", "content": "ERROR nested"}) - scoped = await search_files.invoke( - arguments={"regex_pattern": "error", "file_pattern": "*.md", "directory": "reports"} - ) + scoped = await search_files.invoke(arguments={"regex_pattern": "error", "file_pattern": "reports/*"}) scoped_parsed = json.loads(scoped[0].text) - assert [entry["file_name"] for entry in scoped_parsed] == ["issues.md"] + assert [entry["file_name"] for entry in scoped_parsed] == ["reports/issues.md"] deleted = await delete_file.invoke(arguments={"file_name": "plan.md"}) assert "deleted" in deleted[0].text diff --git a/python/samples/02-agents/context_providers/file_access_data_processing/data_processing.py b/python/samples/02-agents/context_providers/file_access_data_processing/data_processing.py index 0d4e67ff38d..aac04f6ea18 100644 --- a/python/samples/02-agents/context_providers/file_access_data_processing/data_processing.py +++ b/python/samples/02-agents/context_providers/file_access_data_processing/data_processing.py @@ -38,7 +38,9 @@ ## Getting started - Start by listing available files with file_access_list_files to see what data - is available. + is available. Files may be organized into subdirectories — use + file_access_list_subdirectories to discover folders and explore the tree level + by level. - Read the files to understand their structure and contents. ## Working with data @@ -86,7 +88,7 @@ async def main() -> None: # 3. Wire up the file access provider against a file-system-backed store # rooted at the sample's working/ folder. The provider injects its - # default instructions plus exposes five file_access_* tools to the + # default instructions plus exposes six file_access_* tools to the # agent for the duration of each run. file_access = FileAccessProvider(store=FileSystemAgentFileStore(working_dir)) diff --git a/python/samples/02-agents/harness/console/observers/planning_models.py b/python/samples/02-agents/harness/console/observers/planning_models.py index d4c425b0783..ba918d7de27 100644 --- a/python/samples/02-agents/harness/console/observers/planning_models.py +++ b/python/samples/02-agents/harness/console/observers/planning_models.py @@ -41,7 +41,12 @@ class PlanningQuestion(BaseModel): choices: list[str] | None = Field( default=None, description=( - "For clarifications, this has a list of options that the user can choose from. null for approvals." + "For clarifications, this has a list of options that the user can " + "choose from. null for approvals." + "Note: for clarifications, the user will always also be presented with a free form input option, so make sure that each choice provided here is a valid input for the next turn." + 'E.g. if the question is "Which stock are you referring to?" then valid choices might be ["AAPL", "MSFT", "GOOG"], and the user could also type their own answer.' + "Invalid choices would be [\"Enter tickers directly\", \"Paste tickers\"], since these conflict with the already existing freeform option, and doesn't directly provide valid inputs for the next turn." + ), ) From e18bc5a1f9bd81db153e0e3868703761342d07fe Mon Sep 17 00:00:00 2001 From: westey <164392973+westey-m@users.noreply.github.com> Date: Thu, 11 Jun 2026 14:54:59 +0000 Subject: [PATCH 2/3] Fix choices field description: spacing, line length, grammar Addresses PR review: separate concatenated string literals with proper spacing/newlines, wrap lines under the 120-char Ruff limit, and fix "doesn't" -> "don't". Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../harness/console/observers/planning_models.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/python/samples/02-agents/harness/console/observers/planning_models.py b/python/samples/02-agents/harness/console/observers/planning_models.py index ba918d7de27..909f6a4036b 100644 --- a/python/samples/02-agents/harness/console/observers/planning_models.py +++ b/python/samples/02-agents/harness/console/observers/planning_models.py @@ -42,11 +42,16 @@ class PlanningQuestion(BaseModel): default=None, description=( "For clarifications, this has a list of options that the user can " - "choose from. null for approvals." - "Note: for clarifications, the user will always also be presented with a free form input option, so make sure that each choice provided here is a valid input for the next turn." - 'E.g. if the question is "Which stock are you referring to?" then valid choices might be ["AAPL", "MSFT", "GOOG"], and the user could also type their own answer.' - "Invalid choices would be [\"Enter tickers directly\", \"Paste tickers\"], since these conflict with the already existing freeform option, and doesn't directly provide valid inputs for the next turn." - + "choose from. null for approvals.\n\n" + "Note: for clarifications, the user will always also be presented with " + "a free form input option, so make sure that each choice provided here " + "is a valid input for the next turn.\n" + 'E.g. if the question is "Which stock are you referring to?" then valid ' + 'choices might be ["AAPL", "MSFT", "GOOG"], and the user could also type ' + "their own answer.\n" + 'Invalid choices would be ["Enter tickers directly", "Paste tickers"], ' + "since these conflict with the already existing freeform option, and " + "don't directly provide valid inputs for the next turn." ), ) From 9a4081ceb4d3ee1d4a0b150e80f13a3c20d3a0d5 Mon Sep 17 00:00:00 2001 From: westey <164392973+westey-m@users.noreply.github.com> Date: Thu, 11 Jun 2026 18:47:54 +0000 Subject: [PATCH 3/3] Address PR comments --- .../agent_framework/_harness/_file_access.py | 23 +++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/python/packages/core/agent_framework/_harness/_file_access.py b/python/packages/core/agent_framework/_harness/_file_access.py index fadcf4617d0..98024b2cdf8 100644 --- a/python/packages/core/agent_framework/_harness/_file_access.py +++ b/python/packages/core/agent_framework/_harness/_file_access.py @@ -1101,7 +1101,13 @@ async def file_access_list_files(directory: str | None = None) -> list[str] | st @tool(name="file_access_list_subdirectories", approval_mode="never_require") async def file_access_list_subdirectories(directory: str | None = None) -> list[str] | str: - """List the direct child subdirectory names of a directory. Omit ``directory`` (or pass an empty string) to list the root. To enumerate subdirectories of a subdirectory, pass its relative path, for example ``"reports"`` or ``"reports/2024"``. Use this together with file_access_list_files to explore the directory tree level by level.""" # noqa: E501 + """List the direct child subdirectory names of a directory. + + Omit ``directory`` (or pass an empty string) to list the root. + To enumerate subdirectories of a subdirectory, pass its relative path, for example + ``"reports"`` or ``"reports/2024"``. + Use this together with file_access_list_files to explore the directory tree level by level. + """ target = directory if directory and directory.strip() else "" try: return await self.store.list_directories(target) @@ -1115,7 +1121,20 @@ async def file_access_search_files( regex_pattern: str, file_pattern: str | None = None, ) -> list[dict[str, Any]] | str: - """Search the contents of all files in the store (recursively, across all subdirectories) using a regular expression pattern (case-insensitive). Optionally filter which files to search using a glob pattern matched against each file's path relative to the store root. The glob uses fnmatch semantics where ``*`` matches any characters including ``/``: use ``"*.md"`` to match markdown files at any depth, or ``"reports/*"`` to restrict the search to the ``reports`` subtree. Leave empty or omit to search all files. Returns matching results whose file_name values are paths relative to the store root (usable with file_access_read_file), along with snippets and matching lines with line numbers. The regex_pattern must be 256 characters or fewer.""" # noqa: E501 + """Search the contents of all files in the store using a case-insensitive regular expression. + + The search runs recursively across all subdirectories. + Optionally filter which files to search using a glob pattern matched against each file's + path relative to the store root. + The glob uses fnmatch semantics where ``*`` matches any characters including ``/``: use + ``"*.md"`` to match markdown files at any depth, + or ``"reports/*"`` to restrict the search to the ``reports`` subtree. + Leave empty or omit to search all files. + Returns matching results whose file_name values are paths relative to the store root + (usable with file_access_read_file), + along with snippets and matching lines with line numbers. The regex_pattern must be + 256 characters or fewer. + """ pattern = file_pattern if file_pattern and file_pattern.strip() else None try: results = await self.store.search_files("", regex_pattern, pattern, recursive=True)