From 69e4555d277d52e01604801a7ca857133fe9fc8b Mon Sep 17 00:00:00 2001 From: Saivedant Hava Date: Sat, 6 Jun 2026 17:09:14 -0400 Subject: [PATCH] fix(knowledge): guard TreeKnowledge.find_path against malformed parent links find_path walked parent_id pointers without checking they stayed inside the node map. A parent_id pointing at a missing node raised KeyError, and a cycle in the parent links looped forever. Node data can come straight from an LLM (Paper is built via Agent(output_type=Paper)), so parent_id has no referential guarantee and both cases are reachable. Stop the walk when the chain leaves the node map or revisits a node, and return the best-effort partial path ending at the requested node. The happy path is unchanged. Adds tests for the dangling and cyclic cases. --- quantmind/knowledge/_tree.py | 16 ++++++++++++++-- tests/knowledge/test_tree.py | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 2 deletions(-) diff --git a/quantmind/knowledge/_tree.py b/quantmind/knowledge/_tree.py index 6725528..8958a54 100644 --- a/quantmind/knowledge/_tree.py +++ b/quantmind/knowledge/_tree.py @@ -74,12 +74,24 @@ def walk_dfs(self) -> Iterator[TreeNode]: stack.extend(reversed(node.children_ids)) def find_path(self, node_id: UUID) -> list[TreeNode]: - """Root-to-node path. Empty if `node_id` is not in the tree.""" + """Root-to-node path. + + Returns an empty list if ``node_id`` is not in the tree. If the + ancestor chain is malformed (a ``parent_id`` points outside the + node map, or the parents form a cycle), the walk stops early and + returns the best-effort partial path ending at ``node_id`` instead + of raising or looping forever. Node data may come from an LLM, so + ``parent_id`` carries no referential guarantee. + """ if node_id not in self.nodes: return [] path: list[TreeNode] = [] cursor: UUID | None = node_id - while cursor is not None: + visited: set[UUID] = set() + while cursor is not None and cursor in self.nodes: + if cursor in visited: + break + visited.add(cursor) node = self.nodes[cursor] path.append(node) cursor = node.parent_id diff --git a/tests/knowledge/test_tree.py b/tests/knowledge/test_tree.py index 825b04b..a5265fd 100644 --- a/tests/knowledge/test_tree.py +++ b/tests/knowledge/test_tree.py @@ -143,6 +143,40 @@ def test_find_path_unknown(self): tree = _make_tree() self.assertEqual(tree.find_path(uuid4()), []) + def test_find_path_dangling_parent(self): + # A parent_id pointing outside the node map must not raise. + missing = uuid4() + child_id = uuid4() + child = TreeNode( + node_id=child_id, + parent_id=missing, + title="Child", + summary="orphan", + ) + tree = _SampleTree( + as_of=_now(), + source=_src(), + root_node_id=child_id, + nodes={child_id: child}, + ) + path = tree.find_path(child_id) + self.assertEqual([n.title for n in path], ["Child"]) + + def test_find_path_cyclic_parent(self): + # A parent_id cycle must terminate, not loop forever. + a_id, b_id = uuid4(), uuid4() + a = TreeNode(node_id=a_id, parent_id=b_id, title="A", summary="a") + b = TreeNode(node_id=b_id, parent_id=a_id, title="B", summary="b") + tree = _SampleTree( + as_of=_now(), + source=_src(), + root_node_id=a_id, + nodes={a_id: a, b_id: b}, + ) + path = tree.find_path(a_id) + self.assertEqual({n.title for n in path}, {"A", "B"}) + self.assertEqual(len(path), 2) + def test_embedding_text_uses_root(self): tree = _make_tree() self.assertEqual(tree.embedding_text(), "Root\nroot summary")