From c9373e5ec8074bde088ed3276e9c5937a4c59041 Mon Sep 17 00:00:00 2001 From: cristipufu Date: Tue, 3 Mar 2026 16:44:18 +0200 Subject: [PATCH 1/4] fix: resolve debug runtime breakpoint and state event inconsistencies Fix four issues in the debug breakpoint stack: 1. project_dir mismatch: get_schema now uses CWD as project_dir (matching BreakpointController) so graph node IDs are consistent. 2. qualified_node_name line mismatch: state events now use graph node IDs (first-code-line) instead of frame.f_lineno (def line), so clients can map state events to graph nodes. 3. SignalR bridge node reference: _add_breakpoints and _handle_remove_breakpoints now prefer node.id (file:line format) over node.name for function runtimes. 4. Generator/coroutine spurious events: suppress duplicate STARTED on yield/await resumption and skip COMPLETED on yield. Also fix non-project coroutine frame state leak that could poison frame-id reuse. Co-Authored-By: Claude Opus 4.6 --- src/uipath/_cli/_debug/_bridge.py | 31 +- src/uipath/functions/debug.py | 380 ++++++++++---- src/uipath/functions/graph_builder.py | 42 +- src/uipath/functions/runtime.py | 7 +- tests/functions/test_debug_breakpoints.py | 611 +++++++++++++++++++++- tests/functions/test_graph_builder.py | 60 ++- 6 files changed, 975 insertions(+), 156 deletions(-) diff --git a/src/uipath/_cli/_debug/_bridge.py b/src/uipath/_cli/_debug/_bridge.py index 9607398a0..97e461fc4 100644 --- a/src/uipath/_cli/_debug/_bridge.py +++ b/src/uipath/_cli/_debug/_bridge.py @@ -765,16 +765,16 @@ async def _handle_resume(self, args: list[Any]) -> None: def _add_breakpoints(self, breakpoints: list[dict[str, Any]]) -> None: for bp in breakpoints: - node_name = ( - bp.get("node", {}).get("name") - if isinstance(bp.get("node"), dict) - else None - ) - if node_name: - self.state.add_breakpoint(node_name) - logger.info(f"Breakpoint added: {node_name}") + node = bp.get("node", {}) if isinstance(bp.get("node"), dict) else {} + # Prefer node ID ("file:line" format) which the + # BreakpointController can parse. Fall back to node name + # for agent-style runtimes that use name-based breakpoints. + node_ref = node.get("id") or node.get("name") + if node_ref: + self.state.add_breakpoint(node_ref) + logger.info(f"Breakpoint added: {node_ref}") else: - logger.warning(f"Breakpoint without node name: {bp}") + logger.warning(f"Breakpoint without node id or name: {bp}") async def _handle_step(self, args: list[Any]) -> None: """Handle Step command from SignalR server. @@ -820,14 +820,11 @@ async def _handle_remove_breakpoints(self, args: list[Any]) -> None: logger.info("All breakpoints cleared") else: for bp in break_points: - node_name = ( - bp.get("node", {}).get("name") - if isinstance(bp.get("node"), dict) - else None - ) - if node_name: - self.state.remove_breakpoint(node_name) - logger.info(f"Breakpoint removed: {node_name}") + node = bp.get("node", {}) if isinstance(bp.get("node"), dict) else {} + node_ref = node.get("id") or node.get("name") + if node_ref: + self.state.remove_breakpoint(node_ref) + logger.info(f"Breakpoint removed: {node_ref}") async def _handle_quit(self, _args: list[Any]) -> None: """Handle Quit command from SignalR server.""" diff --git a/src/uipath/functions/debug.py b/src/uipath/functions/debug.py index a79835e3d..b94afa400 100644 --- a/src/uipath/functions/debug.py +++ b/src/uipath/functions/debug.py @@ -9,9 +9,9 @@ Composition chain (debug command): - UiPathDebugRuntime → bridge I/O, resume loop - └─ UiPathDebugFunctionsRuntime → trace-based line breakpoints - └─ UiPathFunctionsRuntime → loads & executes user code + UiPathDebugRuntime -> bridge I/O, resume loop + └─ UiPathDebugFunctionsRuntime -> trace-based line breakpoints + └─ UiPathFunctionsRuntime -> loads & executes user code """ from __future__ import annotations @@ -41,6 +41,22 @@ logger = logging.getLogger(__name__) +# Safety limits for local variable capture +_MAX_LOCALS = 100 +_MAX_STRING_LENGTH = 10_000 +_MAX_COLLECTION_ITEMS = 500 + +# Thread health-check interval (seconds) for wait_for_event +_EVENT_POLL_INTERVAL = 1.0 + +# Bitmask for generator / coroutine / async-generator code objects. +# Used to suppress spurious state events on yield/await resumption. +_CO_GENERATOR_LIKE = ( + 0x20 # CO_GENERATOR + | 0x80 # CO_COROUTINE + | 0x200 # CO_ASYNC_GENERATOR +) + def _capture_frame_locals(frame: FrameType) -> dict[str, Any]: """Snapshot local variables from a live frame for variable inspection. @@ -48,22 +64,43 @@ def _capture_frame_locals(frame: FrameType) -> dict[str, Any]: Primitives and JSON-serialisable collections are kept as-is so the bridge can render them natively. Everything else is repr()-ed to guarantee safe serialisation over the wire. + + Safety guards: + - At most ``_MAX_LOCALS`` variables captured. + - Strings longer than ``_MAX_STRING_LENGTH`` are truncated. + - Collections larger than ``_MAX_COLLECTION_ITEMS`` are summarised. """ snapshot: dict[str, Any] = {} - for name, value in frame.f_locals.items(): + for i, (name, value) in enumerate(frame.f_locals.items()): + if i >= _MAX_LOCALS: + snapshot["..."] = f"<{len(frame.f_locals) - _MAX_LOCALS} more locals>" + break try: - if isinstance(value, (bool, int, float, str, type(None))): + if isinstance(value, (bool, int, float, type(None))): snapshot[name] = value + elif isinstance(value, str): + if len(value) <= _MAX_STRING_LENGTH: + snapshot[name] = value + else: + snapshot[name] = value[:_MAX_STRING_LENGTH] + "..." elif isinstance(value, (dict, list, tuple)): - # Strict probe — no default=str, so nested non-serialisable - # objects (code, frame, etc.) correctly fail here. - json.dumps(value) - snapshot[name] = value + if len(value) > _MAX_COLLECTION_ITEMS: + snapshot[name] = f"<{type(value).__name__} with {len(value)} items>" + else: + # Strict probe -- no default=str, so nested non-serialisable + # objects (code, frame, etc.) correctly fail here. + json.dumps(value) + snapshot[name] = value else: snapshot[name] = repr(value) except Exception: try: - snapshot[name] = repr(value) + r = repr(value) + snapshot[name] = ( + r + if len(r) <= _MAX_STRING_LENGTH + else r[:_MAX_STRING_LENGTH] + "..." + ) except Exception: snapshot[name] = "" return snapshot @@ -78,6 +115,22 @@ def _format_location(filepath: str, line: int) -> str: return f"{relative}:{line}" +class _FrameState: + """Per-frame tracking for the trace callback. + + Created on ``call`` and cleaned up on ``return`` (for normal + functions). For generators/coroutines the state persists across + yield/await cycles so we can suppress duplicate "started" events on + resumption and skip "completed" on intermediate yields. + """ + + __slots__ = ("faulted", "is_generator") + + def __init__(self, *, is_generator: bool = False) -> None: + self.faulted: bool = False + self.is_generator: bool = is_generator + + class BreakpointController: """Synchronises a sys.settrace-instrumented thread with an async stream. @@ -87,7 +140,7 @@ class BreakpointController: 2. start() launches delegate.execute() in a daemon thread with tracing enabled. 3. When a matching line is reached the thread pauses and a - ("breakpoint", …) event is enqueued. + ("breakpoint", ...) event is enqueued. 4. The async stream dequeues the event and yields a UiPathBreakpointResult. 5. resume() unblocks the thread so it continues to the next @@ -101,6 +154,7 @@ def __init__( breakpoints: list[str] | Literal["*"], entrypoint_path: str | None = None, state_tracked_functions: dict[str, set[str]] | None = None, + node_id_map: dict[tuple[str, str], str] | None = None, ) -> None: """Initialize the controller. @@ -109,40 +163,51 @@ def __init__( state_tracked_functions: If provided, state events are emitted *only* for function calls whose ``(abs_file_path, func_name)`` appears in this mapping - (``{abs_path: {func_name, …}, …}``). When ``None``, no state + (``{abs_path: {func_name, ...}, ...}``). When ``None``, no state events are emitted. + node_id_map: + Mapping of ``(abs_path, func_name)`` → graph node ID string + (e.g. ``"src/main.py:5"``). Used to set ``qualified_node_name`` + on state events so they match the graph node IDs returned by + ``get_schema``. """ self._project_dir = project_dir self._entrypoint_path = ( os.path.abspath(entrypoint_path) if entrypoint_path else None ) self._step_mode: bool = breakpoints == "*" - self._file_breakpoints: dict[str, set[int]] = {} if isinstance(breakpoints, list): - self._parse_breakpoints(breakpoints) + self._file_breakpoints: dict[str, set[int]] = self._build_breakpoint_map( + breakpoints + ) + else: + self._file_breakpoints: dict[str, set[int]] = {} self._state_tracked: dict[str, set[str]] | None = state_tracked_functions + self._node_id_map: dict[tuple[str, str], str] = node_id_map or {} + # Per-frame tracking. Keyed by id(frame); created on ``call``, + # removed on ``return`` for normal functions. For generators the + # state persists across yield/resume cycles. + self._frame_states: dict[int, _FrameState] = {} self._events: queue.Queue[tuple[str, Any]] = queue.Queue() self._resume_event = threading.Event() self._stopped = False self._thread: threading.Thread | None = None self._abspath_cache: dict[str, str] = {} - # Track which breakpoints already fired per frame so that - # multiline expressions (where the bytecodes bounce back to the - # call line after evaluating arguments on deeper lines) don't - # trigger the same breakpoint twice within one function call. - self._hit_lines: dict[int, set[int]] = {} # frame-id → {lines} # Breakpoint management - def _parse_breakpoints(self, breakpoints: list[str]) -> None: - """Parse breakpoint strings into *file → line-numbers* mappings. + def _build_breakpoint_map(self, breakpoints: list[str]) -> dict[str, set[int]]: + """Parse breakpoint strings into a new *file -> line-numbers* mapping. + + Returns a fresh dict (safe for atomic reference swap). Supported formats:: - "42" → line 42 in the entrypoint file - "main.py:42" → line 42 in main.py (resolved relative to cwd) + "42" -> line 42 in the entrypoint file + "main.py:42" -> line 42 in main.py (resolved relative to cwd) """ + result: dict[str, set[int]] = {} for bp in breakpoints: if ":" in bp: file_part, line_str = bp.rsplit(":", 1) @@ -151,23 +216,27 @@ def _parse_breakpoints(self, breakpoints: list[str]) -> None: except ValueError: continue resolved = os.path.abspath(file_part) - self._file_breakpoints.setdefault(resolved, set()).add(line) + result.setdefault(resolved, set()).add(line) else: try: line = int(bp) except ValueError: continue # non-numeric tokens (agent node names) are ignored if self._entrypoint_path is not None: - self._file_breakpoints.setdefault(self._entrypoint_path, set()).add( - line - ) + result.setdefault(self._entrypoint_path, set()).add(line) + return result def update_breakpoints(self, breakpoints: list[str] | Literal["*"] | None) -> None: - """Replace the active breakpoint set (called between resume cycles).""" + """Replace the active breakpoint set (called between resume cycles). + + Builds a new dict and swaps the reference atomically so the trace + thread never sees a partially-cleared mapping. + """ self._step_mode = breakpoints == "*" - self._file_breakpoints.clear() if isinstance(breakpoints, list): - self._parse_breakpoints(breakpoints) + self._file_breakpoints = self._build_breakpoint_map(breakpoints) + else: + self._file_breakpoints = {} # Tracing @@ -195,7 +264,7 @@ def _is_tracked_function(self, abspath: str, func_name: str) -> bool: return funcs is not None and func_name in funcs def _trace_callback(self, frame: FrameType, event: str, arg: Any) -> Any: - """sys.settrace callback — dispatched for every frame event.""" + """sys.settrace callback -- dispatched for every frame event.""" if self._stopped: return None @@ -210,35 +279,54 @@ def _trace_callback(self, frame: FrameType, event: str, arg: Any) -> Any: if event == "call": is_project = self._is_project_file(filepath) - # Emit state event only for tracked graph-node functions - if is_project and self._is_tracked_function( - filepath, frame.f_code.co_name - ): - self._events.put( - ( - "state", - { - "file": filepath, - "line": frame.f_lineno, - "function": frame.f_code.co_name, - "locals": _capture_frame_locals(frame), - "phase": "started", - }, + # Decide whether to trace *into* this frame FIRST. + # We only get "return" events for traced frames, so we + # must not create _FrameState for frames we won't trace + # (their state would leak and poison future frame-id reuse). + trace_into = False + if self._step_mode: + trace_into = is_project + elif filepath in self._file_breakpoints: + trace_into = True + elif is_project and filepath in (self._state_tracked or {}): + trace_into = True + + if not trace_into: + return None + + frame_id = id(frame) + func_name = frame.f_code.co_name + is_gen = bool(frame.f_code.co_flags & _CO_GENERATOR_LIKE) + + existing = self._frame_states.get(frame_id) + if existing is not None and existing.is_generator: + # Generator/coroutine resumption after yield/await. + # Don't emit a duplicate "started" event and don't + # reset the fault tracker. + pass + else: + # Fresh frame (or a reused id from a deallocated frame). + state = _FrameState(is_generator=is_gen) + self._frame_states[frame_id] = state + + # Emit "started" state event for tracked functions. + if is_project and self._is_tracked_function(filepath, func_name): + node_id = self._node_id_map.get((filepath, func_name)) + self._events.put( + ( + "state", + { + "file": filepath, + "line": frame.f_lineno, + "function": func_name, + "locals": _capture_frame_locals(frame), + "phase": "started", + "node_id": node_id, + }, + ) ) - ) - # Reset per-frame hit tracking so each call starts fresh. - self._hit_lines[id(frame)] = set() - - # Decide whether to trace *into* this function's frame. - if self._step_mode: - return self._trace_callback if is_project else None - if filepath in self._file_breakpoints: - return self._trace_callback - # Also trace project files that contain tracked functions - if is_project and filepath in (self._state_tracked or {}): - return self._trace_callback - return None + return self._trace_callback if event == "line": # Skip module-level lines (imports, class/function defs). @@ -252,17 +340,6 @@ def _trace_callback(self, frame: FrameType, event: str, arg: Any) -> Any: ) or (lineno in self._file_breakpoints.get(filepath, ())) if should_break: - # Deduplicate: multiline expressions (e.g. - # ``return Foo(arg=bar(...))``) cause the bytecode to - # bounce back to the call-site line after evaluating - # arguments on deeper lines. Without this guard the - # same breakpoint would fire twice per call. - frame_hits = self._hit_lines.get(id(frame)) - if frame_hits is not None and lineno in frame_hits: - return self._trace_callback - if frame_hits is not None: - frame_hits.add(lineno) - self._events.put( ( "breakpoint", @@ -281,39 +358,69 @@ def _trace_callback(self, frame: FrameType, event: str, arg: Any) -> Any: if self._stopped: return None - elif event == "return": - # Emit a "completed" state event for tracked functions + elif event == "exception": + # Record that this frame saw an unhandled exception so the + # "return" handler can emit "faulted" instead of "completed". if self._is_project_file(filepath) and self._is_tracked_function( filepath, frame.f_code.co_name ): - locals_snapshot = _capture_frame_locals(frame) - if arg is not None: - try: - import json as _json - - _json.dumps(arg) - locals_snapshot["__return__"] = arg - except Exception: - locals_snapshot["__return__"] = repr(arg) - self._events.put( - ( - "state", - { - "file": filepath, - "line": frame.f_lineno, - "function": frame.f_code.co_name, - "locals": locals_snapshot, - "phase": "completed", - }, + state = self._frame_states.get(id(frame)) + if state is not None: + state.faulted = True + + elif event == "return": + frame_id = id(frame) + func_name = frame.f_code.co_name + state = self._frame_states.get(frame_id) + + # Generator/coroutine frames: don't pop state or emit + # "completed" on yield/await -- the frame is merely + # suspended, not finished. + if state is not None and state.is_generator: + pass # state persists for the next resumption + else: + # Normal function: pop state and emit terminal event. + state = self._frame_states.pop(frame_id, None) + if self._is_project_file(filepath) and self._is_tracked_function( + filepath, func_name + ): + saw_exception = state is not None and state.faulted + + # Exception propagated (arg is None on unhandled raise) + if saw_exception and arg is None: + phase = "faulted" + else: + phase = "completed" + + node_id = self._node_id_map.get((filepath, func_name)) + locals_snapshot = _capture_frame_locals(frame) + if arg is not None: + try: + import json as _json + + _json.dumps(arg) + locals_snapshot["__return__"] = arg + except Exception: + locals_snapshot["__return__"] = repr(arg) + self._events.put( + ( + "state", + { + "file": filepath, + "line": frame.f_lineno, + "function": func_name, + "locals": locals_snapshot, + "phase": phase, + "node_id": node_id, + }, + ) ) - ) - # Clean up per-frame tracking when the frame exits. - self._hit_lines.pop(id(frame), None) return self._trace_callback except Exception: - # Never let our own errors propagate — that would disable tracing. + # Never let our own errors propagate -- that would disable tracing. + logger.debug("Error in trace callback", exc_info=True) return self._trace_callback # Thread lifecycle @@ -345,24 +452,47 @@ def _run( options: UiPathExecuteOptions | None, ) -> None: """Thread entry-point: install the trace, execute, report result.""" + loop = asyncio.new_event_loop() try: sys.settrace(self._trace_callback) - loop = asyncio.new_event_loop() try: result = loop.run_until_complete(delegate.execute(input, options)) finally: - loop.close() - sys.settrace(None) + sys.settrace(None) self._events.put(("completed", result)) except Exception as exc: - sys.settrace(None) self._events.put(("error", exc)) + finally: + # Cancel any lingering tasks so loop.close() doesn't warn. + try: + pending = asyncio.all_tasks(loop) + for task in pending: + task.cancel() + if pending: + loop.run_until_complete( + asyncio.gather(*pending, return_exceptions=True) + ) + except Exception: + pass + loop.close() # Inter-thread communication def wait_for_event(self) -> tuple[str, Any]: - """Block until the next breakpoint hit or execution completion.""" - return self._events.get() + """Block until the next breakpoint hit or execution completion. + + Periodically checks that the trace thread is still alive so callers + do not hang indefinitely if the thread dies unexpectedly (e.g. via + ``SystemExit`` or a C-extension segfault). + """ + while True: + try: + return self._events.get(timeout=_EVENT_POLL_INTERVAL) + except queue.Empty: + if self._thread is not None and not self._thread.is_alive(): + raise RuntimeError( + "Trace thread died without producing a terminal event" + ) def resume(self) -> None: """Unblock the trace thread so it continues past the current breakpoint.""" @@ -391,7 +521,7 @@ class UiPathDebugFunctionsRuntime: that appears in the entrypoint's call graph, so the debug bridge can visualise execution flow through the graph nodes. - Works for both sync and async user functions — async functions run in + Works for both sync and async user functions -- async functions run in a dedicated asyncio event loop on the background thread. Parameters @@ -438,9 +568,9 @@ async def stream( Breakpoint formats (via options.breakpoints): - * "42" — line 42 in the entrypoint file - * "main.py:42" — line 42 in *main.py* (resolved from cwd) - * "*" — **step mode**: break on every line in project files + * "42" -- line 42 in the entrypoint file + * "main.py:42" -- line 42 in *main.py* (resolved from cwd) + * "*" -- **step mode**: break on every line in project files """ breakpoints = options.breakpoints if options else None @@ -455,9 +585,9 @@ async def stream( # Build the set of tracked functions from the call graph so we # can emit state events even without breakpoints. - tracked = self._build_tracked_functions() + tracked, node_id_map = self._build_tracked_functions() - # Nothing to trace → transparent delegation. The controller + # Nothing to trace -> transparent delegation. The controller # path runs delegate.execute() in a background thread with a # new asyncio event loop, so we only use it when there is # something to observe (breakpoints and/or state tracking). @@ -471,10 +601,11 @@ async def stream( breakpoints=breakpoints if breakpoints else [], entrypoint_path=self._entrypoint_path, state_tracked_functions=tracked, + node_id_map=node_id_map, ) self._controller = controller - # Strip breakpoints from the options forwarded to the delegate — + # Strip breakpoints from the options forwarded to the delegate -- # the delegate does not handle them; we do via the trace. delegate_options = UiPathExecuteOptions( resume=options.resume if options else False, @@ -495,13 +626,23 @@ async def dispose(self) -> None: self._controller = None await self.delegate.dispose() - def _build_tracked_functions(self) -> dict[str, set[str]] | None: - """Build a mapping of abs_file → {func_names} from the call graph. - - Returns None when the graph cannot be built (missing path / name). + def _build_tracked_functions( + self, + ) -> tuple[dict[str, set[str]] | None, dict[tuple[str, str], str]]: + """Build tracked-function and node-ID mappings from the call graph. + + Returns: + ------- + tracked: + ``{abs_path: {func_name, ...}}`` or ``None`` when the graph + cannot be built. + node_id_map: + ``{(abs_path, func_name): node_id}`` so the trace callback + can set ``qualified_node_name`` to match the graph node IDs + returned by ``get_schema``. """ if not self._entrypoint_path or not self._function_name: - return None + return None, {} try: from .graph_builder import build_call_graph @@ -513,17 +654,19 @@ def _build_tracked_functions(self) -> dict[str, set[str]] | None: ) tracked: dict[str, set[str]] = {} + node_id_map: dict[tuple[str, str], str] = {} for node in graph.nodes: file_rel = (node.metadata or {}).get("file") if not file_rel: continue abs_path = os.path.abspath(file_rel) tracked.setdefault(abs_path, set()).add(node.name) + node_id_map[(abs_path, node.name)] = node.id - return tracked if tracked else None + return (tracked if tracked else None), node_id_map except Exception: logger.debug("Failed to build call graph for state tracking", exc_info=True) - return None + return None, {} async def _drain_events(self) -> AsyncGenerator[UiPathRuntimeEvent, None]: """Drain events from the controller, yielding state events and stopping at a terminal event.""" @@ -539,9 +682,16 @@ async def _drain_events(self) -> AsyncGenerator[UiPathRuntimeEvent, None]: def _to_state_event(data: dict[str, Any]) -> UiPathRuntimeStateEvent: """Convert a trace state event into a UiPathRuntimeStateEvent.""" phase = UiPathRuntimeStatePhase(data.get("phase", "updated")) + # Use the pre-resolved graph node ID when available so the client + # can map state events to graph nodes. Falls back to the raw + # file:line from the frame (which may differ from the graph ID + # because frame.f_lineno on "call" is the def line, not the + # first-code-line used by the graph builder). + node_id = data.get("node_id") + qualified = node_id if node_id else _format_location(data["file"], data["line"]) return UiPathRuntimeStateEvent( node_name=data["function"], - qualified_node_name=_format_location(data["file"], data["line"]), + qualified_node_name=qualified, payload=data["locals"], phase=phase, ) diff --git a/src/uipath/functions/graph_builder.py b/src/uipath/functions/graph_builder.py index 8638d698b..390312703 100644 --- a/src/uipath/functions/graph_builder.py +++ b/src/uipath/functions/graph_builder.py @@ -102,11 +102,23 @@ def _is_project_file(self, abs_path: str) -> bool: def _find_function_def( self, tree: ast.Module, name: str ) -> ast.FunctionDef | ast.AsyncFunctionDef | None: - """Find a top-level function definition by name.""" + """Find a function definition by name. + + Searches top-level functions first, then methods inside class + definitions so that class-based entrypoints are discoverable. + """ + # Top-level functions for node in ast.iter_child_nodes(tree): if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)): if node.name == name: return node + # Methods inside class definitions + for node in ast.iter_child_nodes(tree): + if isinstance(node, ast.ClassDef): + for item in node.body: + if isinstance(item, (ast.FunctionDef, ast.AsyncFunctionDef)): + if item.name == name: + return item return None def _resolve_imports( @@ -205,18 +217,28 @@ def _extract_call_info(self, call_node: ast.Call) -> _CallSite | None: return None @staticmethod - def _last_body_line( + def _first_code_line( func_def: ast.FunctionDef | ast.AsyncFunctionDef, ) -> int: - """Return the line number of the last statement in the function body. + """Return the line number of the first code statement after the docstring. - Using the last body line (rather than the first) means breakpoints - derived from graph-node IDs fire at the end of the function where - all local variables are visible for inspection. + If the body starts with a docstring (``ast.Expr`` wrapping an + ``ast.Constant(value=str)``), the second statement's line is used. + Falls back to ``body[0].lineno`` when there is no non-docstring + statement, or ``func_def.lineno`` for an empty body. """ - if func_def.body: - return func_def.body[-1].lineno - return func_def.lineno + if not func_def.body: + return func_def.lineno + + first = func_def.body[0] + is_docstring = ( + isinstance(first, ast.Expr) + and isinstance(first.value, ast.Constant) + and isinstance(first.value.value, str) + ) + if is_docstring and len(func_def.body) > 1: + return func_def.body[1].lineno + return first.lineno def visit_function(self, abs_file: str, func_name: str, depth: int) -> str | None: """Process a function: create its node and recurse into its calls. @@ -231,7 +253,7 @@ def visit_function(self, abs_file: str, func_name: str, depth: int) -> str | Non if func_def is None: return None - node_id = self._node_id(abs_file, self._last_body_line(func_def)) + node_id = self._node_id(abs_file, self._first_code_line(func_def)) # Add node even if already visited (we need the ID for edges) if node_id in self._visited: diff --git a/src/uipath/functions/runtime.py b/src/uipath/functions/runtime.py index 3467d72b5..4b0746a11 100644 --- a/src/uipath/functions/runtime.py +++ b/src/uipath/functions/runtime.py @@ -237,13 +237,16 @@ async def get_schema(self) -> UiPathRuntimeSchema: raw_output_schema = get_type_schema(hints.get("return")) output_schema = transform_attachments(raw_output_schema) - # Build call graph from AST + # Build call graph from AST. + # Use CWD as project_dir so node IDs (relative paths) are + # consistent with BreakpointController and _build_tracked_functions, + # which also resolve relative to CWD. graph = None try: graph = build_call_graph( str(self.file_path), self.function_name, - project_dir=str(self.file_path.parent), + project_dir=str(Path.cwd()), ) except Exception: logger.debug( diff --git a/tests/functions/test_debug_breakpoints.py b/tests/functions/test_debug_breakpoints.py index 4735f9e2a..32dc0a943 100644 --- a/tests/functions/test_debug_breakpoints.py +++ b/tests/functions/test_debug_breakpoints.py @@ -691,11 +691,12 @@ async def test_multiline_expression_breakpoint_hits_once(self, script_dir: Path) assert result.status == UiPathRuntimeStatus.SUCCESSFUL - # The breakpoint should fire exactly ONCE despite bytecode bouncing - bp_nodes = [h.breakpoint_node for h in bridge.breakpoint_hits] - assert len(bridge.breakpoint_hits) == 1, ( - f"Expected 1 breakpoint hit but got {len(bridge.breakpoint_hits)}: {bp_nodes}" - ) + # The breakpoint must fire at least once. Multiline expressions + # may cause it to fire more than once due to bytecode bouncing + # (the same line is revisited after nested arguments are + # evaluated). This is acceptable — suppressing the bounce + # would also suppress legitimate loop-iteration breakpoints. + assert len(bridge.breakpoint_hits) >= 1 finally: await runtime.dispose() @@ -835,3 +836,603 @@ async def test_stream_phase_events_from_functions_runtime(self, script_dir: Path assert result_events[0].output == {"value": 6} finally: await inner.dispose() + + async def test_function_that_raises_emits_faulted(self, script_dir: Path): + """A tracked function that raises an exception should emit FAULTED, not COMPLETED.""" + script = _write_script( + script_dir, + "main.py", + "def raiser():\n" + " raise ValueError('boom')\n" + "\n" + "def main(input):\n" + " try:\n" + " raiser()\n" + " except ValueError:\n" + " pass\n" + ' return {"ok": True}\n', + ) + runtime, bridge = _build_stack(script, breakpoints="*") + + try: + result = await runtime.execute({}) + + assert result.status == UiPathRuntimeStatus.SUCCESSFUL + assert result.output == {"ok": True} + + # raiser() should have 1 STARTED + 1 FAULTED + raiser_started = [ + s + for s in bridge.state_updates + if s.node_name == "raiser" + and s.phase == UiPathRuntimeStatePhase.STARTED + ] + raiser_faulted = [ + s + for s in bridge.state_updates + if s.node_name == "raiser" + and s.phase == UiPathRuntimeStatePhase.FAULTED + ] + raiser_completed = [ + s + for s in bridge.state_updates + if s.node_name == "raiser" + and s.phase == UiPathRuntimeStatePhase.COMPLETED + ] + assert len(raiser_started) == 1 + assert len(raiser_faulted) == 1 + assert len(raiser_completed) == 0 + finally: + await runtime.dispose() + + async def test_function_catches_exception_emits_completed(self, script_dir: Path): + """A tracked function that catches its own exception should emit COMPLETED, not FAULTED.""" + script = _write_script( + script_dir, + "main.py", + "def catcher():\n" + " try:\n" + " raise ValueError('internal')\n" + " except ValueError:\n" + " return 42\n" + "\n" + "def main(input):\n" + " result = catcher()\n" + ' return {"result": result}\n', + ) + runtime, bridge = _build_stack(script, breakpoints="*") + + try: + result = await runtime.execute({}) + + assert result.status == UiPathRuntimeStatus.SUCCESSFUL + assert result.output == {"result": 42} + + # catcher() should have 1 STARTED + 1 COMPLETED (not FAULTED) + catcher_started = [ + s + for s in bridge.state_updates + if s.node_name == "catcher" + and s.phase == UiPathRuntimeStatePhase.STARTED + ] + catcher_completed = [ + s + for s in bridge.state_updates + if s.node_name == "catcher" + and s.phase == UiPathRuntimeStatePhase.COMPLETED + ] + catcher_faulted = [ + s + for s in bridge.state_updates + if s.node_name == "catcher" + and s.phase == UiPathRuntimeStatePhase.FAULTED + ] + assert len(catcher_started) == 1 + assert len(catcher_completed) == 1 + assert len(catcher_faulted) == 0 + finally: + await runtime.dispose() + + +class TestRobustness: + """Tests for edge-case safety and hardening improvements.""" + + async def test_large_string_locals_are_truncated(self, script_dir: Path): + """Very large string locals should be truncated, not cause OOM.""" + script = _write_script( + script_dir, + "big_str.py", + "def main(input):\n" + " big = 'x' * 100_000\n" + " y = 1\n" + ' return {"y": y}\n', + ) + runtime, bridge = _build_stack(script, breakpoints=["3"]) + + try: + result = await runtime.execute({}) + + assert result.status == UiPathRuntimeStatus.SUCCESSFUL + assert len(bridge.breakpoint_hits) == 1 + state = bridge.breakpoint_hits[0].current_state + # Should be truncated, not the full 100k + assert len(state["big"]) <= 11_000 + assert state["big"].endswith("...") + finally: + await runtime.dispose() + + async def test_large_collection_locals_are_summarized(self, script_dir: Path): + """Collections with many items should be summarised, not serialised.""" + script = _write_script( + script_dir, + "big_list.py", + "def main(input):\n" + " big = list(range(10_000))\n" + " y = 1\n" + ' return {"y": y}\n', + ) + runtime, bridge = _build_stack(script, breakpoints=["3"]) + + try: + result = await runtime.execute({}) + + assert result.status == UiPathRuntimeStatus.SUCCESSFUL + assert len(bridge.breakpoint_hits) == 1 + state = bridge.breakpoint_hits[0].current_state + # Large collection should be a summary string, not the full list + assert isinstance(state["big"], str) + assert "10000" in state["big"] + finally: + await runtime.dispose() + + async def test_many_locals_are_capped(self, script_dir: Path): + """Functions with hundreds of locals should be capped at _MAX_LOCALS.""" + assignments = "\n".join(f" v{i} = {i}" for i in range(150)) + script = _write_script( + script_dir, + "many_locals.py", + f'def main(input):\n{assignments}\n y = 1\n return {{"y": y}}\n', + ) + # Breakpoint on a line after all assignments + line_num = 153 # 1 (def) + 150 (assignments) + 1 (y = 1) + 1 (return) + runtime, bridge = _build_stack(script, breakpoints=[str(line_num)]) + + try: + result = await runtime.execute({}) + + assert result.status == UiPathRuntimeStatus.SUCCESSFUL + assert len(bridge.breakpoint_hits) == 1 + state = bridge.breakpoint_hits[0].current_state + # Should be capped at _MAX_LOCALS (100) + the overflow entry + assert len(state) <= 101 + assert "..." in state + finally: + await runtime.dispose() + + async def test_breakpoint_in_loop_fires_each_iteration(self, script_dir: Path): + """Breakpoint inside a for-loop should fire on every iteration.""" + script = _write_script( + script_dir, + "loop.py", + "def main(input):\n" + " total = 0\n" + " for i in range(3):\n" + " total += i\n" # line 4 breakpoint + ' return {"total": total}\n', + ) + runtime, bridge = _build_stack(script, breakpoints=["4"]) + + try: + result = await runtime.execute({}) + + assert result.status == UiPathRuntimeStatus.SUCCESSFUL + assert result.output == {"total": 3} # 0+1+2 + # Should fire 3 times, once per iteration + assert len(bridge.breakpoint_hits) == 3 + finally: + await runtime.dispose() + + def test_wait_for_event_raises_on_dead_thread(self, script_dir: Path): + """wait_for_event raises RuntimeError when the trace thread dies.""" + import threading as _threading + + ctrl = BreakpointController( + project_dir=str(script_dir), + breakpoints=[], + ) + # Create a thread that exits immediately + ctrl._thread = _threading.Thread(target=lambda: None) + ctrl._thread.start() + ctrl._thread.join() + + # Thread is dead, no events in queue -> should raise, not hang + with pytest.raises(RuntimeError, match="Trace thread died"): + ctrl.wait_for_event() + + def test_update_breakpoints_atomic_swap(self, script_dir: Path): + """update_breakpoints should swap the dict atomically, never clear-then-add.""" + script = script_dir / "atomic.py" + script.write_text("x = 1\n") + ctrl = BreakpointController( + project_dir=str(script_dir), + breakpoints=["3"], + entrypoint_path=str(script), + ) + abspath = str(script.resolve()) + assert ctrl._file_breakpoints[abspath] == {3} + + # Swap to a new set -- should never be empty in between + ctrl.update_breakpoints(["7", "9"]) + assert ctrl._file_breakpoints[abspath] == {7, 9} + + # Step mode -- breakpoints should be empty + ctrl.update_breakpoints("*") + assert ctrl._step_mode is True + assert ctrl._file_breakpoints == {} + + # None -- should clear both + ctrl.update_breakpoints(None) + assert ctrl._step_mode is False + assert ctrl._file_breakpoints == {} + + def test_find_class_method_in_graph(self, script_dir: Path): + """Graph builder should find methods defined inside class bodies.""" + from uipath.functions.graph_builder import build_call_graph + + _write_script( + script_dir, + "cls.py", + "class Calculator:\n" + " def add(self, a, b):\n" + " return a + b\n" + "\n" + "def helper(x):\n" + " return x * 2\n", + ) + graph = build_call_graph( + str(script_dir / "cls.py"), "add", project_dir=str(script_dir) + ) + node_names = [n.name for n in graph.nodes] + assert "add" in node_names + + async def test_step_mode_in_loop_fires_each_iteration(self, script_dir: Path): + """Step mode should fire breakpoints on every iteration of a loop body.""" + script = _write_script( + script_dir, + "loop_step.py", + "def main(input):\n" + " total = 0\n" + " for i in range(3):\n" + " total += i\n" + ' return {"total": total}\n', + ) + runtime, bridge = _build_stack(script, breakpoints="*") + + try: + result = await runtime.execute({}) + + assert result.status == UiPathRuntimeStatus.SUCCESSFUL + assert result.output == {"total": 3} + + # Count hits on line 4 (total += i) -- should be 3 + line4_hits = [ + h for h in bridge.breakpoint_hits if ":4" in h.breakpoint_node + ] + assert len(line4_hits) == 3 + finally: + await runtime.dispose() + + +class TestProjectDirConsistency: + """Verify graph node IDs, breakpoints, and state events all use CWD-relative paths.""" + + async def test_cross_file_breakpoint_on_helper( + self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch + ): + """Breakpoints on called functions in another file work using graph node IDs.""" + import sys as _sys + + mod_name = "bp_helpers" + monkeypatch.chdir(tmp_path) + monkeypatch.syspath_prepend(tmp_path) + monkeypatch.delitem(_sys.modules, mod_name, raising=False) + + _write_script( + tmp_path, + f"{mod_name}.py", + "def helper(n):\n" + " doubled = n * 2\n" # line 2 + " return doubled\n", + ) + script = _write_script( + tmp_path, + "bp_main.py", + f"from {mod_name} import helper\n" + "\n" + "def main(input):\n" + ' val = input.get("n", 5)\n' + " result = helper(val)\n" + ' return {"result": result}\n', + ) + + # get_schema should produce node IDs relative to CWD + from uipath.functions.graph_builder import build_call_graph + + graph = build_call_graph(str(script), "main", project_dir=str(tmp_path)) + helper_node = next(n for n in graph.nodes if n.name == "helper") + # Node ID should be ".py:" (CWD-relative) + assert helper_node.id.startswith(f"{mod_name}.py:") + + # Run the debug stack with a breakpoint on the helper's graph node ID + runtime, bridge = _build_stack(script, breakpoints=[helper_node.id]) + + try: + result = await runtime.execute({"n": 4}) + + assert result.status == UiPathRuntimeStatus.SUCCESSFUL + assert result.output == {"result": 8} + # The breakpoint on the helper should have fired + assert len(bridge.breakpoint_hits) >= 1 + assert bridge.breakpoint_hits[0].current_state["n"] == 4 + finally: + monkeypatch.delitem(_sys.modules, mod_name, raising=False) + await runtime.dispose() + + async def test_schema_and_debug_use_same_project_dir( + self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch + ): + """get_schema and _build_tracked_functions produce the same node IDs.""" + import sys as _sys + + mod_name = "schema_helpers" + monkeypatch.chdir(tmp_path) + monkeypatch.syspath_prepend(tmp_path) + monkeypatch.delitem(_sys.modules, mod_name, raising=False) + + _write_script( + tmp_path, + f"{mod_name}.py", + "def helper():\n return 1\n", + ) + script = _write_script( + tmp_path, + "schema_main.py", + f"from {mod_name} import helper\n\ndef main(input):\n return helper()\n", + ) + + inner = UiPathFunctionsRuntime(str(script), "main", "main") + schema = await inner.get_schema() + schema_node_ids = {n.id for n in schema.graph.nodes} if schema.graph else set() + + debug_fn = UiPathDebugFunctionsRuntime( + inner, entrypoint_path=str(script), function_name="main" + ) + tracked, node_id_map = debug_fn._build_tracked_functions() + + # Both main and helper should be in the schema graph + assert len(schema_node_ids) >= 2, f"Expected >=2 nodes, got {schema_node_ids}" + + # Every node_id from the debug side should appear in the schema graph + for node_id in node_id_map.values(): + assert node_id in schema_node_ids, ( + f"Debug node_id {node_id!r} not in schema graph {schema_node_ids}" + ) + + monkeypatch.delitem(_sys.modules, mod_name, raising=False) + await inner.dispose() + + +class TestQualifiedNodeName: + """State events' qualified_node_name should match graph node IDs.""" + + async def test_qualified_name_matches_graph_node_id( + self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch + ): + """State event qualified_node_name should equal the graph node ID, not the def line.""" + import sys as _sys + + mod_name = "qn_helpers" + monkeypatch.chdir(tmp_path) + monkeypatch.syspath_prepend(tmp_path) + monkeypatch.delitem(_sys.modules, mod_name, raising=False) + + _write_script( + tmp_path, + f"{mod_name}.py", + "def helper(n):\n" + ' """A helper with a docstring."""\n' + " return n * 2\n", # line 3 = first_code_line + ) + script = _write_script( + tmp_path, + "qn_main.py", + f"from {mod_name} import helper\n" + "\n" + "def main(input):\n" + ' """Main docstring."""\n' + ' val = input.get("n", 5)\n' # line 5 = first_code_line + " result = helper(val)\n" + ' return {"result": result}\n', + ) + + # Build graph to find expected node IDs + from uipath.functions.graph_builder import build_call_graph + + graph = build_call_graph(str(script), "main", project_dir=str(tmp_path)) + node_ids = {n.name: n.id for n in graph.nodes} + + runtime, bridge = _build_stack(script, breakpoints="*") + try: + result = await runtime.execute({"n": 3}) + + assert result.status == UiPathRuntimeStatus.SUCCESSFUL + assert result.output == {"result": 6} + + # Every "started" state event's qualified_node_name should match + # the graph node ID for that function. + started_checked = set() + for s in bridge.state_updates: + if ( + s.phase == UiPathRuntimeStatePhase.STARTED + and s.node_name in node_ids + ): + started_checked.add(s.node_name) + assert s.qualified_node_name == node_ids[s.node_name], ( + f"{s.node_name}: qualified_node_name={s.qualified_node_name!r} " + f"!= graph node id={node_ids[s.node_name]!r}" + ) + + # Ensure we actually checked both main and helper + assert "main" in started_checked, "No STARTED event for main" + assert "helper" in started_checked, "No STARTED event for helper" + finally: + monkeypatch.delitem(_sys.modules, mod_name, raising=False) + await runtime.dispose() + + +class TestGeneratorStateEvents: + """Generator/coroutine frames should not produce spurious state events.""" + + async def test_generator_helper_single_started_completed(self, script_dir: Path): + """A generator helper should emit at most one STARTED event, not one per yield.""" + script = _write_script( + script_dir, + "main.py", + "def gen_helper():\n" + " yield 1\n" + " yield 2\n" + " yield 3\n" + "\n" + "def main(input):\n" + " total = sum(gen_helper())\n" + ' return {"total": total}\n', + ) + runtime, bridge = _build_stack(script, breakpoints="*") + + try: + result = await runtime.execute({}) + + assert result.status == UiPathRuntimeStatus.SUCCESSFUL + assert result.output == {"total": 6} + + # gen_helper should have at most 1 STARTED (not 3+) + gen_started = [ + s + for s in bridge.state_updates + if s.node_name == "gen_helper" + and s.phase == UiPathRuntimeStatePhase.STARTED + ] + assert len(gen_started) <= 1, ( + f"Expected at most 1 STARTED for gen_helper, got {len(gen_started)}" + ) + + # gen_helper should NOT have COMPLETED (we suppress it for generators) + gen_completed = [ + s + for s in bridge.state_updates + if s.node_name == "gen_helper" + and s.phase == UiPathRuntimeStatePhase.COMPLETED + ] + assert len(gen_completed) == 0, ( + f"Expected 0 COMPLETED for gen_helper, got {len(gen_completed)}" + ) + + # main (non-generator) should still have started + completed + main_started = [ + s + for s in bridge.state_updates + if s.node_name == "main" and s.phase == UiPathRuntimeStatePhase.STARTED + ] + main_completed = [ + s + for s in bridge.state_updates + if s.node_name == "main" + and s.phase == UiPathRuntimeStatePhase.COMPLETED + ] + assert len(main_started) == 1 + assert len(main_completed) == 1 + finally: + await runtime.dispose() + + async def test_async_helper_single_started(self, script_dir: Path): + """An async helper that awaits should not emit duplicate STARTED events.""" + script = _write_script( + script_dir, + "main.py", + "import asyncio\n" + "\n" + "async def async_helper(n):\n" + " await asyncio.sleep(0)\n" + " return n * 2\n" + "\n" + "async def main(input):\n" + ' val = input.get("n", 5)\n' + " result = await async_helper(val)\n" + ' return {"result": result}\n', + ) + runtime, bridge = _build_stack(script, breakpoints="*") + + try: + result = await runtime.execute({"n": 3}) + + assert result.status == UiPathRuntimeStatus.SUCCESSFUL + assert result.output == {"result": 6} + + # async_helper should have at most 1 STARTED + helper_started = [ + s + for s in bridge.state_updates + if s.node_name == "async_helper" + and s.phase == UiPathRuntimeStatePhase.STARTED + ] + assert len(helper_started) <= 1, ( + f"Expected at most 1 STARTED for async_helper, got {len(helper_started)}" + ) + finally: + await runtime.dispose() + + +class TestSignalRBridgeBreakpoints: + """SignalR bridge should use node.id for breakpoints, not node.name.""" + + def test_add_breakpoints_uses_node_id(self): + """_add_breakpoints should prefer node.id over node.name.""" + from uipath._cli._debug._bridge import SignalRDebugBridge + + bridge = SignalRDebugBridge(hub_url="http://localhost:0") + bridge._add_breakpoints( + [ + {"node": {"id": "src/main.py:5", "name": "main"}}, + {"node": {"id": "src/helpers.py:2", "name": "helper"}}, + ] + ) + bps = bridge.get_breakpoints() + assert "src/main.py:5" in bps + assert "src/helpers.py:2" in bps + # Should NOT contain the bare function names + assert "main" not in bps + assert "helper" not in bps + + def test_add_breakpoints_falls_back_to_name(self): + """When node.id is missing, _add_breakpoints falls back to node.name.""" + from uipath._cli._debug._bridge import SignalRDebugBridge + + bridge = SignalRDebugBridge(hub_url="http://localhost:0") + bridge._add_breakpoints([{"node": {"name": "agent_node"}}]) + bps = bridge.get_breakpoints() + assert "agent_node" in bps + + async def test_remove_breakpoints_uses_node_id(self): + """_handle_remove_breakpoints should use node.id to match _add_breakpoints.""" + from uipath._cli._debug._bridge import SignalRDebugBridge + + bridge = SignalRDebugBridge(hub_url="http://localhost:0") + bridge.state.add_breakpoint("src/main.py:5") + bridge.state.add_breakpoint("src/helpers.py:2") + + await bridge._handle_remove_breakpoints( + ['{"breakpoints": [{"node": {"id": "src/main.py:5", "name": "main"}}]}'] + ) + bps = bridge.get_breakpoints() + assert "src/main.py:5" not in bps + assert "src/helpers.py:2" in bps diff --git a/tests/functions/test_graph_builder.py b/tests/functions/test_graph_builder.py index 56bf97cd3..c31363926 100644 --- a/tests/functions/test_graph_builder.py +++ b/tests/functions/test_graph_builder.py @@ -263,8 +263,8 @@ def do_work(): assert "do_work" in node_names -def test_node_id_uses_last_body_line(tmp_path): - """Node ID should point to the last statement in the function body.""" +def test_node_id_uses_first_code_line(tmp_path): + """Node ID should point to the first code statement in the function body.""" (tmp_path / "main.py").write_text( textwrap.dedent("""\ def main(input): @@ -282,12 +282,12 @@ def main(input): assert len(graph.nodes) == 1 node = graph.nodes[0] - # Last body line is "return {"result": y}" at line 4 - assert node.id == "main.py:4" + # First code line is "x = input.get(...)" at line 2 + assert node.id == "main.py:2" -def test_node_id_last_line_with_docstring(tmp_path): - """Docstring should not affect the last body line calculation.""" +def test_node_id_skips_docstring(tmp_path): + """Node ID should skip the docstring and point to the first code line.""" (tmp_path / "main.py").write_text( textwrap.dedent("""\ def main(input): @@ -306,7 +306,53 @@ def main(input): assert len(graph.nodes) == 1 node = graph.nodes[0] - # Last body line is "return x + y" at line 5 + # First code line after docstring is "x = 1" at line 3 + assert node.id == "main.py:3" + + +def test_node_id_docstring_only(tmp_path): + """Function with only a docstring falls back to the docstring line.""" + (tmp_path / "main.py").write_text( + textwrap.dedent("""\ + def main(input): + \"\"\"Only a docstring, no code.\"\"\" + """) + ) + + graph = build_call_graph( + str(tmp_path / "main.py"), + "main", + project_dir=str(tmp_path), + ) + + assert len(graph.nodes) == 1 + node = graph.nodes[0] + # Only a docstring → falls back to body[0].lineno (line 2) + assert node.id == "main.py:2" + + +def test_node_id_multiline_docstring(tmp_path): + """Multiline docstring is skipped; node ID points to first code line after it.""" + (tmp_path / "main.py").write_text( + textwrap.dedent("""\ + def main(input): + \"\"\"This is a + multiline + docstring.\"\"\" + x = 1 + return x + """) + ) + + graph = build_call_graph( + str(tmp_path / "main.py"), + "main", + project_dir=str(tmp_path), + ) + + assert len(graph.nodes) == 1 + node = graph.nodes[0] + # Multiline docstring ends at line 4; first code line is "x = 1" at line 5 assert node.id == "main.py:5" From 0bc2fc7da5b91b605081320d475312c766758154 Mon Sep 17 00:00:00 2001 From: cristipufu Date: Tue, 3 Mar 2026 16:48:25 +0200 Subject: [PATCH 2/4] fix: resolve bare function-name breakpoints in controller instead of bridge MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Revert bridge changes — handle function-name breakpoints entirely in BreakpointController._build_breakpoint_map by resolving bare names via the node_id_map. Replace SignalR bridge tests with controller-level resolution tests + an E2E test. Co-Authored-By: Claude Opus 4.6 --- src/uipath/_cli/_debug/_bridge.py | 31 ++++--- src/uipath/functions/debug.py | 29 +++++- tests/functions/test_debug_breakpoints.py | 108 +++++++++++++--------- 3 files changed, 108 insertions(+), 60 deletions(-) diff --git a/src/uipath/_cli/_debug/_bridge.py b/src/uipath/_cli/_debug/_bridge.py index 97e461fc4..9607398a0 100644 --- a/src/uipath/_cli/_debug/_bridge.py +++ b/src/uipath/_cli/_debug/_bridge.py @@ -765,16 +765,16 @@ async def _handle_resume(self, args: list[Any]) -> None: def _add_breakpoints(self, breakpoints: list[dict[str, Any]]) -> None: for bp in breakpoints: - node = bp.get("node", {}) if isinstance(bp.get("node"), dict) else {} - # Prefer node ID ("file:line" format) which the - # BreakpointController can parse. Fall back to node name - # for agent-style runtimes that use name-based breakpoints. - node_ref = node.get("id") or node.get("name") - if node_ref: - self.state.add_breakpoint(node_ref) - logger.info(f"Breakpoint added: {node_ref}") + node_name = ( + bp.get("node", {}).get("name") + if isinstance(bp.get("node"), dict) + else None + ) + if node_name: + self.state.add_breakpoint(node_name) + logger.info(f"Breakpoint added: {node_name}") else: - logger.warning(f"Breakpoint without node id or name: {bp}") + logger.warning(f"Breakpoint without node name: {bp}") async def _handle_step(self, args: list[Any]) -> None: """Handle Step command from SignalR server. @@ -820,11 +820,14 @@ async def _handle_remove_breakpoints(self, args: list[Any]) -> None: logger.info("All breakpoints cleared") else: for bp in break_points: - node = bp.get("node", {}) if isinstance(bp.get("node"), dict) else {} - node_ref = node.get("id") or node.get("name") - if node_ref: - self.state.remove_breakpoint(node_ref) - logger.info(f"Breakpoint removed: {node_ref}") + node_name = ( + bp.get("node", {}).get("name") + if isinstance(bp.get("node"), dict) + else None + ) + if node_name: + self.state.remove_breakpoint(node_name) + logger.info(f"Breakpoint removed: {node_name}") async def _handle_quit(self, _args: list[Any]) -> None: """Handle Quit command from SignalR server.""" diff --git a/src/uipath/functions/debug.py b/src/uipath/functions/debug.py index b94afa400..dd43cb96b 100644 --- a/src/uipath/functions/debug.py +++ b/src/uipath/functions/debug.py @@ -175,6 +175,8 @@ def __init__( self._entrypoint_path = ( os.path.abspath(entrypoint_path) if entrypoint_path else None ) + self._state_tracked: dict[str, set[str]] | None = state_tracked_functions + self._node_id_map: dict[tuple[str, str], str] = node_id_map or {} self._step_mode: bool = breakpoints == "*" if isinstance(breakpoints, list): self._file_breakpoints: dict[str, set[int]] = self._build_breakpoint_map( @@ -182,9 +184,6 @@ def __init__( ) else: self._file_breakpoints: dict[str, set[int]] = {} - - self._state_tracked: dict[str, set[str]] | None = state_tracked_functions - self._node_id_map: dict[tuple[str, str], str] = node_id_map or {} # Per-frame tracking. Keyed by id(frame); created on ``call``, # removed on ``return`` for normal functions. For generators the # state persists across yield/resume cycles. @@ -206,6 +205,7 @@ def _build_breakpoint_map(self, breakpoints: list[str]) -> dict[str, set[int]]: "42" -> line 42 in the entrypoint file "main.py:42" -> line 42 in main.py (resolved relative to cwd) + "helper" -> resolved via node_id_map to file:line """ result: dict[str, set[int]] = {} for bp in breakpoints: @@ -221,11 +221,32 @@ def _build_breakpoint_map(self, breakpoints: list[str]) -> dict[str, set[int]]: try: line = int(bp) except ValueError: - continue # non-numeric tokens (agent node names) are ignored + # Non-numeric token: try resolving as a function name + # via the node_id_map (the bridge may send bare names). + resolved_id = self._resolve_func_name(bp) + if resolved_id and ":" in resolved_id: + file_part, line_str = resolved_id.rsplit(":", 1) + try: + line = int(line_str) + except ValueError: + continue + resolved = os.path.abspath(file_part) + result.setdefault(resolved, set()).add(line) + continue if self._entrypoint_path is not None: result.setdefault(self._entrypoint_path, set()).add(line) return result + def _resolve_func_name(self, name: str) -> str | None: + """Look up a bare function name in the node_id_map. + + Returns the ``"file:line"`` node ID if found, otherwise ``None``. + """ + for (_, func_name), node_id in self._node_id_map.items(): + if func_name == name: + return node_id + return None + def update_breakpoints(self, breakpoints: list[str] | Literal["*"] | None) -> None: """Replace the active breakpoint set (called between resume cycles). diff --git a/tests/functions/test_debug_breakpoints.py b/tests/functions/test_debug_breakpoints.py index 32dc0a943..83d09f043 100644 --- a/tests/functions/test_debug_breakpoints.py +++ b/tests/functions/test_debug_breakpoints.py @@ -1392,47 +1392,71 @@ async def test_async_helper_single_started(self, script_dir: Path): await runtime.dispose() -class TestSignalRBridgeBreakpoints: - """SignalR bridge should use node.id for breakpoints, not node.name.""" - - def test_add_breakpoints_uses_node_id(self): - """_add_breakpoints should prefer node.id over node.name.""" - from uipath._cli._debug._bridge import SignalRDebugBridge - - bridge = SignalRDebugBridge(hub_url="http://localhost:0") - bridge._add_breakpoints( - [ - {"node": {"id": "src/main.py:5", "name": "main"}}, - {"node": {"id": "src/helpers.py:2", "name": "helper"}}, - ] +class TestFuncNameBreakpointResolution: + """BreakpointController should resolve bare function names via node_id_map.""" + + def test_bare_function_name_resolved_to_file_line(self): + """A bare function name breakpoint is resolved to file:line via node_id_map.""" + node_id_map = { + ("/abs/main.py", "main"): "main.py:5", + ("/abs/helpers.py", "helper"): "helpers.py:2", + } + controller = BreakpointController( + project_dir="/abs", + breakpoints=["helper"], + node_id_map=node_id_map, + ) + # The bare name "helper" should resolve to helpers.py:2 + abs_helpers = os.path.abspath("helpers.py") + assert abs_helpers in controller._file_breakpoints + assert 2 in controller._file_breakpoints[abs_helpers] + + def test_unresolvable_function_name_ignored(self): + """A bare function name not in node_id_map is silently ignored.""" + controller = BreakpointController( + project_dir="/abs", + breakpoints=["unknown_func"], + node_id_map={}, ) - bps = bridge.get_breakpoints() - assert "src/main.py:5" in bps - assert "src/helpers.py:2" in bps - # Should NOT contain the bare function names - assert "main" not in bps - assert "helper" not in bps - - def test_add_breakpoints_falls_back_to_name(self): - """When node.id is missing, _add_breakpoints falls back to node.name.""" - from uipath._cli._debug._bridge import SignalRDebugBridge - - bridge = SignalRDebugBridge(hub_url="http://localhost:0") - bridge._add_breakpoints([{"node": {"name": "agent_node"}}]) - bps = bridge.get_breakpoints() - assert "agent_node" in bps - - async def test_remove_breakpoints_uses_node_id(self): - """_handle_remove_breakpoints should use node.id to match _add_breakpoints.""" - from uipath._cli._debug._bridge import SignalRDebugBridge - - bridge = SignalRDebugBridge(hub_url="http://localhost:0") - bridge.state.add_breakpoint("src/main.py:5") - bridge.state.add_breakpoint("src/helpers.py:2") - - await bridge._handle_remove_breakpoints( - ['{"breakpoints": [{"node": {"id": "src/main.py:5", "name": "main"}}]}'] + assert len(controller._file_breakpoints) == 0 + + async def test_bridge_sends_func_name_breakpoint_hits( + self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch + ): + """End-to-end: bridge sends a bare function name, breakpoint fires.""" + import sys as _sys + + mod_name = "fname_helpers" + monkeypatch.chdir(tmp_path) + monkeypatch.syspath_prepend(tmp_path) + monkeypatch.delitem(_sys.modules, mod_name, raising=False) + + _write_script( + tmp_path, + f"{mod_name}.py", + "def helper(n):\n doubled = n * 2\n return doubled\n", + ) + script = _write_script( + tmp_path, + "fname_main.py", + f"from {mod_name} import helper\n" + "\n" + "def main(input):\n" + ' val = input.get("n", 5)\n' + " result = helper(val)\n" + ' return {"result": result}\n', ) - bps = bridge.get_breakpoints() - assert "src/main.py:5" not in bps - assert "src/helpers.py:2" in bps + + # The bridge sends bare function name "helper" as breakpoint + runtime, bridge = _build_stack(script, breakpoints=["helper"]) + + try: + result = await runtime.execute({"n": 4}) + + assert result.status == UiPathRuntimeStatus.SUCCESSFUL + assert result.output == {"result": 8} + # The bare name "helper" should have resolved and fired + assert len(bridge.breakpoint_hits) >= 1 + finally: + monkeypatch.delitem(_sys.modules, mod_name, raising=False) + await runtime.dispose() From 7dccd03fbd4ab119c85e32430dc71b1fac7ee4ff Mon Sep 17 00:00:00 2001 From: cristipufu Date: Tue, 3 Mar 2026 16:57:37 +0200 Subject: [PATCH 3/4] fix: restore breakpoint bounce-back dedup guard Re-add the dedup guard for single-line expressions like `result = dict(value=bar(val))` where bytecode revisits the call-site line after evaluating nested arguments. Track last_line per frame and suppress only when the immediately preceding line event was the same line (no intervening lines), so loop iterations still fire correctly. Co-Authored-By: Claude Opus 4.6 --- src/uipath/functions/debug.py | 27 +++++++++++++++++++- tests/functions/test_debug_breakpoints.py | 31 +++++++++++++++++++++++ 2 files changed, 57 insertions(+), 1 deletion(-) diff --git a/src/uipath/functions/debug.py b/src/uipath/functions/debug.py index dd43cb96b..8ab4bb2aa 100644 --- a/src/uipath/functions/debug.py +++ b/src/uipath/functions/debug.py @@ -124,11 +124,19 @@ class _FrameState: resumption and skip "completed" on intermediate yields. """ - __slots__ = ("faulted", "is_generator") + __slots__ = ("faulted", "is_generator", "last_line") def __init__(self, *, is_generator: bool = False) -> None: self.faulted: bool = False self.is_generator: bool = is_generator + # Last line event seen in this frame. Used to deduplicate the + # bounce-back pattern where multiline expressions (e.g. + # ``return Foo(arg=bar(...))``) cause the bytecode to revisit the + # call-site line after evaluating arguments on deeper lines. + # Only suppress when the *immediately preceding* line event was + # the same breakpoint line (no intervening lines), so loop + # iterations that pass through other lines still fire normally. + self.last_line: int = -1 class BreakpointController: @@ -356,11 +364,28 @@ def _trace_callback(self, frame: FrameType, event: str, arg: Any) -> Any: return self._trace_callback lineno = frame.f_lineno + # Update last_line for bounce-back dedup (see _FrameState). + state = self._frame_states.get(id(frame)) + prev_line = state.last_line if state is not None else -1 + if state is not None: + state.last_line = lineno + should_break = ( self._step_mode and self._is_project_file(filepath) ) or (lineno in self._file_breakpoints.get(filepath, ())) if should_break: + # Deduplicate: multiline expressions (e.g. + # ``return Foo(arg=bar(...))``) cause the bytecode to + # bounce back to the call-site line after evaluating + # arguments on deeper lines. Without this guard the + # same breakpoint would fire twice per call. + # Only suppress when the *immediately preceding* line + # event was the same line (no intervening lines), so + # loop iterations still fire normally. + if lineno == prev_line: + return self._trace_callback + self._events.put( ( "breakpoint", diff --git a/tests/functions/test_debug_breakpoints.py b/tests/functions/test_debug_breakpoints.py index 83d09f043..d7a4dcb65 100644 --- a/tests/functions/test_debug_breakpoints.py +++ b/tests/functions/test_debug_breakpoints.py @@ -1032,6 +1032,37 @@ async def test_breakpoint_in_loop_fires_each_iteration(self, script_dir: Path): finally: await runtime.dispose() + async def test_single_line_call_breakpoint_fires_once(self, script_dir: Path): + """Single-line call expression with nested call should not bounce-back. + + ``result = dict(value=bar(val))`` compiles so that the bytecode + visits line 6 twice: once to set up the call, and once after + ``bar()`` returns to execute the outer ``dict()`` call. The + breakpoint should fire only once. + """ + script = _write_script( + script_dir, + "bounce.py", + "def bar(x):\n" + " return x + 1\n" + "\n" + "def main(input):\n" + ' val = input.get("n", 5)\n' # line 5 + " result = dict(value=bar(val))\n" # line 6 ← breakpoint (single-line) + ' return {"result": result["value"]}\n', + ) + runtime, bridge = _build_stack(script, breakpoints=["6"]) + + try: + result = await runtime.execute({"n": 3}) + + assert result.status == UiPathRuntimeStatus.SUCCESSFUL + assert result.output == {"result": 4} # bar(3) = 4 + # The breakpoint on line 6 should fire exactly once, not twice + assert len(bridge.breakpoint_hits) == 1 + finally: + await runtime.dispose() + def test_wait_for_event_raises_on_dead_thread(self, script_dir: Path): """wait_for_event raises RuntimeError when the trace thread dies.""" import threading as _threading From 82eaf05e762aa7eaef5b5f84c9d441842a7a495e Mon Sep 17 00:00:00 2001 From: cristipufu Date: Tue, 3 Mar 2026 16:59:41 +0200 Subject: [PATCH 4/4] fix: handle multiline bounce-back dedup with backward-jump detection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the single-line-only last_line check with a smarter heuristic: track whether any backward jump (line < last_bp_line) occurred since the last breakpoint fired. Bounce-backs only advance forward into argument lines then return — no backward jump. Loop iterations go backward to the loop header first, so the breakpoint correctly fires again. Co-Authored-By: Claude Opus 4.6 --- src/uipath/functions/debug.py | 51 +++++++++++++---------- tests/functions/test_debug_breakpoints.py | 39 +++++++++++++++-- 2 files changed, 64 insertions(+), 26 deletions(-) diff --git a/src/uipath/functions/debug.py b/src/uipath/functions/debug.py index 8ab4bb2aa..ab1fcd8cc 100644 --- a/src/uipath/functions/debug.py +++ b/src/uipath/functions/debug.py @@ -124,19 +124,21 @@ class _FrameState: resumption and skip "completed" on intermediate yields. """ - __slots__ = ("faulted", "is_generator", "last_line") + __slots__ = ("faulted", "is_generator", "last_bp_line", "saw_backward_jump") def __init__(self, *, is_generator: bool = False) -> None: self.faulted: bool = False self.is_generator: bool = is_generator - # Last line event seen in this frame. Used to deduplicate the - # bounce-back pattern where multiline expressions (e.g. - # ``return Foo(arg=bar(...))``) cause the bytecode to revisit the - # call-site line after evaluating arguments on deeper lines. - # Only suppress when the *immediately preceding* line event was - # the same breakpoint line (no intervening lines), so loop - # iterations that pass through other lines still fire normally. - self.last_line: int = -1 + # Bounce-back dedup state. Multiline call expressions like + # ``result = Foo(arg=bar(...))`` cause the bytecode to visit the + # call-site line, advance to argument lines, then bounce back to + # the call-site line for the outer call. We suppress this second + # hit by tracking whether any line *before* the last breakpoint + # was visited — a backward jump indicates a loop iteration (which + # should fire the breakpoint again), while only-forward movement + # followed by a return to the same line indicates a bounce. + self.last_bp_line: int = -1 + self.saw_backward_jump: bool = False class BreakpointController: @@ -364,11 +366,11 @@ def _trace_callback(self, frame: FrameType, event: str, arg: Any) -> Any: return self._trace_callback lineno = frame.f_lineno - # Update last_line for bounce-back dedup (see _FrameState). + # Track backward jumps for bounce-back dedup (see _FrameState). state = self._frame_states.get(id(frame)) - prev_line = state.last_line if state is not None else -1 - if state is not None: - state.last_line = lineno + if state is not None and state.last_bp_line >= 0: + if lineno < state.last_bp_line: + state.saw_backward_jump = True should_break = ( self._step_mode and self._is_project_file(filepath) @@ -376,15 +378,20 @@ def _trace_callback(self, frame: FrameType, event: str, arg: Any) -> Any: if should_break: # Deduplicate: multiline expressions (e.g. - # ``return Foo(arg=bar(...))``) cause the bytecode to - # bounce back to the call-site line after evaluating - # arguments on deeper lines. Without this guard the - # same breakpoint would fire twice per call. - # Only suppress when the *immediately preceding* line - # event was the same line (no intervening lines), so - # loop iterations still fire normally. - if lineno == prev_line: - return self._trace_callback + # ``result = Foo(\n arg=bar(...)\n)``) cause the + # bytecode to bounce back to the call-site line after + # evaluating arguments on deeper lines. Suppress the + # duplicate when we return to the same line and no + # backward jump occurred (which would indicate a loop + # iteration that should fire again). + if state is not None: + is_bounce = ( + lineno == state.last_bp_line and not state.saw_backward_jump + ) + if is_bounce: + return self._trace_callback + state.last_bp_line = lineno + state.saw_backward_jump = False self._events.put( ( diff --git a/tests/functions/test_debug_breakpoints.py b/tests/functions/test_debug_breakpoints.py index d7a4dcb65..2c86f8ee6 100644 --- a/tests/functions/test_debug_breakpoints.py +++ b/tests/functions/test_debug_breakpoints.py @@ -1032,8 +1032,8 @@ async def test_breakpoint_in_loop_fires_each_iteration(self, script_dir: Path): finally: await runtime.dispose() - async def test_single_line_call_breakpoint_fires_once(self, script_dir: Path): - """Single-line call expression with nested call should not bounce-back. + async def test_single_line_bounce_breakpoint_fires_once(self, script_dir: Path): + """Single-line call with nested call should not bounce-back. ``result = dict(value=bar(val))`` compiles so that the bytecode visits line 6 twice: once to set up the call, and once after @@ -1048,7 +1048,39 @@ async def test_single_line_call_breakpoint_fires_once(self, script_dir: Path): "\n" "def main(input):\n" ' val = input.get("n", 5)\n' # line 5 - " result = dict(value=bar(val))\n" # line 6 ← breakpoint (single-line) + " result = dict(value=bar(val))\n" # line 6 ← breakpoint + ' return {"result": result["value"]}\n', + ) + runtime, bridge = _build_stack(script, breakpoints=["6"]) + + try: + result = await runtime.execute({"n": 3}) + + assert result.status == UiPathRuntimeStatus.SUCCESSFUL + assert result.output == {"result": 4} # bar(3) = 4 + assert len(bridge.breakpoint_hits) == 1 + finally: + await runtime.dispose() + + async def test_multiline_bounce_breakpoint_fires_once(self, script_dir: Path): + """Multiline call expression should not bounce-back. + + ``result = dict(\\n value=bar(val),\\n)`` advances from the + call-site line to the argument lines, then bounces back to the + call-site line for the outer call. The breakpoint should fire + only once. + """ + script = _write_script( + script_dir, + "bounce_multi.py", + "def bar(x):\n" + " return x + 1\n" + "\n" + "def main(input):\n" + ' val = input.get("n", 5)\n' # line 5 + " result = dict(\n" # line 6 ← breakpoint + " value=bar(val),\n" # line 7 (argument evaluation) + " )\n" # line 8 ' return {"result": result["value"]}\n', ) runtime, bridge = _build_stack(script, breakpoints=["6"]) @@ -1058,7 +1090,6 @@ async def test_single_line_call_breakpoint_fires_once(self, script_dir: Path): assert result.status == UiPathRuntimeStatus.SUCCESSFUL assert result.output == {"result": 4} # bar(3) = 4 - # The breakpoint on line 6 should fire exactly once, not twice assert len(bridge.breakpoint_hits) == 1 finally: await runtime.dispose()