diff --git a/Framework/Built_In_Automation/Desktop/Windows/BuiltInFunctions.py b/Framework/Built_In_Automation/Desktop/Windows/BuiltInFunctions.py index 6bf3e4a19..80ec9bf32 100644 --- a/Framework/Built_In_Automation/Desktop/Windows/BuiltInFunctions.py +++ b/Framework/Built_In_Automation/Desktop/Windows/BuiltInFunctions.py @@ -66,12 +66,22 @@ # this needs to be here on top, otherwise will return error import clr, System dll_path = os.getcwd().split("Framework")[0] + "Framework" + os.sep + "windows_dll_files" + os.sep -clr.AddReference(dll_path+"UIAutomationClient") -clr.AddReference(dll_path+"UIAutomationTypes") -clr.AddReference(dll_path+"UIAutomationProvider") +clr.AddReference(dll_path + "UIAutomationClient") +clr.AddReference(dll_path + "UIAutomationTypes") +clr.AddReference(dll_path + "UIAutomationProvider") clr.AddReference("System.Windows.Forms") -from System.Windows.Automation import * +from System.Windows.Automation import ( + AutomationElement, + TreeScope, + Condition, + Automation, + InvokePattern, + ValuePattern, + TogglePattern, + SelectionItemPattern, + ExpandCollapsePattern, +) import pyautogui # Should be removed after we complete sequential actions import autoit # The likely method we'll use @@ -209,65 +219,62 @@ def Click_Element_None_Mouse(Element, Expand=True, Gui=False, offset: str | None pattern_name = Automation.PatternName(each) CommonUtil.ExecLog(sModuleInfo, "Pattern name attached to the current element is: %s " % pattern_name, 1) - # Expand and collapse actions - if pattern_name == "ExpandCollapse": - if Expand: - # check to see if its expanded, if expanded, then do nothing... 
if not, expand it - status = Element.GetCurrentPattern( - ExpandCollapsePattern.Pattern - ).Current.ExpandCollapseState - if status == 0: - CommonUtil.ExecLog(sModuleInfo, "Expanding the item", 1) - Element.GetCurrentPattern( + try: + # Expand and collapse actions + if pattern_name == "ExpandCollapse": + if Expand: + # check to see if its expanded, if expanded, then do nothing... if not, expand it + status = Element.GetCurrentPattern( ExpandCollapsePattern.Pattern - ).Expand() - return "passed" - elif status == 1: - CommonUtil.ExecLog(sModuleInfo, "Already expanded", 1) - return "passed" - else: - # check to see if its Collapsed, if Collapsed, then do nothing... if not, Collapse it - status = Element.GetCurrentPattern( - ExpandCollapsePattern.Pattern - ).Current.ExpandCollapseState - if status == 1: - CommonUtil.ExecLog(sModuleInfo, "Collapsing the item", 1) - Element.GetCurrentPattern( + ).Current.ExpandCollapseState + if status == 0: + CommonUtil.ExecLog(sModuleInfo, "Expanding the item", 1) + Element.GetCurrentPattern( + ExpandCollapsePattern.Pattern + ).Expand() + return "passed" + elif status == 1: + CommonUtil.ExecLog(sModuleInfo, "Already expanded", 1) + return "passed" + else: + # check to see if its Collapsed, if Collapsed, then do nothing... 
if not, Collapse it + status = Element.GetCurrentPattern( ExpandCollapsePattern.Pattern - ).Collapse() - return "passed" - elif status == 0: - CommonUtil.ExecLog(sModuleInfo, "Already collapsed", 1) - return "passed" - # Invoking actions - elif pattern_name == "Invoke": - CommonUtil.ExecLog(sModuleInfo, "Invoking the object", 1) - time.sleep(unnecessary_sleep) - Element.GetCurrentPattern(InvokePattern.Pattern).Invoke() - return "passed" - # Selection of an item - elif pattern_name == "SelectionItem": - CommonUtil.ExecLog(sModuleInfo, "Selecting an item", 1) - Element.GetCurrentPattern(SelectionItemPattern.Pattern).Select() - time.sleep(unnecessary_sleep) - return "passed" - # Toggling action - - elif pattern_name == "Toggle": - CommonUtil.ExecLog(sModuleInfo, "Toggling an item", 1) - Element.GetCurrentPattern(TogglePattern.Pattern).Toggle() - time.sleep(unnecessary_sleep) - return "passed" - # if no patterns are found, then we do an actual mouse click - else: - # x = int (Element.Current.BoundingRectangle.X) - # y = int (Element.Current.BoundingRectangle.Y) + ).Current.ExpandCollapseState + if status == 1: + CommonUtil.ExecLog(sModuleInfo, "Collapsing the item", 1) + Element.GetCurrentPattern( + ExpandCollapsePattern.Pattern + ).Collapse() + return "passed" + elif status == 0: + CommonUtil.ExecLog(sModuleInfo, "Already collapsed", 1) + return "passed" + # Invoking actions + elif pattern_name == "Invoke": + CommonUtil.ExecLog(sModuleInfo, "Invoking the object", 1) + time.sleep(unnecessary_sleep) + Element.GetCurrentPattern(InvokePattern.Pattern).Invoke() + return "passed" + # Selection of an item + elif pattern_name == "SelectionItem": + CommonUtil.ExecLog(sModuleInfo, "Selecting an item", 1) + Element.GetCurrentPattern(SelectionItemPattern.Pattern).Select() + time.sleep(unnecessary_sleep) + return "passed" + # Toggling action + elif pattern_name == "Toggle": + CommonUtil.ExecLog(sModuleInfo, "Toggling an item", 1) + 
Element.GetCurrentPattern(TogglePattern.Pattern).Toggle() + time.sleep(unnecessary_sleep) + return "passed" + except Exception as e: CommonUtil.ExecLog( sModuleInfo, - "We did not find any pattern for this object, so we will click by mouse with location", - 1, + f"Normal click ({pattern_name}) failed or did nothing ({e}). Automatically using GUI click.", + 2, ) - x, y = get_coords(Element) + x, y = get_coords(Element, offset) win32api.SetCursorPos((x, y)) win32api.mouse_event(win32con.MOUSEEVENTF_LEFTDOWN, x, y, 0, 0) time.sleep(0.1) @@ -275,6 +282,20 @@ def Click_Element_None_Mouse(Element, Expand=True, Gui=False, offset: str | None time.sleep(unnecessary_sleep) return "passed" + # if no patterns matched the standard ones, then we do an actual mouse click as fallback + CommonUtil.ExecLog( + sModuleInfo, + "We did not find any suitable pattern for this object, so we will click by mouse with location", + 1, + ) + x, y = get_coords(Element, offset) + win32api.SetCursorPos((x, y)) + win32api.mouse_event(win32con.MOUSEEVENTF_LEFTDOWN, x, y, 0, 0) + time.sleep(0.1) + win32api.mouse_event(win32con.MOUSEEVENTF_LEFTUP, x, y, 0, 0) + time.sleep(unnecessary_sleep) + return "passed" + CommonUtil.ExecLog(sModuleInfo, "Unable to perform the action on the object", 3) return "zeuz_failed" except Exception: @@ -309,7 +330,7 @@ def Check_uncheck(data_set): if command == "check" and is_selected == "On": CommonUtil.ExecLog(sModuleInfo, "The element is already checked so skipped it", 1) return "passed" - elif command == "uncheck" and not is_selected: + elif command == "uncheck" and is_selected == "Off": CommonUtil.ExecLog(sModuleInfo, "The element is already unchecked so skipped it", 1) return "passed" try: @@ -2152,7 +2173,7 @@ def Run_Application(data_set): #last_start_time = time.time() autoit.send("^{ESC}") time.sleep(keypress_interval) - autoit.send(Desktop_app) + autoit.send(Desktop_app, 1) time.sleep(keypress_interval) autoit.send("{ENTER}") CommonUtil.ExecLog(sModuleInfo, 
"Successfully launched your app", 1) diff --git a/server/main.py b/server/main.py index 3e295b8d2..d0ccb2cf9 100644 --- a/server/main.py +++ b/server/main.py @@ -11,6 +11,7 @@ from server.mobile import router as mobile_router, start_ui_dump_uploads from server.mac import router as mac_router from server.linux import router as linux_router +from server.windows import router as windows_router, upload_windows_ui_dump from server.installers import router as installers_router import asyncio @@ -43,12 +44,14 @@ def main() -> FastAPI: v1router.include_router(mobile_router) v1router.include_router(mac_router) v1router.include_router(linux_router) + v1router.include_router(windows_router) v1router.include_router(installers_router) app = FastAPI() @app.on_event("startup") async def _start_background_uploads(): start_ui_dump_uploads() + asyncio.create_task(upload_windows_ui_dump()) app.include_router(v1router) origins = [
diff --git a/server/windows.py b/server/windows.py
new file mode 100644
index 000000000..a9e63cd4b
--- /dev/null
+++ b/server/windows.py
@@ -0,0 +1,389 @@
+import hashlib
+import html
+import os
+import sys
+import asyncio
+import requests
+import time
+import xml.etree.ElementTree as ET
+from typing import Literal
+from fastapi import APIRouter
+from pydantic import BaseModel
+
+from Framework.Utilities import ConfigModule, CommonUtil
+
+
+router = APIRouter(prefix="/windows", tags=["windows"])
+
+# Window name (or substring) most recently passed to /inspect, and when it was set.
+# upload_windows_ui_dump() reads both to pick the window it dumps.
+_TARGET_APP_NAME: str | None = None
+_TARGET_APP_SET_TIME: float = 0.0
+
+# Key combination /snapshot waits for before it captures the UI tree.
+_HOTKEY = "ctrl+shift+i"
+
+
+class InspectorResponse(BaseModel):
+    """Response model for the /inspect and /snapshot endpoints."""
+
+    status: Literal["ok", "error"] = "ok"
+    ui_xml: str | None = None
+    error: str | None = None
+
+
+class AppInfo(BaseModel):
+    """Model for an active application/window."""
+
+    name: str
+    pid: int
+    class_name: str
+    automation_id: str
+
+
+def _xml_escape(value: str) -> str:
+    """Escape the characters &, <, > and " for XML that is assembled as a plain string.
+
+    Do not apply this to values handed to xml.etree.ElementTree: its serializer
+    escapes attribute values on its own, so escaping first would double-escape them.
+    """
+    # html.escape(quote=False) covers &, < and >. The double quote is handled separately
+    # because quote=True would also rewrite the single quote, which this helper leaves alone.
+    return html.escape(value, quote=False).replace('"', html.escape('"'))
+
+
+_automation_loaded = False
+
+
+def _get_automation_imports():
+    """Lazily import UIAutomation types (only available on Windows with pythonnet).
+
+    Mirrors the clr setup from Framework/Built_In_Automation/Desktop/Windows/BuiltInFunctions.py.
+
+    Returns:
+        The tuple (AutomationElement, TreeScope, Condition, TreeWalker).
+    """
+    global _automation_loaded
+    if not _automation_loaded:
+        import clr
+
+        # The DLL folder is derived from the current working directory.
+        dll_path = os.getcwd().split("Framework")[0] + "Framework" + os.sep + "windows_dll_files" + os.sep
+        clr.AddReference(dll_path + "UIAutomationClient")
+        clr.AddReference(dll_path + "UIAutomationTypes")
+        clr.AddReference(dll_path + "UIAutomationProvider")
+        _automation_loaded = True
+
+    from System.Windows.Automation import (
+        AutomationElement,
+        TreeScope,
+        Condition,
+        TreeWalker,
+    )
+    return AutomationElement, TreeScope, Condition, TreeWalker
+
+
+def _build_element_tree(xml_parent, ui_element, max_depth: int = 50, _depth: int = 0):
+    """Recursively build an ET tree from a UIAutomation element.
+
+    Mirrors create_tree() from ZeuZ_Windows_Inspector.py: one "div" element per child with
+    Name, AutomationId, LocalizedControlType, ClassName, Left, Right, Top, Bottom.
+
+    Args:
+        xml_parent: ET element that receives the "div" children.
+        ui_element: UIAutomation element whose children are enumerated.
+        max_depth: Recursion stops once _depth exceeds this value.
+        _depth: Current recursion depth; callers leave it at 0.
+    """
+    if _depth > max_depth:
+        return
+
+    _, TreeScope, Condition, _ = _get_automation_imports()
+    try:
+        child_elements = ui_element.FindAll(TreeScope.Children, Condition.TrueCondition)
+    except Exception:
+        return
+
+    for i in range(child_elements.Count):
+        each_child = child_elements[i]
+        try:
+            current = each_child.Current
+            # Raw values on purpose: ElementTree escapes attribute values when it serializes,
+            # so escaping them here as well would emit doubly escaped entities.
+            elem_name = current.Name or ""
+            elem_automationid = current.AutomationId or ""
+            elem_control = current.LocalizedControlType or ""
+            elem_class = current.ClassName or ""
+            try:
+                rect = current.BoundingRectangle
+                left, right = str(rect.Left), str(rect.Right)
+                top, bottom = str(rect.Top), str(rect.Bottom)
+            except Exception:
+                left, right, top, bottom = "", "", "", ""
+
+            attribs = {
+                "Name": elem_name,
+                "AutomationId": elem_automationid,
+                "LocalizedControlType": elem_control,
+                "ClassName": elem_class,
+                "Left": left,
+                "Right": right,
+                "Top": top,
+                "Bottom": bottom,
+            }
+            xml_child = ET.SubElement(xml_parent, "div", **attribs)
+            _build_element_tree(xml_child, each_child, max_depth, _depth + 1)
+        except Exception:
+            # Best effort: skip a child whose properties cannot be read.
+            continue
+
+
+def _remove_coordinates(root):
+    """Remove Left/Right/Top/Bottom attributes from every descendant of root.
+
+    Matches inspector's Remove_coordinate(). The root element itself is not modified.
+    """
+    for each in root:
+        att = each.attrib
+        for key in ("Left", "Right", "Top", "Bottom"):
+            att.pop(key, None)
+        _remove_coordinates(each)
+
+
+def _find_window_by_name(app_name: str):
+    """Find the top-level window element matching app_name (case-insensitive substring match).
+
+    Returns:
+        The first matching child of the UIAutomation root element, or None.
+    """
+    AutomationElement, TreeScope, Condition, _ = _get_automation_imports()
+    root = AutomationElement.RootElement
+    windows = root.FindAll(TreeScope.Children, Condition.TrueCondition)
+
+    app_lower = app_name.lower()
+    for i in range(windows.Count):
+        try:
+            win = windows[i]
+            win_name = win.Current.Name or ""
+            if app_lower in win_name.lower():
+                return win
+        except Exception:
+            continue
+    return None
+
+
+def _get_ui_tree(app_name: str) -> ET.Element | None:
+    """Build the UI tree as an ET Element matching ZeuZ_Windows_Inspector format.
+
+    Returns a "body" root with Name, AutomationId, LocalizedControlType, ClassName, pid.
+    Descendants are "div" elements with Name, AutomationId, LocalizedControlType,
+    ClassName plus Left/Right/Top/Bottom. Returns None when no window matches app_name.
+    """
+    window = _find_window_by_name(app_name)
+    if window is None:
+        return None
+
+    current = window.Current
+    # Raw values on purpose: ElementTree escapes attribute values when it serializes.
+    attribs = {
+        "Name": current.Name or "",
+        "AutomationId": current.AutomationId or "",
+        "LocalizedControlType": current.LocalizedControlType or "",
+        "ClassName": current.ClassName or "",
+        "pid": str(current.ProcessId) if hasattr(current, "ProcessId") else "",
+    }
+    root = ET.Element("body", **attribs)
+    _build_element_tree(root, window)
+    return root
+
+
+def _get_ui_tree_xml(app_name: str) -> str | None:
+    """Get the full UI tree of a window as XML string (with coordinates, for /inspect)."""
+    root = _get_ui_tree(app_name)
+    if root is None:
+        return None
+    try:
+        ET.indent(root)
+    except AttributeError:
+        # ET.indent() only exists on Python 3.9+; older versions keep the compact form.
+        pass
+    return ET.tostring(root, encoding="unicode")
+
+
+def _get_ui_tree_xml_for_upload(app_name: str) -> str | None:
+    """Get the UI tree XML for upload (without coordinates, matching inspector's uploaded version)."""
+    root = _get_ui_tree(app_name)
+    if root is None:
+        return None
+    _remove_coordinates(root)
+    try:
+        # An empty indent string puts every element on its own line without leading spaces.
+        ET.indent(root, "")
+    except AttributeError:
+        pass
+    return ET.tostring(root, encoding="unicode")
+
+
+def _wait_hotkey_and_capture(app_name: str) -> str | None:
+    """Block until user presses the hotkey, then immediately capture the UI tree.
+
+    This runs in a thread so the menu stays open (no focus change).
+    """
+    import keyboard
+
+    # NOTE(review): this wait has no timeout, so the worker thread stays blocked until the
+    # hotkey is pressed, even if the HTTP client has already gone away.
+    keyboard.wait(_HOTKEY)
+    return _get_ui_tree_xml(app_name)
+
+
+def _get_active_apps() -> list[AppInfo]:
+    """Return all top-level windows (active apps) from the UIAutomation tree."""
+    AutomationElement, TreeScope, Condition, _ = _get_automation_imports()
+    root = AutomationElement.RootElement
+    windows = root.FindAll(TreeScope.Children, Condition.TrueCondition)
+
+    apps: list[AppInfo] = []
+    for i in range(windows.Count):
+        try:
+            win = windows[i]
+            name = win.Current.Name or ""
+            # Skip empty-named windows (usually invisible system windows)
+            if not name.strip():
+                continue
+            apps.append(AppInfo(
+                name=name,
+                pid=win.Current.ProcessId,
+                class_name=win.Current.ClassName or "",
+                automation_id=win.Current.AutomationId or "",
+            ))
+        except Exception:
+            continue
+    return apps
+
+
+@router.get("/inspect")
+async def inspect(app_name: str):
+    """Get the Windows UI DOM (XML tree) for a given application.
+
+    Also records app_name as the target of the background upload loop.
+
+    Args:
+        app_name: Name (or substring) of the target application window. Required.
+    """
+    global _TARGET_APP_NAME, _TARGET_APP_SET_TIME
+    _TARGET_APP_NAME = app_name
+    _TARGET_APP_SET_TIME = time.time()
+
+    if sys.platform != "win32":
+        return InspectorResponse(status="error", error="This endpoint is only available on Windows")
+
+    try:
+        xml_content = await asyncio.to_thread(_get_ui_tree_xml, app_name)
+        if not xml_content:
+            return InspectorResponse(
+                status="error",
+                error=f"No window found matching '{app_name}'. Use /apps to list active windows.",
+            )
+        return InspectorResponse(status="ok", ui_xml=xml_content)
+    except Exception as e:
+        return InspectorResponse(status="error", error=str(e))
+
+
+@router.get("/snapshot")
+async def snapshot(app_name: str):
+    """Wait for hotkey press, then capture and return the UI tree.
+
+    The request blocks until the user presses the hotkey (Ctrl+Shift+I).
+    This allows capturing menus/popups that disappear on focus change.
+
+    Args:
+        app_name: Name (or substring) of the target application window.
+    """
+    if sys.platform != "win32":
+        return InspectorResponse(status="error", error="This endpoint is only available on Windows")
+
+    try:
+        xml_content = await asyncio.to_thread(_wait_hotkey_and_capture, app_name)
+        if not xml_content:
+            return InspectorResponse(
+                status="error",
+                error=f"No window found matching '{app_name}'. Use /apps to list active windows.",
+            )
+        return InspectorResponse(status="ok", ui_xml=xml_content)
+    except Exception as e:
+        return InspectorResponse(status="error", error=str(e))
+
+
+@router.get("/apps", response_model=list[AppInfo])
+async def get_apps():
+    """Return all opened/active application windows (an empty list off Windows or on error)."""
+    if sys.platform != "win32":
+        return []
+
+    try:
+        return await asyncio.to_thread(_get_active_apps)
+    except Exception as e:
+        # Keep the best-effort empty result, but leave a trace of why nothing was listed.
+        CommonUtil.ExecLog("", f"Error listing Windows apps: {str(e)}", iLogLevel=3)
+        return []
+
+
+async def upload_windows_ui_dump():
+    """Continuously upload Windows UI dump if changed.
+
+    Only runs on Windows. Uploads to the server with key 'dom_win'. A dump counts as
+    uploaded only after the server answered with a success status, so a failed upload
+    is retried on the next pass.
+    """
+    global _TARGET_APP_NAME, _TARGET_APP_SET_TIME
+
+    if sys.platform != "win32":
+        return
+
+    prev_xml_hash = ""
+    while True:
+        try:
+            # Forget a target that was last requested more than 8 hours ago.
+            if _TARGET_APP_NAME and (time.time() - _TARGET_APP_SET_TIME) > 8 * 3600:
+                _TARGET_APP_NAME = None
+
+            target_app = _TARGET_APP_NAME
+
+            if target_app:
+                xml_content = await asyncio.to_thread(_get_ui_tree_xml_for_upload, target_app)
+                if xml_content:
+                    new_xml_hash = hashlib.sha256(xml_content.encode("utf-8")).hexdigest()
+
+                    if prev_xml_hash != new_xml_hash:
+                        url = (
+                            ConfigModule.get_config_value("Authentication", "server_address").strip()
+                            + "/node_ai_contents/"
+                        )
+                        apiKey = ConfigModule.get_config_value("Authentication", "api-key").strip()
+
+                        res = await asyncio.to_thread(
+                            requests.post,
+                            url,
+                            headers={"X-Api-Key": apiKey},
+                            json={
+                                "dom_win": {"dom": xml_content},
+                                "node_id": CommonUtil.MachineInfo().getLocalUser().lower(),
+                                "app_name": target_app,
+                            },
+                            timeout=10,
+                        )
+                        if res.ok:
+                            # Remember the hash only now, so a rejected dump is sent again.
+                            prev_xml_hash = new_xml_hash
+                            CommonUtil.ExecLog("", "Windows UI dump uploaded successfully", iLogLevel=1)
+                        else:
+                            CommonUtil.ExecLog(
+                                "",
+                                f"Windows UI dump upload failed with status {res.status_code}",
+                                iLogLevel=2,
+                            )
+        except Exception as e:
+            CommonUtil.ExecLog("", f"Error uploading Windows UI dump: {str(e)}", iLogLevel=3)
+
+        await asyncio.sleep(5)