Skip to content

Commit 0593115

Browse files
committed
feat: add Windows UI automation inspection and background DOM monitoring endpoints
1 parent 629e8b5 commit 0593115

File tree

2 files changed

+266
-0
lines changed

2 files changed

+266
-0
lines changed

server/main.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from server.mobile import router as mobile_router, start_ui_dump_uploads
1212
from server.mac import router as mac_router
1313
from server.linux import router as linux_router
14+
from server.windows import router as windows_router, upload_windows_ui_dump
1415
from server.installers import router as installers_router
1516
import asyncio
1617

@@ -43,12 +44,14 @@ def main() -> FastAPI:
4344
v1router.include_router(mobile_router)
4445
v1router.include_router(mac_router)
4546
v1router.include_router(linux_router)
47+
v1router.include_router(windows_router)
4648
v1router.include_router(installers_router)
4749
app = FastAPI()
4850

4951
@app.on_event("startup")
async def _start_background_uploads():
    """Launch the background UI-dump work when the application starts.

    Calls ``start_ui_dump_uploads()`` and schedules
    ``upload_windows_ui_dump()`` as an asyncio task.
    """
    start_ui_dump_uploads()
    # The event loop keeps only weak references to tasks, so a task whose
    # result is discarded may be garbage-collected while it is still running.
    # Keeping it on app.state holds a strong reference for the app's lifetime.
    app.state.windows_ui_dump_task = asyncio.create_task(upload_windows_ui_dump())
5255
app.include_router(v1router)
5356

5457
origins = [

server/windows.py

Lines changed: 263 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,263 @@
1+
import hashlib
2+
import os
3+
import sys
4+
import asyncio
5+
import requests
6+
import time
7+
from typing import Literal
8+
from fastapi import APIRouter
9+
from pydantic import BaseModel
10+
11+
from Framework.Utilities import ConfigModule, CommonUtil
12+
13+
14+
# All routes declared in this module are served under the /windows prefix.
router = APIRouter(prefix="/windows", tags=["windows"])

# Window name (or substring) most recently requested through /inspect.
# The background uploader reads it to decide which window to dump.
_TARGET_APP_NAME: str | None = None
# time.time() value recorded when _TARGET_APP_NAME was last set; the uploader
# uses it to expire a stale target.
_TARGET_APP_SET_TIME: float = 0.0
18+
19+
20+
class InspectorResponse(BaseModel):
    """Payload returned by the /inspect endpoint."""

    # "ok" when a UI tree was produced, "error" otherwise.
    status: Literal["ok", "error"] = "ok"
    # XML dump of the matched window's UI tree; left as None on error.
    ui_xml: str | None = None
    # Human-readable failure reason; left as None on success.
    error: str | None = None
25+
26+
27+
class AppInfo(BaseModel):
    """Description of one top-level window as reported by /apps."""

    # Window title taken from the element's UIAutomation Name.
    name: str
    # Id of the process that owns the window.
    pid: int
    # UIAutomation ClassName ("" when the element reports none).
    class_name: str
    # UIAutomation AutomationId ("" when the element reports none).
    automation_id: str
33+
34+
35+
def _xml_escape(value: str) -> str:
36+
"""Escape special characters for XML attributes."""
37+
return (
38+
value
39+
.replace("&", "&")
40+
.replace("<", "&lt;")
41+
.replace(">", "&gt;")
42+
.replace('"', "&quot;")
43+
)
44+
45+
46+
_automation_loaded = False


def _get_automation_imports():
    """Lazily import UIAutomation types (only available on Windows with pythonnet).

    Mirrors the clr setup from Framework/Built_In_Automation/Desktop/Windows/BuiltInFunctions.py.
    The assembly references are registered on the first successful call;
    later calls skip that setup and only perform the import.

    Returns:
        The tuple ``(AutomationElement, TreeScope, Condition, TreeWalker)``.

    Raises:
        ImportError: If ``clr`` or ``System.Windows.Automation`` cannot be imported.
    """
    global _automation_loaded
    if not _automation_loaded:
        import clr

        cwd = os.getcwd()
        head, marker, _ = cwd.partition("Framework")
        if marker:
            # Working directory is at or below the Framework tree: keep
            # everything up to and including the first "Framework".
            framework_dir = head + marker
        else:
            # Working directory does not mention "Framework". Plain string
            # concatenation would glue the two names together without a
            # separator, so join them as path components instead.
            framework_dir = os.path.join(cwd, "Framework")
        dll_path = os.path.join(framework_dir, "windows_dll_files") + os.sep

        for assembly in ("UIAutomationClient", "UIAutomationTypes", "UIAutomationProvider"):
            clr.AddReference(dll_path + assembly)
        # Only mark as loaded once every reference was added successfully.
        _automation_loaded = True

    from System.Windows.Automation import (
        AutomationElement,
        TreeScope,
        Condition,
        TreeWalker,
    )
    return AutomationElement, TreeScope, Condition, TreeWalker
70+
71+
72+
def _dump_element_to_xml(element, indent_level: int = 0, max_depth: int = 30) -> list[str]:
    """Recursively dump a UIAutomation element tree to XML lines.

    The dump is best-effort: an element whose properties cannot be read is
    omitted, and a failure while enumerating children keeps whatever children
    were already rendered. Opening and closing tags are always emitted as a
    pair so the output stays balanced.

    Args:
        element: UIAutomation element to serialize.
        indent_level: Current depth; also the number of indent units used.
        max_depth: Elements deeper than this are not rendered.

    Returns:
        The XML lines for ``element`` and its descendants (possibly empty).
    """
    if indent_level > max_depth:
        return []

    indent = " " * indent_level

    try:
        current = element.Current
        control_type = current.LocalizedControlType or "unknown"
        # Turn the control type into a usable XML tag name: anything that is
        # not a letter, digit, "_", "-" or "." (spaces included) becomes "_",
        # and a name may not start with a digit or punctuation.
        tag = "".join(
            ch if (ch.isalnum() or ch in "_-.") else "_" for ch in control_type
        )
        if not (tag[0].isalpha() or tag[0] == "_"):
            tag = "_" + tag

        name = _xml_escape(current.Name or "")
        class_name = _xml_escape(current.ClassName or "")
        automation_id = _xml_escape(current.AutomationId or "")

        attrs = f'name="{name}"'
        if class_name:
            attrs += f' class="{class_name}"'
        if automation_id:
            attrs += f' automation_id="{automation_id}"'

        # Add bounding rectangle if available
        try:
            rect = current.BoundingRectangle
            if rect.Width > 0 or rect.Height > 0:
                attrs += f' x="{int(rect.Left)}" y="{int(rect.Top)}"'
                attrs += f' width="{int(rect.Width)}" height="{int(rect.Height)}"'
        except Exception:
            # The rectangle is optional decoration; ignore unreadable values.
            pass
    except Exception:
        # The element could not be read; leave it out of the dump.
        return []

    # Render the children before emitting this element's tags, so a failure
    # part-way through can never leave an opening tag without its closing tag.
    child_lines: list[str] = []
    has_children = False
    try:
        _, TreeScope, Condition, _ = _get_automation_imports()
        children = element.FindAll(TreeScope.Children, Condition.TrueCondition)
        has_children = children.Count > 0
        for i in range(children.Count):
            child_lines.extend(_dump_element_to_xml(children[i], indent_level + 1, max_depth))
    except Exception:
        # Deliberate best-effort: keep the children rendered so far.
        pass

    if has_children:
        return [f'{indent}<{tag} {attrs}>', *child_lines, f'{indent}</{tag}>']
    return [f'{indent}<{tag} {attrs}/>']
121+
122+
123+
def _find_window_by_name(app_name: str):
124+
"""Find the top-level window element matching app_name (case-insensitive substring match)."""
125+
AutomationElement, TreeScope, Condition, _ = _get_automation_imports()
126+
root = AutomationElement.RootElement
127+
windows = root.FindAll(TreeScope.Children, Condition.TrueCondition)
128+
129+
app_lower = app_name.lower()
130+
for i in range(windows.Count):
131+
try:
132+
win = windows[i]
133+
win_name = win.Current.Name or ""
134+
if app_lower in win_name.lower():
135+
return win
136+
except Exception:
137+
continue
138+
return None
139+
140+
141+
def _get_ui_tree_xml(app_name: str) -> str | None:
    """Serialize the UI tree of the window matching ``app_name`` as XML text.

    Returns:
        The XML declaration followed by the dumped element lines, joined with
        newlines, or ``None`` when no window matches ``app_name``.
    """
    target = _find_window_by_name(app_name)
    if target is not None:
        element_lines = _dump_element_to_xml(target, indent_level=0)
        return "\n".join(['<?xml version="1.0" encoding="UTF-8"?>', *element_lines])
    return None
150+
151+
152+
def _get_active_apps() -> list[AppInfo]:
    """Collect every named top-level window from the UIAutomation tree.

    Windows with an empty or whitespace-only name are left out, as are
    windows whose properties cannot be read.
    """
    AutomationElement, TreeScope, Condition, _ = _get_automation_imports()
    desktop = AutomationElement.RootElement
    top_level = desktop.FindAll(TreeScope.Children, Condition.TrueCondition)

    found: list[AppInfo] = []
    for position in range(top_level.Count):
        try:
            candidate = top_level[position]
            title = candidate.Current.Name or ""
            # Unnamed windows are usually invisible system windows.
            if title.strip():
                found.append(
                    AppInfo(
                        name=title,
                        pid=candidate.Current.ProcessId,
                        class_name=candidate.Current.ClassName or "",
                        automation_id=candidate.Current.AutomationId or "",
                    )
                )
        except Exception:
            continue
    return found
175+
176+
177+
@router.get("/inspect", response_model=InspectorResponse)
def inspect(app_name: str):
    """Get the Windows UI DOM (XML tree) for a given application.

    The requested name is also recorded as the target for the background
    uploader, together with the time of the request.

    Args:
        app_name: Name (or substring) of the target application window. Required.

    Returns:
        An InspectorResponse with ``status="ok"`` and the XML in ``ui_xml``,
        or ``status="error"`` and a message in ``error``.
    """
    global _TARGET_APP_NAME, _TARGET_APP_SET_TIME
    _TARGET_APP_NAME = app_name
    _TARGET_APP_SET_TIME = time.time()

    if sys.platform != "win32":
        return InspectorResponse(status="error", error="This endpoint is only available on Windows")

    try:
        xml_content = _get_ui_tree_xml(app_name)
        if not xml_content:
            return InspectorResponse(
                status="error",
                error=f"No window found matching '{app_name}'. Use /apps to list active windows.",
            )
        return InspectorResponse(status="ok", ui_xml=xml_content)
    except Exception as e:
        # Report the failure in the response body instead of raising.
        return InspectorResponse(status="error", error=str(e))
201+
202+
203+
@router.get("/apps", response_model=list[AppInfo])
def get_apps():
    """Return all opened/active application windows.

    Returns:
        The list of named top-level windows, or an empty list when not
        running on Windows or when the windows could not be enumerated.
    """
    if sys.platform != "win32":
        return []

    try:
        return _get_active_apps()
    except Exception as e:
        # Stay best-effort for the caller, but log the failure so an empty
        # list caused by an error can be told apart from "no windows open".
        CommonUtil.ExecLog("", f"Error listing Windows applications: {str(e)}", iLogLevel=3)
        return []
213+
214+
215+
async def upload_windows_ui_dump():
    """Continuously upload the target window's UI dump whenever it changes.

    Only runs on Windows; returns immediately on other platforms. Every
    5 seconds the window last requested through /inspect is dumped to XML.
    When that XML differs from the last dump that was uploaded successfully,
    it is POSTed to ``<server_address>/node_ai_contents/`` under the
    ``dom_win`` key. The target is cleared once it is more than 8 hours old.
    """
    global _TARGET_APP_NAME, _TARGET_APP_SET_TIME

    if sys.platform != "win32":
        return

    # Hash of the last dump the server accepted.
    prev_xml_hash = ""
    while True:
        try:
            # Forget the target when no /inspect request refreshed it for 8 hours.
            if _TARGET_APP_NAME and (time.time() - _TARGET_APP_SET_TIME) > 8 * 3600:
                _TARGET_APP_NAME = None

            # Snapshot the global so one iteration uses a single target name.
            target_app = _TARGET_APP_NAME

            if target_app:
                # Walking the UI tree is blocking work; keep it off the event loop.
                xml_content = await asyncio.to_thread(_get_ui_tree_xml, target_app)
                if xml_content:
                    new_xml_hash = hashlib.sha256(xml_content.encode("utf-8")).hexdigest()

                    if prev_xml_hash != new_xml_hash:
                        url = (
                            ConfigModule.get_config_value("Authentication", "server_address").strip()
                            + "/node_ai_contents/"
                        )
                        apiKey = ConfigModule.get_config_value("Authentication", "api-key").strip()

                        res = await asyncio.to_thread(
                            requests.post,
                            url,
                            headers={"X-Api-Key": apiKey},
                            json={
                                "dom_win": {"dom": xml_content},
                                "node_id": CommonUtil.MachineInfo().getLocalUser().lower(),
                                "app_name": target_app,
                            },
                            timeout=10,
                        )
                        if res.ok:
                            # Remember the hash only after the server accepted
                            # the dump, so a failed upload is retried on the
                            # next pass instead of being skipped as "unchanged".
                            prev_xml_hash = new_xml_hash
                            CommonUtil.ExecLog("", "Windows UI dump uploaded successfully", iLogLevel=1)
                        else:
                            CommonUtil.ExecLog(
                                "",
                                f"Windows UI dump upload failed with status {res.status_code}",
                                iLogLevel=3,
                            )
        except Exception as e:
            CommonUtil.ExecLog("", f"Error uploading Windows UI dump: {str(e)}", iLogLevel=3)

        await asyncio.sleep(5)

0 commit comments

Comments
 (0)