Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions predicate/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,17 @@
# Ordinal support (Phase 3)
from .ordinal import OrdinalIntent, boost_ordinal_elements, detect_ordinal_intent, select_by_ordinal
from .overlay import clear_overlay, show_overlay
from .overlay_dismissal import OverlayDismissResult, dismiss_overlays, dismiss_overlays_before_agent
from .permissions import PermissionPolicy
from .pruning import (
CategoryDetectionResult,
PrunedSnapshotContext,
PruningTaskCategory,
SkeletonDomNode,
classify_task_category,
prune_snapshot_for_task,
serialize_pruned_snapshot,
)
from .query import find, query
from .read import extract, extract_async, read, read_best_effort
from .recorder import Recorder, Trace, TraceStep, record
Expand Down Expand Up @@ -250,6 +260,10 @@
"screenshot",
"show_overlay",
"clear_overlay",
# Overlay dismissal (proactive popup/banner removal)
"OverlayDismissResult",
"dismiss_overlays",
"dismiss_overlays_before_agent",
# Text Search
"find_text_rect",
"TextRectSearchResult",
Expand Down Expand Up @@ -313,6 +327,13 @@
"save_storage_state",
# Formatting (v0.12.0+)
"format_snapshot_for_llm",
"CategoryDetectionResult",
"PrunedSnapshotContext",
"PruningTaskCategory",
"SkeletonDomNode",
"classify_task_category",
"prune_snapshot_for_task",
"serialize_pruned_snapshot",
# Agent Config (v0.12.0+)
"AgentConfig",
# Enums
Expand Down
42 changes: 40 additions & 2 deletions predicate/agent_runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,40 @@ async def get_url(self) -> str:
self._cached_url = url
return url

async def read_markdown(self, max_chars: int = 8000) -> str | None:
"""
Read page content as markdown for semantic understanding.

This extracts the page HTML and converts it to markdown format,
which is useful for LLM planning to understand page context
(e.g., product listings, form fields, navigation structure).

Args:
max_chars: Maximum characters to return (default 8000).
Truncates from the end if content exceeds this limit.

Returns:
Markdown string if successful, None if extraction fails.
"""
try:
page = getattr(self.backend, "page", None)
if page is None:
return None

# Import here to avoid circular dependency
from .read import _fallback_read_from_page_async

result = await _fallback_read_from_page_async(page, output_format="markdown")
if result is None or result.status != "success":
return None

content = result.content
if len(content) > max_chars:
content = content[:max_chars]
return content
except Exception:
return None

async def get_viewport_height(self) -> int:
"""
Get current viewport height in pixels.
Expand Down Expand Up @@ -398,19 +432,23 @@ async def click(self, element_id: int) -> None:

await self.record_action(f"CLICK({element_id})")

async def type(self, element_id: int, text: str) -> None:
async def type(self, element_id: int, text: str, *, delay_ms: float | None = None) -> None:
"""
Type text into an element.

Args:
element_id: Element ID from snapshot
text: Text to type
delay_ms: Optional delay between keystrokes in milliseconds
"""
# First click to focus
await self.click(element_id)

# Then type
await self.backend.type_text(text)
if delay_ms is None:
await self.backend.type_text(text)
else:
await self.backend.type_text(text, delay_ms=delay_ms)
await self.record_action(f"TYPE({element_id}, '{text[:20]}...')" if len(text) > 20 else f"TYPE({element_id}, '{text}')")

async def press(self, key: str) -> None:
Expand Down
7 changes: 6 additions & 1 deletion predicate/agents/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@
- RuntimeAgent (execution loop and bounded vision fallback)

Agent types:
- PredicateBrowserAgent: Single-executor agent with manual step definitions
- PredicateAgent: Branded alias for PlannerExecutorAgent (recommended for external use)
- PlannerExecutorAgent: Two-tier agent with LLM-generated plans
- PredicateBrowserAgent: Single-executor agent with manual step definitions

Task abstractions:
- AutomationTask: Generic task model for browser automation
Expand Down Expand Up @@ -67,6 +68,9 @@
get_config_preset,
)

# Branded alias for PlannerExecutorAgent
PredicateAgent = PlannerExecutorAgent

__all__ = [
# Automation Task
"AutomationTask",
Expand Down Expand Up @@ -95,6 +99,7 @@
"PlanStep",
"PlannerExecutorAgent",
"PlannerExecutorConfig",
"PredicateAgent", # Branded alias for PlannerExecutorAgent
"PredicateSpec",
"RecoveryNavigationConfig",
"RetryConfig",
Expand Down
26 changes: 26 additions & 0 deletions predicate/agents/automation_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,32 @@ class AutomationTask:
# Domain hints for heuristics (e.g., ["ecommerce", "amazon"])
domain_hints: tuple[str, ...] = field(default_factory=tuple)

# Force a specific pruning category (overrides auto-detection)
force_pruning_category: str | None = None

def pruning_category_hint(self):
    """
    Resolve the pruning category to use for this task.

    A non-empty ``force_pruning_category`` that is a valid
    ``PruningTaskCategory`` value takes precedence. Otherwise the
    category is derived by ``classify_task_category`` from the task
    text, starting URL, domain hints and task category.

    Returns:
        The forced ``PruningTaskCategory`` when the override is valid,
        else the ``category`` attribute of the classification result.
    """
    from ..pruning import PruningTaskCategory, classify_task_category

    forced = self.force_pruning_category
    if forced:
        try:
            return PruningTaskCategory(forced)
        except ValueError:
            # Unrecognised override: ignore it and auto-detect instead.
            pass

    detection = classify_task_category(
        task_text=self.task,
        current_url=self.starting_url,
        domain_hints=self.domain_hints,
        task_category=self.category,
    )
    return detection.category

@classmethod
def from_webbench_task(cls, task: Any) -> "AutomationTask":
"""
Expand Down
Loading
Loading