diff --git a/CHANGELOG.md b/CHANGELOG.md
index 263a091..326725b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,20 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [0.3.1] - 2026-06-12
+
+### Added
+
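+- 🌐 **Browser tools.** Browse the web from chat. Navigate pages, click elements, type into forms, take screenshots, and run JavaScript with a local Chrome (auto-launched if none is running). Firecrawl (page-to-markdown extraction, no interactive browsing) and Browser-Use (natural-language browser tasks) are available as cloud providers. Enable in Settings > Browser.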
+- 🖼️ **Image understanding.** The AI can now read and describe images from your workspace. Open a screenshot or image file and it just works, across all providers.
+- 🔴 **Error toasts.** When something goes wrong during a response (API errors, model failures), you'll now see a clear error message in the chat and a toast notification instead of the response failing silently.
+
+### Fixed
+
+- 🔁 **Responses API multi-turn tool calling.** Fixed an issue where tool calls would stop after the first round when using OpenAI's Responses API. The AI now correctly loops through multiple tool calls as expected.
+- 💬 **`/new` command in Telegram/Discord.** Starting a new conversation with `/new` now actually creates a fresh chat instead of continuing the previous one.
+- 🛡️ **Responses API spec compliance.** Input messages, tool outputs, and error handling now fully follow the Open Responses specification, preventing unexpected 400 errors.
+
 ## [0.3.0] - 2026-06-12
 
 ### Added
diff --git a/cptr/app.py b/cptr/app.py
index 4898808..53dbea3 100644
--- a/cptr/app.py
+++ b/cptr/app.py
@@ -69,6 +69,15 @@ async def shutdown():
     bot_manager = getattr(app.state, "bot_manager", None)
     if bot_manager:
         await bot_manager.stop_all()
+    # Clean up browser sessions and launched Chrome
+    try:
+        from cptr.utils.browser.session import session_manager
+        from cptr.utils.browser.launcher import shutdown_browser
+
+        await session_manager.close_all()
+        await shutdown_browser()
+    except Exception:
+        pass
 
 
 # Auth middleware
diff --git a/cptr/frontend/package.json b/cptr/frontend/package.json
index d0a632e..923b45b 100644
--- a/cptr/frontend/package.json
+++ b/cptr/frontend/package.json
@@ -1,7 +1,7 @@
 {
 	"name": "frontend",
 	"private": true,
-	"version": "0.3.0",
+	"version": "0.3.1",
 	"type": "module",
 	"scripts": {
 		"dev": "vite dev",
diff --git a/cptr/frontend/src/lib/apis/admin.ts b/cptr/frontend/src/lib/apis/admin.ts
index 10de548..c6243fb 100644
--- a/cptr/frontend/src/lib/apis/admin.ts
+++ b/cptr/frontend/src/lib/apis/admin.ts
@@ -54,8 +54,8 @@ export const getAdminConfig = async (): Promise<Record<string, unknown>> => {
 
 export const updateConfig = (config: Record<string, unknown>) =>
 	fetchJSON('/api/admin/config', {
-		method: 'PUT',
-		...jsonBody({ config })
+		...jsonBody({ config }),
+		method: 'PUT' // listed after the spread so PUT wins if jsonBody() also supplies a method (later keys override earlier ones)
 	});
 
 // ── Connections ─────────────────────────────────────────────
diff --git a/cptr/frontend/src/lib/components/Icon.svelte b/cptr/frontend/src/lib/components/Icon.svelte
index fd661f1..71b7c15 100644
--- a/cptr/frontend/src/lib/components/Icon.svelte
+++ b/cptr/frontend/src/lib/components/Icon.svelte
@@ -333,5 +333,10 @@
 	{:else if name === 'signal'}
 		<path d="M8.5 11.5L11.5 14.5L16.5 9.5" />
 		<path d="M5 18L3.13036 4.91253C3.05646 4.39524 3.39389 3.91247 3.90398 3.79912L11.5661 2.09641C11.8519 2.03291 12.1481 2.03291 12.4339 2.09641L20.096 3.79912C20.6061 3.91247 20.9435 4.39524 20.8696 4.91252L19 18C18.9293 18.495 18.5 21.5 12 21.5C5.5 21.5 5.07071 18.495 5 18Z" />
+	{:else if name === 'browser'}
+		<path d="M22 12C22 6.47715 17.5228 2 12 2C6.47715 2 2 6.47715 2 12C2 17.5228 6.47715 22 12 22C17.5228 22 22 17.5228 22 12Z" />
+		<path d="M13 2.04932C13 2.04932 16 6 16 12C16 18 13 21.9507 13 21.9507" />
+		<path d="M11 21.9507C11 21.9507 8 18 8 12C8 6 11 2.04932 11 2.04932" />
+		<path d="M2 12H22" />
 	{/if}
 </svg>
diff --git a/cptr/frontend/src/lib/components/Settings/Browser.svelte b/cptr/frontend/src/lib/components/Settings/Browser.svelte
new file mode 100644
index 0000000..15a6905
--- /dev/null
+++ b/cptr/frontend/src/lib/components/Settings/Browser.svelte
@@ -0,0 +1,254 @@
+<script lang="ts">
+	import { toast } from 'svelte-sonner';
+	import ToggleSwitch from '../common/ToggleSwitch.svelte';
+	import Spinner from '../common/Spinner.svelte';
+	import { onMount } from 'svelte';
+	import { getAdminConfig, updateConfig } from '$lib/apis/admin';
+	import { t } from '$lib/i18n';
+
+	let loading = $state(true); // true until the initial config fetch in onMount settles; shows the spinner
+	let saving = $state(false); // true while save() is in flight; disables the Save button
+	let testing = $state(false); // true while testConnection() is in flight; disables the Test button
+	let testResult = $state<{ ok: boolean; message: string } | null>(null); // last Test outcome, rendered under the CDP URL field
+
+	// Config state — one variable per `browser.*` admin config key; the initial values double as the load-time fallbacks
+	let enabled = $state(false);
+	let provider = $state<'local' | 'firecrawl' | 'browser_use'>('local');
+	let cdpUrl = $state('http://localhost:9222');
+	let autoLaunch = $state(true);
+	let sessionTimeout = $state(10); // minutes
+	let firecrawlApiKey = $state('');
+	let firecrawlBaseUrl = $state('https://api.firecrawl.dev');
+	let browserUseApiKey = $state('');
+	let browserUseBaseUrl = $state('https://api.browser-use.com');
+
+	onMount(async () => { // Loads saved browser.* settings; booleans are accepted as real booleans or 'true'/'false' strings.
+		try {
+			const config = await getAdminConfig();
+			enabled = config['browser.enabled'] === true || config['browser.enabled'] === 'true';
+			provider = (config['browser.provider'] as typeof provider) || 'local';
+			cdpUrl = (config['browser.cdp_url'] as string) || 'http://localhost:9222';
+			autoLaunch = config['browser.auto_launch'] !== false && config['browser.auto_launch'] !== 'false';
+			sessionTimeout = Number(config['browser.session_timeout_minutes']) || 10;
+			firecrawlApiKey = (config['browser.firecrawl_api_key'] as string) || '';
+			firecrawlBaseUrl = (config['browser.firecrawl_base_url'] as string) || 'https://api.firecrawl.dev';
+			browserUseApiKey = (config['browser.browser_use_api_key'] as string) || '';
+			browserUseBaseUrl = (config['browser.browser_use_base_url'] as string) || 'https://api.browser-use.com';
+		} catch { toast.error('Failed to load browser settings'); } // don't silently show defaults that Save would then persist
+		loading = false;
+	});
+
+	async function save() { // Persists every browser.* key, including the inactive providers' fields.
+		saving = true;
+		try {
+			await updateConfig({
+				'browser.enabled': enabled,
+				'browser.provider': provider,
+				'browser.cdp_url': cdpUrl,
+				'browser.auto_launch': autoLaunch,
+				'browser.session_timeout_minutes': Math.min(120, Math.max(1, Number(sessionTimeout) || 10)), // cleared input -> 10; clamp to the field's 1-120 range
+				'browser.firecrawl_api_key': firecrawlApiKey,
+				'browser.firecrawl_base_url': firecrawlBaseUrl,
+				'browser.browser_use_api_key': browserUseApiKey,
+				'browser.browser_use_base_url': browserUseBaseUrl
+			});
+			toast.success($t('settings.saved'));
+		} catch {
+			toast.error('Failed to save browser settings');
+		} finally {
+			saving = false; // re-enable the Save button on success and failure alike
+		}
+	}
+
+	// Probes {cdpUrl}/json/version from this page and shows the outcome under the CDP URL field.
+	async function testConnection() {
+		testing = true;
+		testResult = null;
+		try {
+			// Trim whitespace/trailing slashes so "http://host:9222/" doesn't become "//json/version".
+			const resp = await fetch(`${cdpUrl.trim().replace(/\/+$/, '')}/json/version`);
+			testResult = resp.ok
+				? { ok: true, message: (await resp.json()).Browser || 'Connected' }
+				: { ok: false, message: `HTTP ${resp.status}` };
+		} catch {
+			// Network failure, a blocked cross-origin request, or a non-JSON body all end up here.
+			testResult = { ok: false, message: 'Could not connect' };
+		} finally {
+			testing = false;
+		}
+	}
+</script>
+
+<div class="flex flex-col min-h-full">
+	<h2 class="text-sm font-medium text-gray-900 dark:text-white mb-4">Browser</h2>
+
+	{#if loading}
+		<div class="flex justify-center py-8"><Spinner size={16} /></div>
+	{:else}
+		<!-- Enable -->
+		<h3 class="text-xs text-gray-400 dark:text-gray-600 mb-2">Enable</h3>
+
+		<div class="flex flex-col gap-2.5">
+			<label class="flex items-center justify-between cursor-pointer">
+				<span class="text-xs text-gray-600 dark:text-gray-400">Browser tools</span>
+				<ToggleSwitch value={enabled} onchange={(v) => { enabled = v; }} />
+			</label>
+			<p class="text-[11px] text-gray-400 dark:text-gray-600 -mt-1">
+				Give the AI access to a web browser for navigating pages, clicking elements, and taking screenshots.
+			</p>
+		</div>
+
+		{#if enabled}
+			<!-- Provider -->
+			<h3 class="text-xs text-gray-400 dark:text-gray-600 mb-2 mt-5">Provider</h3>
+
+			<div class="flex gap-1">
+				{#each [
+					{ value: 'local' as const, label: 'Local CDP' },
+					{ value: 'firecrawl' as const, label: 'Firecrawl' },
+					{ value: 'browser_use' as const, label: 'Browser-Use' }
+				] as opt}
+					<button
+						class="flex items-center gap-1.5 h-7 px-2.5 rounded-lg text-xs transition-colors duration-100
+						{provider === opt.value
+							? 'bg-gray-200/50 dark:bg-white/8 text-gray-900 dark:text-white font-medium'
+							: 'text-gray-500 hover:text-gray-700 dark:hover:text-gray-300'}"
+						onclick={() => { provider = opt.value; }}
+					>
+						{opt.label}
+					</button>
+				{/each}
+			</div>
+			<p class="text-[11px] text-gray-400 dark:text-gray-600 mt-1">
+				{#if provider === 'local'}
+					Connects to Chrome via DevTools Protocol. Full interactive browsing with clicking, typing, and screenshots.
+				{:else if provider === 'firecrawl'}
+					Cloud API that converts web pages to markdown. Fast extraction, no interactive browsing.
+				{:else}
+					Cloud API for LLM-driven browser tasks. Describe what you need in natural language.
+				{/if}
+			</p>
+
+			<!-- Local CDP settings -->
+			{#if provider === 'local'}
+				<h3 class="text-xs text-gray-400 dark:text-gray-600 mb-2 mt-5">Connection</h3>
+
+				<div class="flex flex-col gap-2.5">
+					<label class="flex items-center justify-between cursor-pointer">
+						<div>
+							<span class="text-xs text-gray-600 dark:text-gray-400">Auto-launch Chrome</span>
+							<p class="text-[10px] text-gray-400 dark:text-gray-600">Start a headless Chrome if none is running</p>
+						</div>
+						<ToggleSwitch value={autoLaunch} onchange={(v) => { autoLaunch = v; }} />
+					</label>
+
+					<div>
+						<label class="text-xs text-gray-600 dark:text-gray-400" for="cdp-url">CDP URL</label>
+						<div class="flex gap-1.5 mt-1">
+							<input
+								id="cdp-url"
+								type="text"
+								bind:value={cdpUrl}
+								placeholder="http://localhost:9222"
+								class="flex-1 h-7 px-2 rounded-lg text-xs bg-gray-100 dark:bg-white/6 text-gray-700 dark:text-gray-300 border border-gray-200 dark:border-white/8 outline-none focus:border-blue-400 dark:focus:border-blue-500 transition-colors"
+							/>
+							<button
+								class="h-7 px-2.5 rounded-lg text-xs bg-gray-200/50 dark:bg-white/8 text-gray-600 dark:text-gray-400 hover:text-gray-900 dark:hover:text-white transition-colors disabled:opacity-50"
+								onclick={() => testConnection()}
+								disabled={testing}
+							>
+								{testing ? '...' : 'Test'}
+							</button>
+						</div>
+						{#if testResult}
+							<p class="text-[11px] mt-1 {testResult.ok ? 'text-emerald-600 dark:text-emerald-400' : 'text-red-500'}">
+								{testResult.message}
+							</p>
+						{/if}
+					</div>
+
+					<div>
+						<label class="text-xs text-gray-600 dark:text-gray-400" for="session-timeout">Session timeout</label>
+						<div class="flex items-center gap-1.5 mt-1">
+							<input
+								id="session-timeout"
+								type="number"
+								bind:value={sessionTimeout}
+								min="1"
+								max="120"
+								class="w-16 h-7 px-2 rounded-lg text-xs bg-gray-100 dark:bg-white/6 text-gray-700 dark:text-gray-300 border border-gray-200 dark:border-white/8 outline-none focus:border-blue-400 dark:focus:border-blue-500 transition-colors"
+							/>
+							<span class="text-[11px] text-gray-400 dark:text-gray-600">minutes</span>
+						</div>
+					</div>
+				</div>
+			{/if}
+
+			<!-- Firecrawl settings -->
+			{#if provider === 'firecrawl'}
+				<h3 class="text-xs text-gray-400 dark:text-gray-600 mb-2 mt-5">Firecrawl</h3>
+
+				<div class="flex flex-col gap-2.5">
+					<div>
+						<label class="text-xs text-gray-600 dark:text-gray-400" for="fc-key">API Key</label>
+						<input
+							id="fc-key"
+							type="password"
+							bind:value={firecrawlApiKey}
+							placeholder="fc-..."
+							class="w-full mt-1 h-7 px-2 rounded-lg text-xs bg-gray-100 dark:bg-white/6 text-gray-700 dark:text-gray-300 border border-gray-200 dark:border-white/8 outline-none focus:border-blue-400 dark:focus:border-blue-500 transition-colors"
+						/>
+					</div>
+					<div>
+						<label class="text-xs text-gray-600 dark:text-gray-400" for="fc-url">Base URL</label>
+						<input
+							id="fc-url"
+							type="text"
+							bind:value={firecrawlBaseUrl}
+							placeholder="https://api.firecrawl.dev"
+							class="w-full mt-1 h-7 px-2 rounded-lg text-xs bg-gray-100 dark:bg-white/6 text-gray-700 dark:text-gray-300 border border-gray-200 dark:border-white/8 outline-none focus:border-blue-400 dark:focus:border-blue-500 transition-colors"
+						/>
+						<p class="text-[11px] text-gray-400 dark:text-gray-600 mt-1">Change for self-hosted Firecrawl instances</p>
+					</div>
+				</div>
+			{/if}
+
+			<!-- Browser-Use settings -->
+			{#if provider === 'browser_use'}
+				<h3 class="text-xs text-gray-400 dark:text-gray-600 mb-2 mt-5">Browser-Use</h3>
+
+				<div class="flex flex-col gap-2.5">
+					<div>
+						<label class="text-xs text-gray-600 dark:text-gray-400" for="bu-key">API Key</label>
+						<input
+							id="bu-key"
+							type="password"
+							bind:value={browserUseApiKey}
+							placeholder="bu-..."
+							class="w-full mt-1 h-7 px-2 rounded-lg text-xs bg-gray-100 dark:bg-white/6 text-gray-700 dark:text-gray-300 border border-gray-200 dark:border-white/8 outline-none focus:border-blue-400 dark:focus:border-blue-500 transition-colors"
+						/>
+					</div>
+					<div>
+						<label class="text-xs text-gray-600 dark:text-gray-400" for="bu-url">Base URL</label>
+						<input
+							id="bu-url"
+							type="text"
+							bind:value={browserUseBaseUrl}
+							placeholder="https://api.browser-use.com"
+							class="w-full mt-1 h-7 px-2 rounded-lg text-xs bg-gray-100 dark:bg-white/6 text-gray-700 dark:text-gray-300 border border-gray-200 dark:border-white/8 outline-none focus:border-blue-400 dark:focus:border-blue-500 transition-colors"
+						/>
+					</div>
+				</div>
+			{/if}
+		{/if}
+
+		<!-- Save -->
+		<div class="mt-auto pt-6 flex justify-end">
+			<button
+				class="text-[13px] text-gray-600 dark:text-gray-400 hover:text-gray-900 dark:hover:text-white transition-colors duration-100 disabled:opacity-50"
+				onclick={() => save()}
+				disabled={saving}
+			>{$t('settings.save')}</button>
+		</div>
+	{/if}
+</div>
diff --git a/cptr/frontend/src/lib/components/SettingsModal.svelte b/cptr/frontend/src/lib/components/SettingsModal.svelte
index a5de3f1..4dc97fa 100644
--- a/cptr/frontend/src/lib/components/SettingsModal.svelte
+++ b/cptr/frontend/src/lib/components/SettingsModal.svelte
@@ -4,6 +4,7 @@
 	import General from './Settings/General.svelte';
 	import Account from './Settings/Account.svelte';
 	import Keyboard from './Settings/Keyboard.svelte';
+	import Browser from './Settings/Browser.svelte';
 	import About from './Settings/About.svelte';
 	import Users from './Admin/Users.svelte';
 	import Connections from './Admin/Connections.svelte';
@@ -16,6 +17,7 @@
 	type Tab =
 		| 'general'
 		| 'keyboard'
+		| 'browser'
 		| 'account'
 		| 'about'
 		| 'users'
@@ -47,6 +49,7 @@
 		{ id: 'connections', label: $t('admin.connections'), icon: 'plug' },
 		{ id: 'models', label: $t('admin.models'), icon: 'cube' },
 		{ id: 'messaging', label: $t('admin.messaging'), icon: 'chat-bubble' },
+		{ id: 'browser', label: 'Browser', icon: 'browser' },
 		{ id: 'admin_settings', label: $t('settings.configuration'), icon: 'shield' }
 	]);
 </script>
@@ -106,6 +109,8 @@
 			<General />
 		{:else if activeTab === 'keyboard'}
 			<Keyboard />
+		{:else if activeTab === 'browser'}
+			<Browser />
 		{:else if activeTab === 'account'}
 			<Account />
 		{:else if activeTab === 'about'}
diff --git a/cptr/frontend/src/lib/components/chat/ChatInput.svelte b/cptr/frontend/src/lib/components/chat/ChatInput.svelte
index 54d5d12..7a60586 100644
--- a/cptr/frontend/src/lib/components/chat/ChatInput.svelte
+++ b/cptr/frontend/src/lib/components/chat/ChatInput.svelte
@@ -401,7 +401,7 @@
 		if (!child) return;
 		const popupHeight = child.offsetHeight || 200;
 		child.style.position = 'fixed';
-		child.style.left = `${Math.max(8, Math.min(rect.left, window.innerWidth - 340))}px`;
+		child.style.left = `${Math.max(8, Math.min(rect.left, window.innerWidth - 280))}px`;
 		child.style.top = `${rect.top - popupHeight - 8}px`;
 	}
 
diff --git a/cptr/frontend/src/lib/components/chat/ChatPanel.svelte b/cptr/frontend/src/lib/components/chat/ChatPanel.svelte
index 662950d..e0be5c1 100644
--- a/cptr/frontend/src/lib/components/chat/ChatPanel.svelte
+++ b/cptr/frontend/src/lib/components/chat/ChatPanel.svelte
@@ -38,6 +38,7 @@
 	import AssistantMessage from './AssistantMessage.svelte';
 	import ChatHistory from './ChatHistory.svelte';
 	import Spinner from '../common/Spinner.svelte';
+	import { toast } from 'svelte-sonner';
 
 	interface Props {
 		workspace: string;
@@ -307,6 +308,7 @@
 		delta?: string;
 		output?: any;
 		done?: boolean;
+		error?: string;
 		queue_processed?: boolean;
 		title?: string;
 	}) {
@@ -375,6 +377,9 @@
 			}
 			allMessages = [...allMessages];
 		}
+		if (data.error) {
+			toast.error(data.error, { duration: 8000 });
+		}
 		if (data.done) {
 			// Clear streaming indicator for this tab
 			if (tabId) {
diff --git a/cptr/frontend/src/lib/components/chat/SkillSuggestionPopup.svelte b/cptr/frontend/src/lib/components/chat/SkillSuggestionPopup.svelte
index 97ea264..ea9a5ee 100644
--- a/cptr/frontend/src/lib/components/chat/SkillSuggestionPopup.svelte
+++ b/cptr/frontend/src/lib/components/chat/SkillSuggestionPopup.svelte
@@ -27,7 +27,7 @@
 </script>
 
 <div
-	class="fixed z-50 w-72 max-h-48 overflow-y-auto rounded-xl bg-white dark:bg-[#1a1a1a] border border-gray-150 dark:border-white/6 shadow-xl p-0.5"
+	class="fixed z-50 w-64 max-h-48 overflow-y-auto rounded-xl bg-white dark:bg-[#1a1a1a] border border-gray-150 dark:border-white/6 shadow-xl p-0.5"
 >
 	{#if items.length === 0}
 		<div class="px-3 py-2 text-xs text-gray-400 dark:text-gray-600">No skills found</div>
diff --git a/cptr/utils/ai.py b/cptr/utils/ai.py
index e162836..7845eb2 100644
--- a/cptr/utils/ai.py
+++ b/cptr/utils/ai.py
@@ -12,6 +12,7 @@
 import asyncio
 import json
 import logging
+import uuid
 from collections.abc import AsyncIterator
 from typing import Dict, List
 
@@ -187,6 +188,24 @@ def _to_anthropic_messages(messages: list[dict]) -> list[dict]:
             content = formatted_content
         if role == "tool":
             # tool result → Anthropic tool_result block
+            # Content may be a string or a list of blocks (multimodal image results)
+            if isinstance(content, list):
+                # Multimodal tool result — convert blocks to Anthropic format
+                tool_content = []
+                for block in content:
+                    if block.get("type") == "text":
+                        tool_content.append({"type": "text", "text": block.get("text", "")})
+                    elif block.get("type") == "image":
+                        tool_content.append({
+                            "type": "image",
+                            "source": {
+                                "type": "base64",
+                                "media_type": block.get("media_type", "image/jpeg"),
+                                "data": block.get("base64", ""),
+                            }
+                        })
+            else:
+                tool_content = content
             result.append(
                 {
                     "role": "user",
@@ -194,7 +213,7 @@ def _to_anthropic_messages(messages: list[dict]) -> list[dict]:
                         {
                             "type": "tool_result",
                             "tool_use_id": m.get("tool_call_id", ""),
-                            "content": content,
+                            "content": tool_content,
                         }
                     ],
                 }
@@ -338,6 +357,9 @@ def _to_openai_messages(messages: list[dict], instructions: str) -> list[dict]:
             for block in content:
                 if block.get("type") == "text":
                     formatted_content.append({"type": "text", "text": block.get("text", "")})
+                elif block.get("type") == "image_url":
+                    # Already in OpenAI-native format (e.g. from image extraction)
+                    formatted_content.append(block)
                 elif block.get("type") == "image":
                     data_uri = f"data:{block.get('media_type', 'image/jpeg')};base64,{block.get('base64', '')}"
                     formatted_content.append({
@@ -460,22 +482,39 @@ def _to_responses_input(messages: list[dict], instructions: str) -> list[dict]:
         if role == "system":
             continue
         if role == "tool":
+            content = m.get("content", "")
+            if isinstance(content, list):
+                # Multimodal tool content — extract text only for output
+                # (images are handled in the agentic loop)
+                text_parts = []
+                for block in content:
+                    if block.get("type") == "text":
+                        text_parts.append(block.get("text", ""))
+                content = "\n".join(text_parts)
             items.append(
                 {
                     "type": "function_call_output",
                     "call_id": m.get("tool_call_id", ""),
-                    "output": m.get("content", ""),
+                    "output": content,
+                    "status": "completed",
                 }
             )
         elif role == "assistant" and m.get("tool_calls"):
             for tc in m["tool_calls"]:
+                args = tc["function"].get("arguments", "{}")
+                call_id = tc.get("id", "")
+                # Responses API requires id to start with "fc_"
+                fc_id = tc.get("fc_id", "")
+                if not fc_id or not fc_id.startswith("fc_"):
+                    fc_id = f"fc_{call_id.replace('call_', '', 1) or uuid.uuid4().hex}"
                 items.append(
                     {
                         "type": "function_call",
-                        "id": tc.get("id", ""),
-                        "call_id": tc.get("id", ""),
+                        "id": fc_id,
+                        "call_id": call_id,
                         "name": tc["function"]["name"],
-                        "arguments": tc["function"].get("arguments", "{}"),
+                        "arguments": args if isinstance(args, str) else json.dumps(args),
+                        "status": "completed",
                     }
                 )
         else:
@@ -486,15 +525,14 @@ def _to_responses_input(messages: list[dict], instructions: str) -> list[dict]:
                     if block.get("type") == "text":
                         formatted_content.append({"type": "input_text", "text": block.get("text", "")})
                     elif block.get("type") == "image":
-                        # Not all models support input_image, but this is the Responses API spec
                         data_uri = f"data:{block.get('media_type', 'image/jpeg')};base64,{block.get('base64', '')}"
                         formatted_content.append({
                             "type": "input_image",
                             "image_url": data_uri
                         })
-                items.append({"role": role, "content": formatted_content})
+                items.append({"type": "message", "role": role, "content": formatted_content})
             else:
-                items.append({"role": role, "content": content})
+                items.append({"type": "message", "role": role, "content": content})
     return items
 
 
@@ -529,17 +567,26 @@ async def stream_openai_responses(
         try:
             async with httpx.AsyncClient(timeout=_STREAM_TIMEOUT) as client:
                 logger.info(
-                    "[stream] openai responses POST %s/responses model=%s", url, form_data.model
+                    "[stream] openai responses POST %s/responses model=%s input_items=%d types=%s",
+                    url, form_data.model,
+                    len(body.get("input", [])),
+                    [i.get("type", i.get("role", "?")) for i in body.get("input", [])],
                 )
                 async with client.stream(
                     "POST", f"{url}/responses", json=body, headers=headers
                 ) as resp:
                     logger.info("[stream] openai responses status=%s", resp.status_code)
+                    if resp.status_code >= 400:
+                        error_body = await resp.aread()
+                        logger.error("[stream] openai responses error body: %s", error_body.decode(errors="replace"))
                     resp.raise_for_status()
                     async for line in resp.aiter_lines():
                         if not line.startswith("data: "):
                             continue
-                        event = json.loads(line[6:])
+                        raw = line[6:]
+                        if raw == "[DONE]":
+                            break
+                        event = json.loads(raw)
                         etype = event.get("type")
 
                         if etype == "response.output_text.delta":
@@ -552,11 +599,17 @@ async def stream_openai_responses(
                                 emitted = True
                                 yield {
                                     "type": "tool_call",
+                                    "id": item.get("id", ""),
                                     "call_id": item["call_id"],
                                     "name": item["name"],
                                     "arguments": json.loads(item["arguments"]),
                                 }
 
+                        elif etype == "response.failed":
+                            error = event.get("response", {}).get("error", {})
+                            msg = error.get("message", "Response failed")
+                            raise RuntimeError(f"Responses API error: {msg}")
+
                         elif etype == "response.completed":
                             usage = event.get("response", {}).get("usage", {})
                             if usage:
diff --git a/cptr/utils/bridge.py b/cptr/utils/bridge.py
index 4127ffd..32b1772 100644
--- a/cptr/utils/bridge.py
+++ b/cptr/utils/bridge.py
@@ -174,15 +174,18 @@ async def delete_bot_config(bot_id: str) -> bool:
 
 
 async def find_chat_for_thread(bot_id: str, external_thread_id: str) -> str | None:
-    """Find an existing cptr chat_id for a platform thread.
+    """Find the most recent cptr chat_id for a platform thread.
 
-    Scans chats with matching bridge metadata.  This is fine for the
-    small number of active bridge threads (typically < 100).
+    Scans chats with matching bridge metadata and returns the newest one,
+    so /new (which creates a new chat with the same thread ID) takes effect.
     """
     from cptr.models import Chat
     from cptr.utils.db import get_db
     from sqlalchemy import select
 
+    best_id = None
+    best_ts = -1
+
     async with await get_db() as db:
         result = await db.execute(
             select(Chat).where(Chat.user_id.isnot(None))
@@ -193,8 +196,11 @@ async def find_chat_for_thread(bot_id: str, external_thread_id: str) -> str | No
                 meta.get("bridge_bot_id") == bot_id
                 and meta.get("bridge_external_thread_id") == external_thread_id
             ):
-                return chat.id
-    return None
+                ts = getattr(chat, "created_at", 0) or 0
+                if ts > best_ts:
+                    best_ts = ts
+                    best_id = chat.id
+    return best_id
 
 
 # ── BotManager ───────────────────────────────────────────────
diff --git a/cptr/utils/browser/__init__.py b/cptr/utils/browser/__init__.py
new file mode 100644
index 0000000..defe8f9
--- /dev/null
+++ b/cptr/utils/browser/__init__.py
@@ -0,0 +1 @@
+"""Browser automation package — pluggable providers behind unified tool interface."""
diff --git a/cptr/utils/browser/browser_use.py b/cptr/utils/browser/browser_use.py
new file mode 100644
index 0000000..1c4c439
--- /dev/null
+++ b/cptr/utils/browser/browser_use.py
@@ -0,0 +1,47 @@
+"""Browser-Use cloud provider — LLM-driven browser tasks via REST API.
+
+Pure httpx, no SDK dependency.
+"""
+
+from __future__ import annotations
+
+import logging
+
+import httpx
+
+logger = logging.getLogger(__name__)
+
+DEFAULT_BASE_URL = "https://api.browser-use.com"
+
+
+async def browse(
+    task: str,
+    api_key: str,
+    base_url: str = DEFAULT_BASE_URL,
+) -> str:
+    """Run a natural-language browser task via POST {base_url}/v1/run.
+
+    Returns the response's "result" (falling back to "output") as text, capped
+    at 50,000 characters. An HTTP error response raises via raise_for_status().
+    """
+    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
+    endpoint = f"{base_url.rstrip('/')}/v1/run"
+
+    async with httpx.AsyncClient(timeout=120) as http:
+        resp = await http.post(endpoint, json={"task": task}, headers=headers)
+        resp.raise_for_status()
+        data = resp.json()
+
+    # `or` (not a .get default) so a present-but-empty/null "result" still
+    # falls back to "output"; a non-dict body is treated as the result itself.
+    result = (data.get("result") or data.get("output")) if isinstance(data, dict) else data
+
+    if not result:
+        return f"Browser-Use returned no result for task: {task}"
+
+    # Stringify first so structured (dict/list) results are capped as well.
+    text = str(result)
+    if len(text) > 50_000:
+        text = text[:50_000] + "\n\n[... truncated]"
+
+    return text
diff --git a/cptr/utils/browser/cdp.py b/cptr/utils/browser/cdp.py
new file mode 100644
index 0000000..2c49056
--- /dev/null
+++ b/cptr/utils/browser/cdp.py
@@ -0,0 +1,330 @@
+"""Chrome DevTools Protocol client over WebSocket.
+
+Connects to a running Chrome/Chromium instance via CDP and provides methods for
+navigation, accessibility tree snapshots with ref IDs, clicking, typing,
+screenshots, and JS evaluation. Depends on websockets, plus httpx for target discovery.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import base64
+import json
+import logging
+from typing import Any
+
+import websockets
+
+logger = logging.getLogger(__name__)
+
+# Ref ID prefix used in accessibility tree snapshots
+_REF_PREFIX = "@e"
+
+
+class CDPClient:
+    """Low-level Chrome DevTools Protocol client."""
+
+    def __init__(self, ws: Any, target_id: str) -> None:
+        self._ws = ws  # open WebSocket connection to the page target
+        self._target_id = target_id  # CDP id of the attached page target
+        self._msg_id = 0  # last command id sent; incremented by _send()
+        self._ref_map: dict[str, int] = {}  # ref_id -> backend_node_id
+        self._closed = False  # set to True once close() has run
+
+    # ── Connection ─────────────────────────────────────────
+
+    @classmethod
+    async def connect(cls, cdp_url: str = "http://localhost:9222") -> "CDPClient":
+        """Connect to a Chrome instance via CDP.
+
+        Lists targets via the /json/list HTTP endpoint, attaches to the first
+        "page" target (opening a blank tab via /json/new if there is none) and
+        enables the Page, DOM, Accessibility and Runtime domains.
+
+        Raises httpx.HTTPStatusError if a discovery endpoint returns an error status.
+        """
+        import httpx
+
+        base = cdp_url.rstrip("/")
+
+        # One HTTP client for both discovery calls.
+        async with httpx.AsyncClient(timeout=5) as http:
+            resp = await http.get(f"{base}/json/list")
+            resp.raise_for_status()  # fail loudly instead of parsing an error page
+            targets = resp.json()
+            page_target = next((t for t in targets if t.get("type") == "page"), None)
+
+            if not page_target:
+                # No page target available: create a new tab.
+                resp = await http.put(f"{base}/json/new?about:blank")
+                resp.raise_for_status()
+                page_target = resp.json()
+
+        ws_url = page_target["webSocketDebuggerUrl"]
+        target_id = page_target["id"]
+
+        ws = await websockets.connect(ws_url, max_size=50 * 1024 * 1024)
+        client = cls(ws, target_id)
+
+        try:
+            # Enable required domains
+            for domain in ("Page", "DOM", "Accessibility", "Runtime"):
+                await client._send(f"{domain}.enable")
+        except BaseException:
+            # Don't leak the socket if setup fails or is cancelled.
+            await client.close()
+            raise
+
+        return client
+
+    # ── Low-level CDP messaging ────────────────────────────
+
+    async def _send(self, method: str, params: dict | None = None) -> dict:
+        """Issue one CDP command and return the ``result`` of its reply.
+
+        Messages read while waiting that carry a different ``id`` (or none,
+        i.e. events) are discarded. Raises RuntimeError on a CDP error reply.
+        """
+        self._msg_id += 1
+        request_id = self._msg_id
+        command = {"id": request_id, "method": method}
+        if params:
+            command["params"] = params
+        await self._ws.send(json.dumps(command))
+
+        reply: dict = {}
+        while reply.get("id") != request_id:
+            reply = json.loads(await asyncio.wait_for(self._ws.recv(), timeout=30))
+        if "error" in reply:
+            raise RuntimeError(f"CDP error: {reply['error'].get('message', reply['error'])}")
+        return reply.get("result", {})
+
+    # ── Navigation ─────────────────────────────────────────
+
+    async def navigate(self, url: str) -> dict:
+        """Navigate to a URL, wait for the load event, and return url/title/frame_id.
+
+        Raises RuntimeError when Chrome reports the navigation failed (errorText).
+        """
+        result = await self._send("Page.navigate", {"url": url})
+        if result.get("errorText"):
+            raise RuntimeError(f"Navigation to {url} failed: {result['errorText']}")
+
+        # Same-document navigations (no loaderId) never fire a load event, so only
+        # wait when a new document is actually being loaded.
+        while result.get("loaderId"):
+            data = json.loads(await asyncio.wait_for(self._ws.recv(), timeout=30))
+            if data.get("method") == "Page.loadEventFired":
+                break
+
+        await asyncio.sleep(0.5)  # small delay for the DOM to settle
+
+        title_result = await self._send("Runtime.evaluate", {"expression": "document.title"})
+        title = title_result.get("result", {}).get("value", "")
+        return {"url": url, "title": title, "frame_id": result.get("frameId")}
+
+    # ── Accessibility tree snapshot ────────────────────────
+
+    async def snapshot(self) -> str:
+        """Capture the accessibility tree and return a text representation with ref IDs.
+
+        Interactive elements (links, buttons, inputs, etc.) are assigned ref IDs
+        like @e1, @e2 that can be used with click() and type_text(). Each call
+        rebuilds the ref map, so refs from earlier snapshots become invalid.
+        """
+        result = await self._send("Accessibility.getFullAXTree")
+        nodes = result.get("nodes", [])
+
+        self._ref_map.clear()
+        ref_counter = 0
+        lines: list[str] = []
+
+        # Interactive roles that get ref IDs
+        interactive_roles = {
+            "link", "button", "textbox", "searchbox", "combobox",
+            "checkbox", "radio", "tab", "menuitem", "option",
+            "switch", "slider", "spinbutton", "textfield",
+        }
+
+        for node in nodes:
+            role_data = node.get("role", {})
+            role = role_data.get("value", "") if isinstance(role_data, dict) else str(role_data)
+            if not role or role in ("none", "generic", "InlineTextBox", "StaticText"):
+                continue
+            # Compare case-insensitively everywhere so filtering and ref
+            # assignment agree on what counts as interactive.
+            is_interactive = role.lower() in interactive_roles
+
+            name_data = node.get("name", {})
+            name = name_data.get("value", "") if isinstance(name_data, dict) else str(name_data)
+            if not name and not is_interactive:
+                continue
+
+            # Indent by the node's "level" property when present, capped at 4 levels.
+            depth = 0
+            for prop in node.get("properties", []):
+                if isinstance(prop, dict) and prop.get("name") == "level":
+                    depth = int(prop.get("value", {}).get("value", 0))
+                    break
+            indent = "  " * min(depth, 4)
+
+            # Only hand out a ref when the node maps to a DOM node; a ref
+            # without a backend id could never be clicked or typed into.
+            ref_label = ""
+            backend_node_id = node.get("backendDOMNodeId")
+            if is_interactive and backend_node_id:
+                ref_counter += 1
+                ref_id = f"{_REF_PREFIX}{ref_counter}"
+                self._ref_map[ref_id] = backend_node_id
+                ref_label = f" {ref_id}"
+
+            # Format: [role @ref] Name
+            display_name = f" {name}" if name else ""
+            lines.append(f"{indent}[{role}{ref_label}]{display_name}")
+
+        return "\n".join(lines) if lines else "[empty page]"
+
+    # ── Interaction ────────────────────────────────────────
+
+    async def click(self, ref: str) -> None:
+        """Click an element identified by its ref ID from the latest snapshot.
+
+        Dispatches a mouse press/release at the centre of the element's
+        content box; falls back to a JavaScript ``click()`` when no usable box
+        model is available. Raises ValueError for an unknown ref and
+        RuntimeError when the node cannot be resolved.
+        """
+        ref = ref.strip()
+        if not ref.startswith(_REF_PREFIX):
+            ref = f"{_REF_PREFIX}{ref}"
+
+        backend_node_id = self._ref_map.get(ref)
+        if not backend_node_id:
+            raise ValueError(f"Unknown ref '{ref}'. Run browser_snapshot() first to get valid ref IDs.")
+
+        # Resolve to a remote object
+        result = await self._send(
+            "DOM.resolveNode", {"backendNodeId": backend_node_id}
+        )
+        object_id = result.get("object", {}).get("objectId")
+        if not object_id:
+            raise RuntimeError(f"Could not resolve element for ref {ref}")
+
+        # Scroll into view (best effort: a CDP error here is not fatal)
+        try:
+            await self._send(
+                "DOM.scrollIntoViewIfNeeded", {"backendNodeId": backend_node_id}
+            )
+        except RuntimeError:
+            pass
+
+        # The content quad is 8 numbers: x1,y1,x2,y2,x3,y3,x4,y4.
+        try:
+            box = await self._send(
+                "DOM.getBoxModel", {"backendNodeId": backend_node_id}
+            )
+            content = box.get("model", {}).get("content", [])
+        except RuntimeError:
+            content = []
+
+        if len(content) < 8:
+            # No usable geometry: click through JS rather than at a bogus (0, 0).
+            await self._send(
+                "Runtime.callFunctionOn",
+                {"objectId": object_id, "functionDeclaration": "function() { this.click(); }"},
+            )
+            return
+
+        x = sum(content[0:8:2]) / 4
+        y = sum(content[1:8:2]) / 4
+        # Dispatch mouse events
+        for event_type in ("mousePressed", "mouseReleased"):
+            await self._send(
+                "Input.dispatchMouseEvent",
+                {"type": event_type, "x": x, "y": y, "button": "left", "clickCount": 1},
+            )
+
+        # Wait for potential navigation
+        await asyncio.sleep(0.3)
+
+    async def type_text(self, ref: str, text: str) -> None:
+        """Replace the content of the element behind ``ref`` with ``text``.
+
+        Focuses the element, selects its existing content, then sends one
+        keyDown/keyUp pair per character. Raises ValueError for an unknown ref.
+        """
+        ref = ref.strip()
+        if not ref.startswith(_REF_PREFIX):
+            ref = f"{_REF_PREFIX}{ref}"
+
+        backend_node_id = self._ref_map.get(ref)
+        if not backend_node_id:
+            raise ValueError(f"Unknown ref '{ref}'. Run browser_snapshot() first.")
+
+        await self._send("DOM.focus", {"backendNodeId": backend_node_id})
+
+        # Select existing content so typing replaces it. modifiers=2 is Ctrl only
+        # (Cmd is 4), so also pass the platform-neutral "selectAll" editing command.
+        await self._send(
+            "Input.dispatchKeyEvent",
+            {"type": "keyDown", "key": "a", "modifiers": 2, "commands": ["selectAll"]},
+        )
+        await self._send(
+            "Input.dispatchKeyEvent",
+            {"type": "keyUp", "key": "a", "modifiers": 2},
+        )
+
+        # Type each character
+        for char in text:
+            await self._send(
+                "Input.dispatchKeyEvent", {"type": "keyDown", "key": char, "text": char}
+            )
+            await self._send("Input.dispatchKeyEvent", {"type": "keyUp", "key": char})
+
+    async def scroll(self, direction: str = "down", amount: int = 3) -> None:
+        """Scroll by ``amount`` wheel steps (deltaY 300 each); any direction other than 'down' scrolls up."""
+        sign = 1 if direction == "down" else -1
+        wheel_event = {"type": "mouseWheel", "x": 400, "y": 400, "deltaX": 0, "deltaY": 300 * amount * sign}
+        # The wheel event is dispatched at the fixed viewport point (400, 400).
+        await self._send("Input.dispatchMouseEvent", wheel_event)
+        # Brief pause after the wheel event before returning.
+        await asyncio.sleep(0.3)
+
+    # ── Observation ────────────────────────────────────────
+
+    async def screenshot(self) -> bytes:
+        """Capture the current viewport via Page.captureScreenshot and return the decoded PNG bytes."""
+        capture_params = {"format": "png", "quality": 80}
+        reply = await self._send("Page.captureScreenshot", capture_params)
+        encoded_png = reply["data"]  # base64 text
+        return base64.b64decode(encoded_png)
+
+    async def get_text(self) -> str:
+        """Return ``document.body.innerText`` of the page ('' when there is no body or no value)."""
+        # Optional chaining keeps the expression from throwing on a missing <body>.
+        js = "document.body?.innerText || ''"
+        reply = await self._send("Runtime.evaluate", {"expression": js})
+        remote_object = reply.get("result", {})
+        return remote_object.get("value", "")
+
+    async def evaluate(self, expression: str) -> str:
+        """Evaluate a JavaScript expression and return its value as text ('undefined' if none)."""
+        params = {"expression": expression, "returnByValue": True}
+        reply = await self._send("Runtime.evaluate", params)
+        remote = reply.get("result", {})
+        if remote.get("type") == "undefined":
+            return "undefined"
+        value = remote.get("value", remote.get("description", ""))
+        # JSON-encode non-strings so callers see null/true/{"a": 1}, not Python's None/True/{'a': 1}.
+        return value if isinstance(value, str) else json.dumps(value, ensure_ascii=False)
+
+    # ── Lifecycle ──────────────────────────────────────────
+
+    async def close(self) -> None:
+        """Close the CDP connection. Safe to call more than once."""
+        if not self._closed:
+            self._closed = True  # flag is set before closing, so a failed close is not retried
+            try:
+                await self._ws.close()
+            except Exception:
+                pass  # best effort: errors while closing the socket are ignored
diff --git a/cptr/utils/browser/firecrawl.py b/cptr/utils/browser/firecrawl.py
new file mode 100644
index 0000000..c259bd4
--- /dev/null
+++ b/cptr/utils/browser/firecrawl.py
@@ -0,0 +1,54 @@
+"""Firecrawl browser provider — page-to-markdown via REST API.
+
+Pure httpx, no SDK dependency. Supports both cloud (api.firecrawl.dev) and
+self-hosted instances.
+"""
+
+from __future__ import annotations
+
+import logging
+
+import httpx
+
+logger = logging.getLogger(__name__)
+
+DEFAULT_BASE_URL = "https://api.firecrawl.dev"
+
+
+async def scrape(
+    url: str,
+    api_key: str,
+    base_url: str = DEFAULT_BASE_URL,
+    format: str = "markdown",
+) -> str:
+    """Scrape a single page via Firecrawl and return its content in ``format``.
+
+    Uses POST /v1/scrape. API-level failures and empty results are returned as
+    a descriptive string; HTTP errors raise ``httpx.HTTPStatusError``.
+    """
+    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
+    payload = {"url": url, "formats": [format]}
+
+    async with httpx.AsyncClient(timeout=30) as http:
+        resp = await http.post(
+            f"{base_url.rstrip('/')}/v1/scrape",
+            json=payload,
+            headers=headers,
+        )
+        resp.raise_for_status()
+        data = resp.json()
+
+    if not data.get("success"):
+        error = data.get("error", "Unknown error")
+        return f"Firecrawl error: {error}"
+
+    result = data.get("data") or {}  # tolerate an explicit null payload
+    content = result.get(format, result.get("markdown", ""))
+    if not content:
+        return f"Firecrawl returned empty content for {url}"
+
+    # If the requested format is not plain text (e.g. a list), stringify it so the trim and return type hold.
+    content = content if isinstance(content, str) else str(content)
+    if len(content) > 50_000:  # trim to a reasonable size for LLM context
+        content = content[:50_000] + "\n\n[... truncated]"
+    return content
diff --git a/cptr/utils/browser/launcher.py b/cptr/utils/browser/launcher.py
new file mode 100644
index 0000000..d3354e8
--- /dev/null
+++ b/cptr/utils/browser/launcher.py
@@ -0,0 +1,168 @@
+"""Chrome/Chromium discovery and auto-launch.
+
+Finds a running Chrome instance or launches one headless with a debug port.
+Supports macOS and Linux. Called automatically when browser tools are invoked.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import logging
+import os
+import shutil
+import tempfile
+from pathlib import Path
+
+logger = logging.getLogger(__name__)
+
+# Common Chrome/Chromium binary locations by platform (order = lookup priority in _find_chrome)
+_CHROME_PATHS_MACOS = [  # absolute paths inside .app bundles, checked with Path.exists()
+    "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
+    "/Applications/Chromium.app/Contents/MacOS/Chromium",
+    "/Applications/Brave Browser.app/Contents/MacOS/Brave Browser",
+    "/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge",
+]
+
+_CHROME_PATHS_LINUX = [  # command names, resolved on PATH via shutil.which()
+    "google-chrome",
+    "google-chrome-stable",
+    "chromium",
+    "chromium-browser",
+    "brave-browser",
+    "microsoft-edge",
+]
+
+# Track launched process so we can kill it on shutdown
+_launched_process: asyncio.subprocess.Process | None = None  # set by ensure_browser()
+_user_data_dir: str | None = None  # temp profile dir of the launched Chrome; removed by shutdown_browser()
+
+
+def _find_chrome() -> str | None:
+    """Locate a Chrome-family browser binary, or return None if none is found.
+
+    macOS: the known .app bundle paths are tried first, then ``google-chrome``
+    and ``chromium`` on PATH. Every other platform: the Linux command names
+    are looked up on PATH in order.
+    """
+    import platform
+
+    if platform.system() == "Darwin":
+        bundled = next((p for p in _CHROME_PATHS_MACOS if Path(p).exists()), None)
+        if bundled is not None:
+            return bundled
+        path_names = ("google-chrome", "chromium")
+    else:
+        path_names = _CHROME_PATHS_LINUX
+
+    for candidate in path_names:
+        if resolved := shutil.which(candidate):
+            return resolved
+    return None
+
+
+async def _probe_cdp(base_url: str) -> bool:
+    """Check if a CDP endpoint is responding at ``{base_url}/json/version``.
+
+    Any failure (connection error, timeout, HTTP error status, bad JSON) yields False.
+    """
+    import httpx
+
+    try:
+        async with httpx.AsyncClient(timeout=3) as http:
+            resp = await http.get(f"{base_url}/json/version")
+            resp.raise_for_status()  # an error page from another service is not CDP
+            browser = resp.json().get("Browser", "unknown")
+    except Exception:
+        return False
+    logger.info("Found Chrome %s at %s", browser, base_url)
+    return True
+
+
+async def ensure_browser(port: int = 9222) -> str:
+    """Ensure a Chrome instance is available for CDP connection.
+
+    1. Check if CDP is already available at http://localhost:<port>
+    2. If not, find and launch Chrome/Chromium headless
+    3. Return the CDP base URL
+
+    Raises RuntimeError if no browser binary is found or CDP never responds.
+    """
+    global _launched_process, _user_data_dir
+
+    base_url = f"http://localhost:{port}"
+    # 1. Check if already running
+    if await _probe_cdp(base_url):
+        return base_url
+
+    # 2. Find Chrome binary
+    chrome_path = _find_chrome()
+    if not chrome_path:
+        raise RuntimeError(
+            "No Chrome or Chromium found. Install Google Chrome, Chromium, or Brave, "
+            "or set browser.cdp_url to point to a running instance."
+        )
+
+    # A Chrome we launched earlier has exited: remove its temp profile before the globals are overwritten.
+    if _launched_process is not None and _launched_process.returncode is not None:
+        await shutdown_browser()
+
+    # 3. Launch headless with debug port
+    _user_data_dir = tempfile.mkdtemp(prefix="cptr-browser-")
+    args = [
+        chrome_path,
+        f"--remote-debugging-port={port}",
+        "--headless=new",
+        "--no-first-run",
+        "--no-default-browser-check",
+        "--disable-background-networking",
+        "--disable-sync",
+        "--disable-translate",
+        "--disable-extensions",
+        f"--user-data-dir={_user_data_dir}",
+        "about:blank",
+    ]
+    logger.info("Launching Chrome: %s", " ".join(args[:3]))
+    _launched_process = await asyncio.create_subprocess_exec(
+        *args,
+        stdout=asyncio.subprocess.DEVNULL,
+        stderr=asyncio.subprocess.DEVNULL,
+    )
+
+    # Wait for CDP to become available (20 probes, 0.5s apart)
+    for _ in range(20):
+        await asyncio.sleep(0.5)
+        if await _probe_cdp(base_url):
+            logger.info("Chrome launched successfully on port %d", port)
+            return base_url
+    # Startup failed: don't leave the process and its temp profile behind.
+    await shutdown_browser()
+    raise RuntimeError(
+        f"Chrome launched but CDP not responding on port {port} after 10s. Binary: {chrome_path}"
+    )
+
+
+async def shutdown_browser() -> None:
+    """Stop the Chrome process we launched (if any) and delete its temp profile.
+
+    Called on app shutdown. Tries terminate() first and escalates to kill() after 5 seconds.
+    """
+    global _launched_process, _user_data_dir
+
+    proc, _launched_process = _launched_process, None
+    if proc is not None and proc.returncode is None:
+        logger.info("Shutting down launched Chrome (pid %d)", proc.pid)
+        try:
+            proc.terminate()
+            await asyncio.wait_for(proc.wait(), timeout=5)
+        except (asyncio.TimeoutError, ProcessLookupError):
+            try:
+                proc.kill()
+                # Reap the killed process so the profile directory is no longer in use.
+                await proc.wait()
+            except ProcessLookupError:
+                pass
+
+    # Clean up temp profile (ignore_errors=True makes this best effort)
+    profile_dir, _user_data_dir = _user_data_dir, None
+    if profile_dir:
+        shutil.rmtree(profile_dir, ignore_errors=True)
diff --git a/cptr/utils/browser/session.py b/cptr/utils/browser/session.py
new file mode 100644
index 0000000..b30a4bc
--- /dev/null
+++ b/cptr/utils/browser/session.py
@@ -0,0 +1,88 @@
+"""Per-chat browser session manager.
+
+Maintains one CDPClient per chat so the AI can do multi-step browser flows
+(navigate -> snapshot -> click -> type -> snapshot) without losing state.
+Sessions auto-close after an idle timeout.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import logging
+import time
+
+from cptr.utils.browser.cdp import CDPClient
+
+logger = logging.getLogger(__name__)
+
+DEFAULT_TIMEOUT_MINUTES = 10
+
+
+class BrowserSessionManager:
+    """Keeps at most one CDPClient per chat id and closes sessions left idle too long."""
+
+    def __init__(self) -> None:
+        self._timeout_minutes = DEFAULT_TIMEOUT_MINUTES
+        self._cleanup_task: asyncio.Task | None = None
+        self._sessions: dict[str, CDPClient] = {}  # chat_id -> client
+        self._last_used: dict[str, float] = {}  # chat_id -> time.monotonic() of last use
+
+    def set_timeout(self, minutes: int) -> None:
+        self._timeout_minutes = minutes if minutes > 1 else 1  # never below one minute
+
+    async def get_or_create(self, chat_id: str, cdp_url: str) -> CDPClient:
+        """Return this chat's live client, connecting a new one when none is usable."""
+        cached = self._sessions.get(chat_id)
+        if cached is not None and not cached._closed:
+            self._last_used[chat_id] = time.monotonic()
+            return cached
+        if cached is not None:
+            # Closed behind our back: forget the stale entry before reconnecting.
+            self._sessions.pop(chat_id)
+            self._last_used.pop(chat_id, None)
+
+        fresh = await CDPClient.connect(cdp_url)
+        self._sessions[chat_id] = fresh
+        self._last_used[chat_id] = time.monotonic()
+
+        # (Re)start the idle sweeper when it is not currently running.
+        sweeper = self._cleanup_task
+        if sweeper is None or sweeper.done():
+            self._cleanup_task = asyncio.create_task(self._cleanup_loop())
+
+        logger.info("Browser session created for chat %s", chat_id[:8])
+        return fresh
+
+    async def close(self, chat_id: str) -> None:
+        """Drop the bookkeeping for ``chat_id`` and close its client, if there is one."""
+        self._last_used.pop(chat_id, None)
+        client = self._sessions.pop(chat_id, None)
+        if client:
+            await client.close()
+            logger.info("Browser session closed for chat %s", chat_id[:8])
+
+    async def close_all(self) -> None:
+        """Cancel the idle sweeper and close every open session. Called on app shutdown."""
+        if self._cleanup_task is not None and not self._cleanup_task.done():
+            self._cleanup_task.cancel()
+        for chat_id in tuple(self._sessions.keys()):
+            await self.close(chat_id)
+
+    async def _cleanup_loop(self) -> None:
+        """Once a minute, close sessions unused for longer than the timeout; ends when none remain."""
+        while self._sessions:
+            await asyncio.sleep(60)  # Check every minute
+            now = time.monotonic()
+            idle_limit = 60 * self._timeout_minutes
+            # Collect the expired ids first: close() mutates _last_used.
+            stale: list[str] = []
+            for cid, last in self._last_used.items():
+                if now - last > idle_limit:
+                    stale.append(cid)
+            for chat_id in stale:
+                logger.info("Browser session timed out for chat %s", chat_id[:8])
+                await self.close(chat_id)
+
+
+# Singleton instance
+session_manager = BrowserSessionManager()
diff --git a/cptr/utils/chat_task.py b/cptr/utils/chat_task.py
index 74d02c6..93fdfe0 100644
--- a/cptr/utils/chat_task.py
+++ b/cptr/utils/chat_task.py
@@ -560,7 +560,7 @@ async def _load_message_history(
                 
                 text_content = entry["content"]
                 
-                # Append file:// references so the AI can read them with view_file
+                # Append file:// references so the AI can read them with read_file
                 if non_images:
                     from cptr.utils.storage import UPLOADS_DIR
                     file_refs = []
@@ -600,16 +600,18 @@ async def _load_message_history(
             tool_calls = []
             for item in m.output:
                 if item.get("type") == "function_call" and item.get("status") == "completed":
-                    tool_calls.append(
-                        {
-                            "id": item["call_id"],
-                            "type": "function",
-                            "function": {
-                                "name": item["name"],
-                                "arguments": json.dumps(item.get("arguments", {})),
-                            },
-                        }
-                    )
+                    tc = {
+                        "id": item["call_id"],
+                        "type": "function",
+                        "function": {
+                            "name": item["name"],
+                            "arguments": json.dumps(item.get("arguments", {})),
+                        },
+                    }
+                    # Preserve Responses API fc_ ID for round-tripping
+                    if item.get("fc_id"):
+                        tc["fc_id"] = item["fc_id"]
+                    tool_calls.append(tc)
             if tool_calls:
                 entry["tool_calls"] = tool_calls
 
@@ -627,12 +629,32 @@ async def _load_message_history(
     return result, existing_summary
 
 
+def _parse_image_data_uri(result: str) -> tuple[str, str] | None:
+    """Check if a tool result is a base64 data URI image (from read_file on image files).
+
+    Returns (media_type, base64_data) for ``data:image/<type>;base64,<data>``, else None.
+    Non-base64 data URIs are rejected: their payload would not be valid base64.
+    """
+    if not result.startswith("data:image/"):
+        return None
+    header, sep, b64_data = result.partition(",")
+    media_type, *params = header[len("data:"):].split(";")
+    # Require the comma, the base64 marker and a non-empty payload.
+    if not sep or "base64" not in params or not b64_data:
+        return None
+    return media_type, b64_data
+
+
 def _append_tool_to_messages(messages: list[dict], event: dict, result: str, provider: str):
     """Append a tool call + result to the message history for the next API call."""
-    # Guard against oversized tool outputs
-    if len(result) > CHAT_TOOL_MAX_CHARS:
-        half = CHAT_TOOL_MAX_CHARS // 2
-        result = result[:half] + "\n\n...(truncated)...\n\n" + result[-half:]
+    # Check for image result before truncation (data URI is large but needed)
+    image = _parse_image_data_uri(result)
+
+    if not image:
+        # Guard against oversized tool outputs (skip for images, handled above)
+        if len(result) > CHAT_TOOL_MAX_CHARS:
+            half = CHAT_TOOL_MAX_CHARS // 2
+            result = result[:half] + "\n\n...(truncated)...\n\n" + result[-half:]
 
     # Add assistant message with tool_call
     messages.append(
@@ -642,6 +664,7 @@ def _append_tool_to_messages(messages: list[dict], event: dict, result: str, pro
             "tool_calls": [
                 {
                     "id": event["call_id"],
+                    "fc_id": event.get("id", ""),
                     "type": "function",
                     "function": {
                         "name": event["name"],
@@ -651,14 +674,35 @@ def _append_tool_to_messages(messages: list[dict], event: dict, result: str, pro
             ],
         }
     )
-    # Add tool result
-    messages.append(
-        {
-            "role": "tool",
-            "tool_call_id": event["call_id"],
-            "content": result,
-        }
-    )
+
+    if image:
+        # Structured multimodal content — provider converters handle the
+        # "image" block type appropriately for each API.
+        media_type, b64_data = image
+        path = event["arguments"].get("path", "image")
+        messages.append(
+            {
+                "role": "tool",
+                "tool_call_id": event["call_id"],
+                "content": [
+                    {"type": "text", "text": f"Image file: {path}"},
+                    {
+                        "type": "image",
+                        "media_type": media_type,
+                        "base64": b64_data,
+                    },
+                ],
+            }
+        )
+    else:
+        # Plain text tool result
+        messages.append(
+            {
+                "role": "tool",
+                "tool_call_id": event["call_id"],
+                "content": result,
+            }
+        )
 
 
 def _find_safe_split(messages: list[dict], target_keep: int) -> int:
@@ -798,7 +842,7 @@ def _sync_state():
             system += f"\n\n[CONVERSATION SUMMARY]\n{loaded_summary}"
         if regeneration_prompt:
             messages.append({"role": "user", "content": regeneration_prompt})
-        tools = get_tool_list()
+        tools = await get_tool_list()
 
         # Remove view_skill tool if no skills are available
         skills = discover_skills(workspace)
@@ -902,9 +946,36 @@ def _sync_state():
                     message_id[:8], len(drop_zone), len(keep_zone), len(summary),
                 )
 
+            # Anthropic supports images natively in tool_result content blocks.
+            # Chat Completions and Responses API don't support multimodal tool messages,
+            # so extract images into a follow-up user message.
+            api_messages = messages
+            if provider != "anthropic":
+                image_blocks = []
+                api_messages = []
+                for m in messages:
+                    if m.get("role") == "tool" and isinstance(m.get("content"), list):
+                        text_parts = []
+                        for part in m["content"]:
+                            if part.get("type") == "text":
+                                text_parts.append(part.get("text", ""))
+                            elif part.get("type") == "image":
+                                image_blocks.append(part)
+                        api_messages.append({**m, "content": "\n".join(text_parts)})
+                    else:
+                        api_messages.append(m)
+                if image_blocks:
+                    api_messages.append({
+                        "role": "user",
+                        "content": [
+                            {"type": "text", "text": "Here are the images from the tool results above."},
+                            *image_blocks,
+                        ],
+                    })
+
             form_data = ChatCompletionForm(
                 model=model,
-                messages=messages,
+                messages=api_messages,
                 instructions=system,
                 tools=tools,
             )
@@ -935,6 +1006,7 @@ def _sync_state():
                         "type": "function_call",
                         "id": str(uuid.uuid4()),
                         "call_id": event["call_id"],
+                        "fc_id": event.get("id", ""),
                         "name": name,
                         "arguments": event["arguments"],
                     }
@@ -955,7 +1027,7 @@ def _sync_state():
                         if name == "create_artifact":
                             result = await create_artifact(**event["arguments"], workspace=workspace)
                         else:
-                            result = await execute_tool(name, event["arguments"], {"workspace": workspace, "user_id": user_id, "model_id": model})
+                            result = await execute_tool(name, event["arguments"], {"workspace": workspace, "user_id": user_id, "model_id": model, "chat_id": chat_id})
 
                         # Update status to completed
                         item["status"] = "completed"
@@ -1073,15 +1145,35 @@ def _sync_state():
     except Exception as e:
         logger.exception(f"Chat task error for message {message_id}")
         _flush_text()
+        error_msg = str(e)
+        # Try to extract API error body for more detail
+        if hasattr(e, 'response'):
+            try:
+                body = e.response.text or ""
+                if body:
+                    import json as _json
+                    err_data = _json.loads(body)
+                    api_msg = err_data.get("error", {}).get("message", "")
+                    if api_msg:
+                        error_msg = api_msg
+            except Exception:
+                pass
+        # Append error to content so it's visible in the chat
+        error_block = f"\n\n> **Error:** {error_msg}"
+        content += error_block
+        text_buffer += error_block
+        flushed_item = _flush_text()
+        if flushed_item:
+            await emit(output=flushed_item)
         await ChatMessage.update(
             message_id,
             content=content,
             output=output_items,
             done=True,
-            meta={"error": str(e)},
+            meta={"error": error_msg},
         )
         _task_state.pop(message_id, None)
-        await _emit_done()
+        await emit(done=True, error=error_msg)
     finally:
         _tasks.pop(message_id, None)
         _task_state.pop(message_id, None)
diff --git a/cptr/utils/tools.py b/cptr/utils/tools.py
index bce8ff6..7ca14c1 100644
--- a/cptr/utils/tools.py
+++ b/cptr/utils/tools.py
@@ -78,6 +78,84 @@ def _truncate_output(text: str, max_chars: int = 80_000) -> str:
     return text[:half] + "\n\n... (truncated) ...\n\n" + text[-half:]
 
 
+# ── Image support ───────────────────────────────────────────
+
+_IMAGE_MAX_BYTES = 5 * 1024 * 1024  # 5 MB target for API payload
+
+# File extension (lowercase, with dot) -> MIME type used when building data URIs.
+_IMAGE_MIME = {
+    ".png": "image/png",
+    ".jpg": "image/jpeg",
+    ".jpeg": "image/jpeg",
+    ".gif": "image/gif",
+    ".webp": "image/webp",
+    ".bmp": "image/bmp",
+    ".tiff": "image/tiff",
+    ".tif": "image/tiff",
+}
+
+# Extensions treated as images; derived so the two tables cannot drift apart.
+IMAGE_EXTENSIONS = set(_IMAGE_MIME)
+
+
+def _read_image_file(full: Path, path: str) -> str:
+    """Read an image file and return it as a base64 ``data:`` URI string.
+
+    Files larger than _IMAGE_MAX_BYTES are re-encoded at shrinking sizes with
+    Pillow.  Returns an "Error: ..." string when Pillow is not installed or
+    when no attempt fits under the limit.  ``path`` is currently unused.
+    """
+    import base64
+
+    size = full.stat().st_size
+    ext = full.suffix.lower()
+    media_type = _IMAGE_MIME.get(ext, "image/png")
+    data = full.read_bytes()
+
+    if size > _IMAGE_MAX_BYTES:
+        try:
+            from PIL import Image
+            import io
+
+            img = Image.open(io.BytesIO(data))
+            # Re-encode jpg/bmp/tiff sources as JPEG; everything else as PNG.
+            out_format = "JPEG" if ext in (".jpg", ".jpeg", ".bmp", ".tiff", ".tif") else "PNG"
+            if out_format == "JPEG":
+                media_type = "image/jpeg"
+                if img.mode not in ("RGB", "L", "CMYK"):  # JPEG has no alpha/palette support
+                    img = img.convert("RGB")
+            else:
+                media_type = "image/png"
+
+            shrunk = None  # set only when an attempt fits under the limit
+            scale = 0.8  # start at 80%
+            for _ in range(10):
+                new_w = int(img.width * scale)
+                new_h = int(img.height * scale)
+                if new_w < 100 or new_h < 100:
+                    break  # too small to be useful; give up
+                resized = img.resize((new_w, new_h), Image.LANCZOS)
+                buf = io.BytesIO()
+                save_kwargs = {"quality": 85} if out_format == "JPEG" else {}
+                resized.save(buf, format=out_format, **save_kwargs)
+                if buf.tell() <= _IMAGE_MAX_BYTES:
+                    shrunk = buf.getvalue()
+                    break
+                scale *= 0.7  # more aggressive on each pass
+            # Covers both exhausted attempts and the min-size break (a for/else would miss the break).
+            if shrunk is None:
+                return f"Error: image too large ({_human_size(size)}) and could not be resized below 5MB."
+            data = shrunk
+        except ImportError:
+            return (
+                f"Error: image file is too large ({_human_size(size)}). "
+                f"Install Pillow (`pip install Pillow`) to enable automatic resizing."
+            )
+
+    b64 = base64.b64encode(data).decode("ascii")
+    return f"data:{media_type};base64,{b64}"
+
+
 # ── Tool functions ──────────────────────────────────────────
 
 
@@ -100,6 +178,10 @@ async def read_file(
     if not full.is_file():
         return f"Error: file not found: {path}"
 
+    # Image files: return a base64 data URI instead of garbled text
+    if full.suffix.lower() in IMAGE_EXTENSIONS:
+        return await asyncio.to_thread(_read_image_file, full, path)
+
     def _read():
         size = full.stat().st_size
         if size > 500_000:
@@ -939,6 +1021,158 @@ async def view_skill(
     return format_skill_content(skill)
 
 
+# ── Browser tools ────────────────────────────────────────────
+
+
+async def _get_browser_config() -> dict:
+    """Read browser settings from the DB; on any failure return disabled defaults."""
+    try:
+        from cptr.models import Config
+        auto_launch = await Config.get("browser.auto_launch")  # read once; unset (None) means True
+        return {
+            "enabled": await Config.get("browser.enabled") or False,
+            "provider": await Config.get("browser.provider") or "local",
+            "cdp_url": await Config.get("browser.cdp_url") or "http://localhost:9222",
+            "auto_launch": True if auto_launch is None else auto_launch,
+            "session_timeout": int(await Config.get("browser.session_timeout_minutes") or 10),
+            "firecrawl_api_key": await Config.get("browser.firecrawl_api_key") or "",
+            "firecrawl_base_url": await Config.get("browser.firecrawl_base_url") or "https://api.firecrawl.dev",
+            "browser_use_api_key": await Config.get("browser.browser_use_api_key") or "",
+            "browser_use_base_url": await Config.get("browser.browser_use_base_url") or "https://api.browser-use.com",
+        }
+    except Exception:  # deliberate best-effort; include cdp_url so callers indexing it don't KeyError
+        return {"enabled": False, "provider": "local", "cdp_url": "http://localhost:9222"}
+
+
+async def _get_cdp_session(chat_id: str) -> "CDPClient":
+    """Get or create the CDP session for ``chat_id``, auto-launching the browser if configured."""
+    cfg = await _get_browser_config()
+    cdp_url = cfg.get("cdp_url") or "http://localhost:9222"  # fallback config may omit the key
+
+    if cfg.get("auto_launch", True):
+        from urllib.parse import urlsplit
+        from cptr.utils.browser.launcher import ensure_browser
+
+        parts = urlsplit(cdp_url if "://" in cdp_url else f"//{cdp_url}")  # tolerate paths / missing scheme
+        cdp_url = await ensure_browser(port=parts.port or 9222)  # no explicit port -> default 9222
+    from cptr.utils.browser.session import session_manager
+    session_manager.set_timeout(cfg.get("session_timeout", 10))
+    return await session_manager.get_or_create(chat_id, cdp_url)
+
+
+async def browser_navigate(url: str, *, __context__: dict) -> str:
+    """Navigate to a URL in the browser. Returns the page title and status.
+    :param url: The URL to navigate to.
+    """
+    settings = await _get_browser_config()
+    backend = settings.get("provider", "local")
+
+    if backend == "firecrawl":
+        api_key = settings.get("firecrawl_api_key", "")
+        if api_key:
+            from cptr.utils.browser.firecrawl import scrape
+
+            page_text = await scrape(url, api_key, settings.get("firecrawl_base_url", ""))
+            return f"Navigated to {url} (via Firecrawl)\n\n{page_text}"
+        return "Error: Firecrawl API key not configured. Set it in Settings > Browser."
+
+    if backend == "browser_use":
+        api_key = settings.get("browser_use_api_key", "")
+        if api_key:
+            from cptr.utils.browser.browser_use import browse
+
+            report = await browse(f"Navigate to {url} and describe what you see", api_key, settings.get("browser_use_base_url", ""))
+            return f"Navigated to {url} (via Browser-Use)\n\n{report}"
+        return "Error: Browser-Use API key not configured. Set it in Settings > Browser."
+
+    # Any other provider value: drive this chat's own browser session over CDP.
+    session = await _get_cdp_session(__context__.get("chat_id", "default"))
+    nav = await session.navigate(url)
+    page_title = nav.get('title', '')
+    return f"Navigated to {url}\nTitle: {page_title}"
+
+
+async def browser_snapshot(*, __context__: dict) -> str:
+    """Get the current page content. For local browser, returns an accessibility tree with ref IDs (@e1, @e2, etc.) that can be used with browser_click and browser_type. For cloud providers, returns page content as text."""
+    settings = await _get_browser_config()
+    uses_cloud = settings.get("provider", "local") in ("firecrawl", "browser_use")
+
+    if uses_cloud:  # cloud providers: the content was already returned by browser_navigate
+        return "Snapshot is only meaningful after browser_navigate. The navigate result already contains the page content."
+
+    # Local CDP: snapshot the page held by this chat's session.
+    session = await _get_cdp_session(__context__.get("chat_id", "default"))
+    return await session.snapshot()
+
+
+async def browser_click(ref: str, *, __context__: dict) -> str:
+    """Click an element on the page identified by its ref ID from the snapshot (e.g. @e1).
+    :param ref: The ref ID of the element to click (e.g. @e1, @e5).
+    """
+    provider = (await _get_browser_config()).get("provider", "local")
+    if provider != "local":
+        return "Error: browser_click requires Local CDP provider. Cloud providers (Firecrawl, Browser-Use) don't support interactive browsing. Switch to Local CDP in Settings > Browser."
+
+    # Interactive actions go through this chat's local CDP session.
+    session = await _get_cdp_session(__context__.get("chat_id", "default"))
+    await session.click(ref)
+    page_after_click = await session.snapshot()  # lets the model see the effect of the click
+    return page_after_click
+
+
+async def browser_type(ref: str, text: str, *, __context__: dict) -> str:
+    """Type text into an input element identified by its ref ID from the snapshot.
+    :param ref: The ref ID of the input element (e.g. @e3).
+    :param text: The text to type.
+    """
+    provider = (await _get_browser_config()).get("provider", "local")
+    if provider != "local":
+        return "Error: browser_type requires Local CDP provider. Switch to Local CDP in Settings > Browser."
+
+    session = await _get_cdp_session(__context__.get("chat_id", "default"))
+    await session.type_text(ref, text)
+    # Hand back a fresh snapshot so the caller sees the page after typing.
+    return await session.snapshot()
+
+
+async def browser_screenshot(*, __context__: dict) -> str:
+    """Take a screenshot of the current browser page. Saves the image to the workspace.
+    """
+    cfg = await _get_browser_config()
+    if cfg.get("provider", "local") != "local":
+        return "Error: browser_screenshot requires Local CDP provider."
+
+    chat_id = __context__.get("chat_id", "default")
+    client = await _get_cdp_session(chat_id)
+    png_bytes = await client.screenshot()
+
+    import time
+
+    # Millisecond timestamp: with whole seconds, two captures in the same second overwrote each other.
+    workspace = __context__.get("workspace", ".")
+    filepath = Path(workspace) / ".cptr" / "screenshots" / f"screenshot_{int(time.time() * 1000)}.png"
+
+    def _save() -> None:
+        filepath.parent.mkdir(parents=True, exist_ok=True)
+        filepath.write_bytes(png_bytes)
+
+    await asyncio.to_thread(_save)  # keep blocking disk I/O off the event loop
+    return f"Screenshot saved: {filepath}"
+
+
+async def browser_evaluate(javascript: str, *, __context__: dict) -> str:
+    """Execute JavaScript in the browser page and return the result.
+    :param javascript: The JavaScript expression to evaluate.
+    """
+    provider = (await _get_browser_config()).get("provider", "local")
+    if provider != "local":
+        return "Error: browser_evaluate requires Local CDP provider."
+
+    session = await _get_cdp_session(__context__.get("chat_id", "default"))
+    outcome = await session.evaluate(javascript)
+    return outcome
+
+
 # ── Registry ────────────────────────────────────────────────
 
 TOOLS: dict[str, dict] = {
@@ -964,6 +1198,16 @@ async def view_skill(
     "delete_automation": {"fn": delete_automation, "auto": False},
 }
 
+# Browser tools — kept out of TOOLS; get_tool_list() adds them only when browser.enabled is set
+BROWSER_TOOLS: dict[str, dict] = {  # same entry shape as TOOLS: {"fn": callable, "auto": bool}
+    "browser_navigate": {"fn": browser_navigate, "auto": False},
+    "browser_snapshot": {"fn": browser_snapshot, "auto": True},  # NOTE(review): "auto" presumably = runs without approval — confirm
+    "browser_click": {"fn": browser_click, "auto": False},
+    "browser_type": {"fn": browser_type, "auto": False},
+    "browser_screenshot": {"fn": browser_screenshot, "auto": True},
+    "browser_evaluate": {"fn": browser_evaluate, "auto": False},
+}
+
 
 # ── Schema from function signature ──────────────────────────
 
@@ -1017,14 +1261,25 @@ def _fn_to_schema(name: str, fn) -> dict:
     }
 
 
-def get_tool_list() -> list[dict]:
-    """Return tool schemas for the LLM."""
-    return [_fn_to_schema(name, t["fn"]) for name, t in TOOLS.items()]
+async def get_tool_list() -> list[dict]:
+    """Build the tool schemas offered to the LLM.
+
+    The browser tools are appended only while browser.enabled is truthy in config.
+    """
+    available = {**TOOLS}
+    try:
+        from cptr.models import Config
+        browser_on = await Config.get("browser.enabled")
+        if browser_on in (True, "true", "1"):
+            available.update(BROWSER_TOOLS)
+    except Exception:
+        pass  # config unavailable: offer the core tools only
+    return [_fn_to_schema(tool_name, entry["fn"]) for tool_name, entry in available.items()]
 
 
 async def execute_tool(name: str, args: dict, __context__: dict) -> str:
     """Execute a tool by name, injecting execution context."""
-    info = TOOLS.get(name)
+    info = TOOLS.get(name) or BROWSER_TOOLS.get(name)
     if not info:
         return f"Error: unknown tool: {name}"
     fn = info["fn"]
diff --git a/pyproject.toml b/pyproject.toml
index d9f00e4..fc5c4c4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "cptr"
-version = "0.3.0"
+version = "0.3.1"
 description = "Your computer, from anywhere. Code, manage, and control your machine from the web."
 license = {file = "LICENSE"}
 readme = "README.md"