From bf30cdf1fdb5f027f497eaf0debc352e44066de0 Mon Sep 17 00:00:00 2001
From: KillerQueen-Z <1211904451@qq.com>
Date: Thu, 11 Jun 2026 15:45:42 -0700
Subject: [PATCH 1/2] feat: add 'franklin predict' prediction mode
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A headless command + restricted capability profile for forecasting one
real-world event. predictionCapabilities exposes only research tools
(web search, webfetch, Exa, search X, prediction markets, market data) —
no filesystem, shell, media, swaps, phone or sandbox. 'franklin predict
--model M --question Q' runs the agent and, by default, prints a JSON
envelope (finalText + tool trace + usage), so non-interactive callers (e.g.
franklin.bet) can drive grounded, tool-using predictions over the CLI.
Pass --no-json for human-readable streaming instead.
---
 src/commands/predict.ts | 155 ++++++++++++++++++++++++++++++++++++++++
 src/index.ts            |  12 ++++
 src/tools/index.ts      |  27 +++++++
 3 files changed, 194 insertions(+)
 create mode 100644 src/commands/predict.ts

diff --git a/src/commands/predict.ts b/src/commands/predict.ts
new file mode 100644
index 0000000..402c0ad
--- /dev/null
+++ b/src/commands/predict.ts
@@ -0,0 +1,155 @@
+/**
+ * `franklin predict` — Franklin prediction mode (headless).
+ *
+ * Runs ONE model as a disciplined forecaster: it researches a single real-world
+ * event with a tight, read-only toolset (web search, source fetch, Exa, X, live
+ * prediction markets, a little market data) the way a bettor would before
+ * putting money down — then commits to a pick with a confidence.
+ *
+ * Designed for machine callers (e.g. BlockRun Oracle): by default it emits a
+ * single JSON envelope on stdout containing the model's final answer, the
+ * tool-call trace (what it searched and a truncated copy of what it found), the
+ * terminal reason and token usage. Pass --no-json for human-readable streaming.
+ *
+ *   franklin predict --model anthropic/claude-opus-4.8 \
+ *     --question "Who wins the 2026 FIFA World Cup? Pick one country."
+ */
+import { interactiveSession } from '../agent/loop.js';
+import type { AgentConfig, StreamEvent, StreamTurnDone } from '../agent/types.js';
+import { predictionCapabilities, resetToolSessionState } from '../tools/index.js';
+import { loadChain, API_URLS } from '../config.js';
+import { resolveModel } from '../ui/model-picker.js';
+
+export interface PredictOptions {
+  model?: string;
+  question?: string;
+  maxTurns?: string;
+  maxSpend?: string;
+  json?: boolean;
+  debug?: boolean;
+}
+
+const PREDICTION_SYSTEM: string[] = [
+  'You are a sharp, disciplined forecasting analyst — think like a professional who is about to put real money on this question.',
+  'Your job: predict the outcome of ONE real-world event. Before answering you MUST do research the way a bettor would:',
+  "1. Use web_search (and webfetch / exa tools) for the most CURRENT facts and news — today's real-world state matters far more than your training data.",
+  '2. Use search_prediction_markets to read the CURRENT market-implied odds (Polymarket, Kalshi, etc.) for this or a closely related question.',
+  '3. Weigh it: where is the consensus, where might the market be mispriced, what is your edge.',
+  'Keep tool use focused — a handful of targeted calls, not dozens. When you have enough to decide, STOP researching and answer.',
+  'Your FINAL message must end with EXACTLY ONE single-line minified JSON object and NOTHING after it:',
+  '{"pick": string, "confidence": number, "rationale": string, "analysis": string, "marketOdds": string}',
+  '- pick: one option from the question (a short label, e.g. a country, party, bucket, or Yes/No).',
+  '- confidence: your probability (0-1) that THIS pick is correct.',
+  '- rationale: one sharp sentence (max 22 words).',
+  '- analysis: 3-5 sentences citing what your research found, the strongest counter-argument, and why you still land here. No literal newlines inside the string.',
+  "- marketOdds: what the prediction market currently implies (e.g. 'Polymarket: France 18%'), or 'n/a' if none found.",
+  'Be decisive. Do not hedge with "it depends".',
+];
+
+interface TraceEntry {
+  tool: string;
+  input: string;
+  output: string;
+  isError?: boolean;
+}
+
+export async function predictCommand(options: PredictOptions): Promise<void> {
+  const question = options.question?.trim();
+  if (!question) {
+    process.stderr.write('predict: --question is required\n');
+    process.exitCode = 1;
+    return;
+  }
+  if (!options.model) {
+    process.stderr.write('predict: --model is required\n');
+    process.exitCode = 1;
+    return;
+  }
+
+  const chain = loadChain();
+  const apiUrl = API_URLS[chain];
+  const model = resolveModel(options.model);
+  const asJson = options.json !== false;
+
+  resetToolSessionState();
+
+  const agentConfig: AgentConfig = {
+    model,
+    apiUrl,
+    chain,
+    systemInstructions: PREDICTION_SYSTEM,
+    capabilities: predictionCapabilities,
+    maxTurns: options.maxTurns != null ? Number(options.maxTurns) : 12,
+    permissionMode: 'trust',
+    debug: !!options.debug,
+    showPrefetchStatus: false,
+    ...(options.maxSpend != null ? { maxSpendUsd: Number(options.maxSpend) } : {}),
+  };
+
+  let finalText = '';
+  let turnReason: StreamTurnDone['reason'] = 'completed';
+  let turnError: string | undefined;
+  let inputTokens = 0;
+  let outputTokens = 0;
+  const trace: TraceEntry[] = [];
+  const nameById = new Map<string, string>();
+  const inputById = new Map<string, string>();
+  const previewById = new Map<string, string>();
+
+  let delivered = false;
+  const getInput = async (): Promise<string | null> => {
+    if (delivered) return null;
+    delivered = true;
+    return question;
+  };
+
+  await interactiveSession(agentConfig, getInput, (event: StreamEvent) => {
+    switch (event.kind) {
+      case 'text_delta':
+        finalText += event.text;
+        if (!asJson) process.stdout.write(event.text);
+        break;
+      case 'capability_start':
+        nameById.set(event.id, event.name);
+        inputById.set(event.id, '');
+        if (event.preview) previewById.set(event.id, event.preview);
+        if (!asJson) process.stderr.write(`\n  · ${event.name}${event.preview ? ` ${event.preview}` : ''}\n`);
+        break;
+      case 'capability_input_delta':
+        inputById.set(event.id, (inputById.get(event.id) || '') + event.delta);
+        break;
+      case 'capability_done': {
+        const tool = nameById.get(event.id) || 'tool';
+        const input = (inputById.get(event.id) || '').trim() || previewById.get(event.id) || '';
+        const output = event.result?.fullOutput || event.result?.output || '';
+        trace.push({ tool, input, output: output.slice(0, 1500), isError: event.result?.isError });
+        break;
+      }
+      case 'usage':
+        inputTokens = event.inputTokens;
+        outputTokens = event.outputTokens;
+        break;
+      case 'turn_done':
+        turnReason = event.reason;
+        turnError = event.error;
+        break;
+    }
+  });
+
+  if (asJson) {
+    const envelope = {
+      model,
+      question,
+      finalText: finalText.trim(),
+      trace,
+      turnReason,
+      ...(turnError ? { error: turnError } : {}),
+      usage: { inputTokens, outputTokens },
+    };
+    process.stdout.write(JSON.stringify(envelope) + '\n');
+  } else if (turnReason !== 'completed' && turnError) {
+    process.stderr.write(`\n${turnError}\n`);
+  }
+
+  process.exitCode = turnReason === 'completed' ? 0 : 1;
+}
diff --git a/src/index.ts b/src/index.ts
index 9e7dc7d..46ea431 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -29,6 +29,7 @@ import { uninitCommand } from './commands/uninit.js';
 import { proxyCommand } from './commands/proxy.js';
 import { buildTaskCommand } from './commands/task.js';
 import { buildContentCommand } from './commands/content.js';
+import { predictCommand } from './commands/predict.js';
 
 import { VERSION as version } from './config.js';
 
@@ -90,6 +91,17 @@ program
   .option('--debug', 'Enable debug logging')
   .action((options) => proxyCommand({ ...options, version }));
 
+program
+  .command('predict')
+  .description('Prediction mode — forecast one real-world event with a research-only toolset (web/markets), headless')
+  .requiredOption('-m, --model <model>', 'Model to use (e.g. anthropic/claude-opus-4.8, openai/gpt-5.5)')
+  .requiredOption('-q, --question <text>', 'The event question to forecast (include the allowed options)')
+  .option('--max-turns <n>', 'Max agent turns before forcing an answer', '12')
+  .option('--max-spend <usd>', 'Hard USD cap on this prediction run')
+  .option('--no-json', 'Human-readable streaming instead of a JSON envelope')
+  .option('--debug', 'Enable debug logging')
+  .action((options) => predictCommand(options));
+
 program
   .command('init')
   .description('Configure franklin auto-start (writes ~/.claude/settings.json + installs LaunchAgent on macOS)')
diff --git a/src/tools/index.ts b/src/tools/index.ts
index eb48d4a..e2c34d1 100644
--- a/src/tools/index.ts
+++ b/src/tools/index.ts
@@ -234,4 +234,31 @@ export {
   detachCapability,
 };
 
+/**
+ * "Franklin prediction mode" toolset.
+ *
+ * A deliberately tight, research-only capability set for forecasting a single
+ * real-world event the way a careful bettor would: gather current facts, read
+ * sources, check live prediction-market odds and a little market data — then
+ * decide. Everything else (filesystem, shell, media generation, swaps/trade
+ * execution, phone/voice, GPU sandbox, posting) is intentionally excluded:
+ * a forecaster looks things up, it does not act on the world or spend beyond
+ * the cheap read calls these tools make.
+ *
+ * Used by the `franklin predict` command and reusable by any headless caller
+ * (e.g. franklin.bet) that wants a grounded prediction.
+ */
+export const predictionCapabilities: CapabilityHandler[] = [
+  webSearchCapability,        // web_search — current news & facts
+  webFetchCapability,         // webfetch — read a specific source URL
+  exaSearchCapability,        // exa search — higher-quality web research
+  exaAnswerCapability,        // exa answer — direct sourced answers
+  exaReadUrlsCapability,      // exa read — pull full text of found URLs
+  searchXCapability,          // search X — live sentiment / breaking signal
+  predictionMarketCapability, // search_prediction_markets — live implied odds
+  tradingSignalCapability,    // market signal/indicators (for market-type events)
+  tradingMarketCapability,    // market snapshot data
+  defiLlamaPriceCapability,   // token price lookup (crypto-type events)
+];
+
 export { createSubAgentCapability } from './subagent.js';

From a716b598e3cced50390014a7b23d97b637cc5b46 Mon Sep 17 00:00:00 2001
From: KillerQueen-Z <1211904451@qq.com>
Date: Thu, 11 Jun 2026 17:22:25 -0700
Subject: [PATCH 2/2] feat(predict): governance for reliable one-shot answers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some models never stop calling tools and hit maxTurns with an EMPTY answer
(wasting the spend). Adds opt-in agent governance, used by prediction mode:

- forceAnswerOnFinalTurn / maxToolCalls: withhold tools once the tool budget
  (or final turn) is reached, so the model must commit to a text answer.
- disableModelFallback: don't silently switch to a different model on empty/
  stalled output — a one-shot caller wants a clean abstain from the asked model.
- disableGroundingRetry: skip the 'ungrounded claims → force tool use' retry,
  which fought the forced answer and polluted structured output.

predict defaults: maxTurns 8, maxToolCalls 6 (tool budget is the real limiter;
turns are slack for a thinking turn + the forced-answer turn). Verified: GPT-5.5
and DeepSeek V4 Pro, which previously returned empty at maxTurns, now answer
cleanly. (MiniMax M3 / Kimi K2.6 still fail — upstream OpenRouter function-call
defects, not addressed here.)
---
 src/agent/loop.ts       | 20 +++++++++++++++++---
 src/agent/types.ts      | 24 ++++++++++++++++++++++++
 src/commands/predict.ts | 13 +++++++++++--
 src/index.ts            |  3 ++-
 4 files changed, 54 insertions(+), 6 deletions(-)

diff --git a/src/agent/loop.ts b/src/agent/loop.ts
index beba48d..575d83f 100644
--- a/src/agent/loop.ts
+++ b/src/agent/loop.ts
@@ -1382,6 +1382,20 @@ export async function interactiveSession(
         callMaxTokens = 2048;  // Short plan output
         callSystemPrompt = systemPrompt + '\n\n' + getPlanningPrompt();
       }
+      // Force a final answer: withhold tools so the model must commit to text,
+      // either on the last turn or once the tool-call budget is spent. Without
+      // this, models that keep calling tools every turn hit maxTurns with no
+      // answer (and waste the spend). Opt-in per config.
+      const onFinalTurn = config.forceAnswerOnFinalTurn && loopCount === maxTurns;
+      const toolBudgetSpent = config.maxToolCalls != null && turnToolCalls >= config.maxToolCalls;
+      if ((onFinalTurn || toolBudgetSpent) && callToolDefs.length > 0) {
+        callToolDefs = [];
+        callSystemPrompt = systemPrompt + '\n\n' +
+          (toolBudgetSpent
+            ? `You have used your research budget (${config.maxToolCalls} tool calls) — no more tools are available.`
+            : 'This is your FINAL turn — no more tools are available.') +
+          ' Based on the research so far, output ONLY the final answer now, in the exact format requested.';
+      }
 
       // ── Hallucination guard for weak models ──
       // Weak / free models (nemotron-ultra, GLM-4, qwen coder, free-profile
@@ -1492,7 +1506,7 @@ export async function interactiveSession(
         if (!hasText && !hasTools && !hasThinking) {
           const EMPTY_FALLBACK_MODELS = ['nvidia/qwen3-coder-480b', 'nvidia/llama-4-maverick', 'zai/glm-5.1'];
           const nextModel = EMPTY_FALLBACK_MODELS.find(m => m !== config.model && !turnFailedModels.has(m));
-          if (nextModel && recoveryAttempts < 2) {
+          if (nextModel && recoveryAttempts < 2 && !config.disableModelFallback) {
             recoveryAttempts++;
             turnFailedModels.add(config.model);
             const oldModel = config.model;
@@ -1540,7 +1554,7 @@ export async function interactiveSession(
             const nextModel = TOOL_USE_FALLBACK_MODELS.find(
               m => m !== config.model && !turnFailedModels.has(m),
             );
-            if (nextModel && recoveryAttempts < 2) {
+            if (nextModel && recoveryAttempts < 2 && !config.disableModelFallback) {
               recoveryAttempts++;
               turnFailedModels.add(config.model);
               const oldModel = config.model;
@@ -2126,7 +2140,7 @@ export async function interactiveSession(
             .filter(p => p.type === 'text' && typeof (p as { text?: string }).text === 'string')
             .map(p => (p as { text: string }).text)
             .join('');
-          if (shouldCheckGrounding(lastUserInput || '', assistantText)) {
+          if (!config.disableGroundingRetry && shouldCheckGrounding(lastUserInput || '', assistantText)) {
             const gResult = await checkGrounding(lastUserInput, history, assistantText, client, {
               abortSignal: abort.signal,
             });
diff --git a/src/agent/types.ts b/src/agent/types.ts
index 1aaad00..dba7551 100644
--- a/src/agent/types.ts
+++ b/src/agent/types.ts
@@ -217,6 +217,30 @@ export interface AgentConfig {
   maxSpendUsd?: number;
   /** Show user-visible harness prefetch status lines (interactive UX only). */
   showPrefetchStatus?: boolean;
+  /**
+   * On the final turn, withhold tools so the model must commit to a text answer
+   * instead of researching until cut off. For one-shot forecasting/extraction
+   * callers (e.g. `franklin predict`) where some models never stop calling tools
+   * and would otherwise hit maxTurns with no answer.
+   */
+  forceAnswerOnFinalTurn?: boolean;
+  /**
+   * Hard cap on total tool calls for the turn. Once reached, tools are withheld
+   * and the model is forced to answer from what it has. Bounds research/cost
+   * deterministically (a turn budget alone doesn't: a turn may make no tool call).
+   */
+  maxToolCalls?: number;
+  /**
+   * Disable Franklin's automatic model-switching (empty-response / stalled-intent
+   * fallbacks). One-shot callers want a clean abstain from the requested model,
+   * not a silent switch to a different one.
+   */
+  disableModelFallback?: boolean;
+  /**
+   * Disable the post-response "ungrounded claims → force a tool-use retry" guard.
+   * It fights the forced-answer path and pollutes one-shot structured output.
+   */
+  disableGroundingRetry?: boolean;
   /** Mid-turn "research-bloat" compaction — summarizes history when a turn
    *  racks up many tool calls + spend, to cut input-replay cost. Default on;
    *  set false to disable (the desktop exposes this as a toggle). */
diff --git a/src/commands/predict.ts b/src/commands/predict.ts
index 402c0ad..fe97079 100644
--- a/src/commands/predict.ts
+++ b/src/commands/predict.ts
@@ -24,6 +24,7 @@ export interface PredictOptions {
   model?: string;
   question?: string;
   maxTurns?: string;
+  maxToolCalls?: string;
   maxSpend?: string;
   json?: boolean;
   debug?: boolean;
@@ -35,7 +36,7 @@ const PREDICTION_SYSTEM: string[] = [
   "1. Use web_search (and webfetch / exa tools) for the most CURRENT facts and news — today's real-world state matters far more than your training data.",
   '2. Use search_prediction_markets to read the CURRENT market-implied odds (Polymarket, Kalshi, etc.) for this or a closely related question.',
   '3. Weigh it: where is the consensus, where might the market be mispriced, what is your edge.',
-  'Keep tool use focused — a handful of targeted calls, not dozens. When you have enough to decide, STOP researching and answer.',
+  'Budget your research: make AT MOST 4-5 focused tool calls in total. As soon as you have enough to decide, STOP calling tools and output the JSON. Do not keep researching — an answer with light research beats no answer.',
   'Your FINAL message must end with EXACTLY ONE single-line minified JSON object and NOTHING after it:',
   '{"pick": string, "confidence": number, "rationale": string, "analysis": string, "marketOdds": string}',
   '- pick: one option from the question (a short label, e.g. a country, party, bucket, or Yes/No).',
@@ -79,10 +80,18 @@ export async function predictCommand(options: PredictOptions): Promise<void> {
     chain,
     systemInstructions: PREDICTION_SYSTEM,
     capabilities: predictionCapabilities,
-    maxTurns: options.maxTurns != null ? Number(options.maxTurns) : 12,
+    maxTurns: options.maxTurns != null ? Number(options.maxTurns) : 8,
     permissionMode: 'trust',
     debug: !!options.debug,
     showPrefetchStatus: false,
+    // Governance for one-shot forecasting: bound research by tool-call count and
+    // force an answer; don't silently switch models or fight a grounding retry.
+    // Tool budget (6) is the real research limiter; maxTurns (8) is just slack
+    // above it for a thinking turn + the forced-answer turn.
+    forceAnswerOnFinalTurn: true,
+    maxToolCalls: options.maxToolCalls != null ? Number(options.maxToolCalls) : 6,
+    disableModelFallback: true,
+    disableGroundingRetry: true,
     ...(options.maxSpend != null ? { maxSpendUsd: Number(options.maxSpend) } : {}),
   };
 
diff --git a/src/index.ts b/src/index.ts
index 46ea431..7009a4c 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -96,7 +96,8 @@ program
   .description('Prediction mode — forecast one real-world event with a research-only toolset (web/markets), headless')
   .requiredOption('-m, --model <model>', 'Model to use (e.g. anthropic/claude-opus-4.8, openai/gpt-5.5)')
   .requiredOption('-q, --question <text>', 'The event question to forecast (include the allowed options)')
-  .option('--max-turns <n>', 'Max agent turns before forcing an answer', '12')
+  .option('--max-turns <n>', 'Max agent turns before forcing an answer', '8')
+  .option('--max-tool-calls <n>', 'Max tool calls before forcing an answer', '6')
   .option('--max-spend <usd>', 'Hard USD cap on this prediction run')
   .option('--no-json', 'Human-readable streaming instead of a JSON envelope')
   .option('--debug', 'Enable debug logging')