From b99c17a5c8938059f077b834155c4787ad051c32 Mon Sep 17 00:00:00 2001 From: bussyjd Date: Thu, 12 Mar 2026 13:27:09 +0100 Subject: [PATCH 01/11] =?UTF-8?q?feat:=20autoresearch=20integration=20?= =?UTF-8?q?=E2=80=94=20provenance,=20GPU=20marketplace=20skills,=20app=20d?= =?UTF-8?q?eployment=20pattern?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add provenance metadata to ServiceOffer CRD and CLI, new embedded skills for autoresearch model optimization and distributed GPU coordination, and a validated pattern for agent-deployed x402-gated web apps. Closes #264 --- cmd/obol/sell.go | 60 ++ .../base/templates/serviceoffer-crd.yaml | 26 + .../skills/autoresearch-coordinator/SKILL.md | 176 ++++ .../references/coordination-protocol.md | 189 ++++ .../scripts/coordinate.py | 949 ++++++++++++++++++ internal/embed/skills/autoresearch/SKILL.md | 132 +++ .../references/autoresearch-overview.md | 95 ++ .../references/deploy-app-prompt.md | 218 ++++ .../skills/autoresearch/scripts/publish.py | 297 ++++++ .../embed/skills/sell/scripts/monetize.py | 7 + internal/inference/store.go | 15 + 11 files changed, 2164 insertions(+) create mode 100644 internal/embed/skills/autoresearch-coordinator/SKILL.md create mode 100644 internal/embed/skills/autoresearch-coordinator/references/coordination-protocol.md create mode 100644 internal/embed/skills/autoresearch-coordinator/scripts/coordinate.py create mode 100644 internal/embed/skills/autoresearch/SKILL.md create mode 100644 internal/embed/skills/autoresearch/references/autoresearch-overview.md create mode 100644 internal/embed/skills/autoresearch/references/deploy-app-prompt.md create mode 100755 internal/embed/skills/autoresearch/scripts/publish.py diff --git a/cmd/obol/sell.go b/cmd/obol/sell.go index 1e99ebc6..8ea3196e 100644 --- a/cmd/obol/sell.go +++ b/cmd/obol/sell.go @@ -142,6 +142,10 @@ Examples: Usage: "SHA-256 of model weights for TEE attestation (required with --tee)", Sources: 
cli.EnvVars("OBOL_MODEL_HASH"), }, + &cli.StringFlag{ + Name: "provenance-file", + Usage: "Path to JSON file with provenance metadata (e.g. autoresearch experiment results)", + }, }, Action: func(ctx context.Context, cmd *cli.Command) error { name := cmd.Args().First() @@ -200,6 +204,16 @@ Examples: TEEType: teeType, ModelHash: modelHash, } + + if pf := cmd.String("provenance-file"); pf != "" { + prov, err := loadProvenance(pf) + if err != nil { + return fmt.Errorf("load provenance: %w", err) + } + d.Provenance = prov + fmt.Printf("Loaded provenance: %s (metric %s=%s, params %s)\n", + prov.Framework, prov.MetricName, prov.Metric, prov.ParamCount) + } if priceTable.PerMTok != "" { d.ApproxTokensPerRequest = schemas.ApproxTokensPerRequest } @@ -313,6 +327,10 @@ Examples: Name: "register-domains", Usage: "OASF domains for discovery (e.g. technology/artificial_intelligence)", }, + &cli.StringFlag{ + Name: "provenance-file", + Usage: "Path to JSON file with provenance metadata (e.g. autoresearch experiment results)", + }, }, Action: func(ctx context.Context, cmd *cli.Command) error { if cmd.NArg() == 0 { @@ -356,6 +374,35 @@ Examples: spec["path"] = path } + if pf := cmd.String("provenance-file"); pf != "" { + prov, err := loadProvenance(pf) + if err != nil { + return fmt.Errorf("load provenance: %w", err) + } + provMap := map[string]interface{}{} + if prov.Framework != "" { + provMap["framework"] = prov.Framework + } + if prov.Metric != "" { + provMap["metric"] = prov.Metric + } + if prov.MetricName != "" { + provMap["metricName"] = prov.MetricName + } + if prov.ExperimentID != "" { + provMap["experimentId"] = prov.ExperimentID + } + if prov.TrainHash != "" { + provMap["trainHash"] = prov.TrainHash + } + if prov.ParamCount != "" { + provMap["paramCount"] = prov.ParamCount + } + spec["provenance"] = provMap + fmt.Printf("Loaded provenance: %s (metric %s=%s, params %s)\n", + prov.Framework, prov.MetricName, prov.Metric, prov.ParamCount) + } + if cmd.Bool("register") || 
cmd.String("register-name") != "" { reg := map[string]interface{}{ "enabled": cmd.Bool("register"), @@ -1024,6 +1071,19 @@ func formatInferencePriceSummary(d *inference.Deployment) string { return fmt.Sprintf("%s USDC/request", d.PricePerRequest) } +// loadProvenance reads a provenance JSON file and returns the parsed struct. +func loadProvenance(path string) (*inference.Provenance, error) { + data, err := os.ReadFile(path) + if err != nil { + return nil, fmt.Errorf("read %s: %w", path, err) + } + var prov inference.Provenance + if err := json.Unmarshal(data, &prov); err != nil { + return nil, fmt.Errorf("parse %s: %w", path, err) + } + return &prov, nil +} + // removePricingRoute removes the x402-verifier pricing route for the given offer. func removePricingRoute(cfg *config.Config, name string) { urlPath := fmt.Sprintf("/services/%s", name) diff --git a/internal/embed/infrastructure/base/templates/serviceoffer-crd.yaml b/internal/embed/infrastructure/base/templates/serviceoffer-crd.yaml index ee7314fc..03698efb 100644 --- a/internal/embed/infrastructure/base/templates/serviceoffer-crd.yaml +++ b/internal/embed/infrastructure/base/templates/serviceoffer-crd.yaml @@ -154,6 +154,32 @@ spec: perEpoch: type: string description: "Per-training-epoch price in USDC. Fine-tuning only." + provenance: + type: object + description: >- + Optional provenance metadata for the service. Tracks how the + model or service was produced (e.g. autoresearch experiment data). + Included in the ERC-8004 registration document when present. + properties: + framework: + type: string + description: "Optimization framework (e.g. autoresearch)." + metric: + type: string + description: "Primary quality metric value (e.g. val_bpb score)." + metricName: + type: string + description: "Name of the metric (e.g. val_bpb)." + default: "val_bpb" + experimentId: + type: string + description: "Experiment or commit identifier." 
+ trainHash: + type: string + description: "SHA-256 hash of the training code that produced this model." + paramCount: + type: string + description: "Model parameter count (e.g. 50M, 1.3B)." path: type: string description: "URL path prefix for the HTTPRoute, defaults to /services/." diff --git a/internal/embed/skills/autoresearch-coordinator/SKILL.md b/internal/embed/skills/autoresearch-coordinator/SKILL.md new file mode 100644 index 00000000..e2835e82 --- /dev/null +++ b/internal/embed/skills/autoresearch-coordinator/SKILL.md @@ -0,0 +1,176 @@ +--- +name: autoresearch-coordinator +description: "Coordinate distributed autoresearch experiments across GPU workers discovered via ERC-8004 and paid via x402 micropayments." +metadata: { "openclaw": { "emoji": "\ud83d\udd2c", "requires": { "bins": ["python3", "curl"] } } } +--- + +# Autoresearch Coordinator + +Coordinate distributed autoresearch experiments across GPU workers discovered on-chain via ERC-8004 and paid per-experiment via x402 micropayments. This replaces the Ensue-based shared-memory coordinator from autoresearch-at-home with a fully decentralised discovery and payment loop built on obol-stack primitives. 
+ +## When to Use + +- Discovering GPU workers advertising `machine_learning/model_optimization` capabilities via 8004scan +- Probing worker endpoints for x402 pricing before submitting experiments +- Submitting `train.py` experiments to remote GPU workers through x402 payment gates +- Running the continuous THINK/CLAIM/RUN/PUBLISH experiment loop +- Viewing the global leaderboard of autoresearch results from worker metadata +- Coordinating multi-worker experiment campaigns + +## When NOT to Use + +- Selling your own GPU as a worker -- use `sell` (ServiceOffer with `obol sell http`) +- Buying generic inference (chat completions) -- use `buy-inference` +- Discovering agents without running experiments -- use `discovery` +- Signing transactions directly -- use `ethereum-local-wallet` +- Cluster diagnostics -- use `obol-stack` + +## Quick Start + +```bash +# Discover available GPU workers on 8004scan +python3 scripts/coordinate.py discover + +# Discover with custom limit +python3 scripts/coordinate.py discover --limit 5 + +# Probe a specific worker for pricing +python3 scripts/coordinate.py probe https://worker.example.com/services/autoresearch + +# Submit a single experiment to a worker +python3 scripts/coordinate.py submit https://worker.example.com/services/autoresearch train.py + +# Submit with custom config overrides +python3 scripts/coordinate.py submit https://worker.example.com/services/autoresearch train.py \ + --config '{"batch_size": 64, "learning_rate": 0.001}' + +# View global leaderboard (best val_bpb across all workers) +python3 scripts/coordinate.py leaderboard + +# Run continuous experiment loop (discover -> pick -> submit -> publish) +python3 scripts/coordinate.py loop train.py + +# Loop with worker preference and max rounds +python3 scripts/coordinate.py loop train.py --prefer https://worker.example.com/services/autoresearch --rounds 10 +``` + +## Commands + +| Command | Description | +|---------|-------------| +| `discover [--limit N]` | Query 
8004scan for GPU workers with `machine_learning/model_optimization` skill | +| `probe ` | Send unauthenticated request to parse 402 pricing from the worker | +| `submit [--config JSON]` | Submit experiment with x402 payment (pre-sign ERC-3009, attach X-PAYMENT) | +| `leaderboard [--limit N]` | Query 8004scan for all autoresearch workers, rank by best `val_bpb` | +| `loop [--prefer URL] [--rounds N]` | Continuous loop: discover, pick best worker, submit, collect, publish | + +## The Experiment Loop + +The coordinator implements a THINK/CLAIM/RUN/PUBLISH loop: + +1. **THINK** -- Read current `train.py`, review leaderboard results, decide on next experiment variation +2. **CLAIM** -- Discover workers via 8004scan, probe pricing, select best worker (cheapest or preferred) +3. **RUN** -- Submit `train.py` + config to worker via POST `/experiment` through the x402 payment gate +4. **PUBLISH** -- Record result locally with provenance metadata (val_bpb, experiment_id, train.py hash, worker endpoint) + +Each step is atomic and idempotent. If a worker fails mid-experiment, the coordinator retries with the next available worker. + +## How Discovery Works + +Workers register on-chain via ERC-8004 and advertise capabilities through OASF (Open Agent Skills Framework) metadata. The coordinator queries the 8004scan public API: + +``` +GET https://www.8004scan.io/api/v1/public + ?protocol=OASF + &search=machine_learning/model_optimization + &limit=20 +``` + +Each result includes the worker's registration URI, which points to a `.well-known/agent-registration.json` containing: +- Service endpoints (where to POST experiments) +- x402 support flag (payment-gated access) +- OASF skill metadata (GPU type, supported frameworks, pricing tiers) + +## How Payment Works + +Experiment submission uses the same x402 payment flow as `buy-inference`: + +1. **Probe** -- Send unauthenticated POST to worker endpoint, receive `402 Payment Required` with pricing +2. 
**Sign** -- Pre-sign an ERC-3009 `TransferWithAuthorization` voucher via the remote-signer wallet +3. **Submit** -- Re-send the POST with the `X-PAYMENT` header containing the signed voucher +4. **Settle** -- Worker's x402 verifier validates payment via the facilitator, forwards request to GPU + +Payment is per-experiment (not per-token). The 402 response includes `maxAmountRequired` which is the cost for one experiment run. + +## How Results are Published + +After collecting a result from a worker, the coordinator stores provenance metadata locally: + +```json +{ + "experiment_id": "exp-20260312-a1b2c3", + "train_hash": "sha256:abcdef...", + "val_bpb": 1.234, + "worker_endpoint": "https://worker.example.com/services/autoresearch", + "worker_agent_id": 42, + "timestamp": "2026-03-12T10:30:00Z", + "payment_tx": "0xdeadbeef..." +} +``` + +Results are appended to `$DATA_DIR/autoresearch/results.jsonl` (one JSON object per line). + +## Environment Variables + +| Variable | Default | Description | +|----------|---------|-------------| +| `SCAN_API_URL` | `https://www.8004scan.io/api/v1/public` | 8004scan public API endpoint | +| `REMOTE_SIGNER_URL` | `http://remote-signer:9000` | Remote-signer REST API for payment signing | +| `ERPC_URL` | `http://erpc.erpc.svc.cluster.local:4000/rpc` | eRPC gateway base URL | +| `ERPC_NETWORK` | `base-sepolia` | Default chain for payment | +| `DATA_DIR` | `/data` | Base directory for result storage | + +## Architecture + +``` +coordinate.py + | + +-- discover: 8004scan API (OASF filter) + | | + | v + | Worker .well-known/agent-registration.json + | + +-- probe: POST /experiment (no payment) + | | + | v + | 402 Payment Required (pricing JSON) + | + +-- submit: ERC-3009 sign via remote-signer + | | + | +-- POST /experiment + X-PAYMENT header + | | | + | | v + | | x402 verifier -> facilitator -> GPU worker + | | | + | | v + | | 200 + experiment result + | | + | v + +-- publish: results.jsonl (local provenance) +``` + +## Constraints + +- 
**Requires remote-signer** -- must have agent wallet provisioned via `obol openclaw onboard` +- **Requires network access** -- 8004scan API and worker endpoints must be reachable +- **Python stdlib + requests** -- uses `requests` if available, falls back to `urllib` +- **Per-experiment payment** -- each submission costs one x402 payment; monitor balance via `buy-inference balance` +- **Worker availability** -- workers may go offline between discovery and submission; coordinator retries automatically +- **Result storage is local** -- provenance metadata stored on-disk, not on-chain (future: IPFS/on-chain attestations) + +## References + +- `references/coordination-protocol.md` -- Ensue-to-obol mapping, discovery flow, payment flow, leaderboard format +- See also: `discovery` skill for raw ERC-8004 registry queries +- See also: `buy-inference` skill for the x402 buyer sidecar architecture +- See also: `sell` skill for running a GPU worker (sell-side) diff --git a/internal/embed/skills/autoresearch-coordinator/references/coordination-protocol.md b/internal/embed/skills/autoresearch-coordinator/references/coordination-protocol.md new file mode 100644 index 00000000..179dfb08 --- /dev/null +++ b/internal/embed/skills/autoresearch-coordinator/references/coordination-protocol.md @@ -0,0 +1,189 @@ +# Coordination Protocol Reference + +How the autoresearch coordinator maps from the original Ensue-based shared-memory model to obol-stack's decentralised primitives. 
+ +## Ensue to obol-stack Mapping + +| Ensue Concept | obol-stack Equivalent | Notes | +|---|---|---| +| Shared memory (Redis/filesystem) | ERC-8004 on-chain registry + 8004scan API | Workers register capabilities on-chain; coordinator discovers via API | +| Task queue (Ensue scheduler) | Direct HTTP POST to worker `/experiment` endpoint | No central queue; coordinator submits directly to chosen worker | +| Worker discovery (static config) | 8004scan OASF query (`machine_learning/model_optimization`) | Dynamic discovery; workers join/leave without coordinator restart | +| Payment (none / trust-based) | x402 micropayments (USDC via ERC-3009 pre-signed auths) | Per-experiment payment; no credit accounts or invoicing | +| Result aggregation (shared DB) | Local `results.jsonl` + worker `.well-known` metadata | Coordinator stores locally; workers publish best scores in registration | +| Leaderboard (centralized) | 8004scan metadata aggregation | Workers self-report best `val_bpb` in their registration JSON | +| Worker health checks (heartbeat) | x402 probe (402 = alive, timeout = dead) | Payment gate doubles as health check | +| Experiment versioning (git) | SHA-256 hash of `train.py` (`train_hash` field) | Immutable reference to exact code submitted | + +## Discovery Flow + +``` +Coordinator 8004scan Worker + | | | + |-- GET /api/v1/public ------------->| | + | ?protocol=OASF | | + | &search=machine_learning/ | | + | model_optimization | | + | &limit=20 | | + | | | + |<-- [{agentId, name, uri}, ...] 
----| | + | | | + |-- GET (registration JSON) -------------------------------->| + | | + |<-- {services: [{endpoint}], x402Support: true, oasf: [...]} -----| + | | + | (extract endpoint, verify x402 + OASF skill match) | +``` + +### 8004scan API Parameters + +| Parameter | Type | Description | +|---|---|---| +| `protocol` | string | Filter by protocol: `OASF`, `MCP`, `A2A`, `Web`, `Email` | +| `search` | string | Keyword search across name, description, skills | +| `chainId` | int | Filter by chain (e.g., 84532 for Base Sepolia) | +| `ownerAddress` | address | Filter by registration owner | +| `sortBy` | string | Sort field (e.g., `registeredAt`) | +| `limit` | int | Max results to return | + +### Worker Registration JSON + +Workers advertise capabilities in their `.well-known/agent-registration.json`: + +```json +{ + "type": "https://eips.ethereum.org/EIPS/eip-8004#registration-v1", + "name": "GPU Worker Alpha", + "description": "A100 GPU worker for autoresearch experiments", + "services": [ + { + "name": "autoresearch", + "endpoint": "https://worker.example.com/services/autoresearch", + "version": "1.0.0" + } + ], + "x402Support": true, + "oasf": [ + { + "domain": "machine_learning/model_optimization", + "capabilities": ["training", "hyperparameter_search", "validation"] + } + ], + "metadata": { + "gpu": "A100-80GB", + "framework": "pytorch", + "best_val_bpb": 1.234, + "total_experiments": 42, + "updated": "2026-03-12T10:30:00Z" + }, + "active": true +} +``` + +## Payment Flow + +``` +Coordinator Worker (x402 gate) Facilitator Chain + | | | | + |-- POST /experiment --------->| | | + | (no X-PAYMENT header) | | | + | | | | + |<-- 402 Payment Required -----| | | + | {payTo, network, | | | + | maxAmountRequired} | | | + | | | | + |-- sign ERC-3009 auth --------|---------------------------|----------------->| + | (via remote-signer) | | | + | | | | + |-- POST /experiment --------->| | | + | X-PAYMENT: {signature, | | | + | authorization, chain, |-- verify payment 
-------->| | + | token} | |-- settle USDC -->| + | body: {train_py, config} |<-- 200 OK (valid) -------| | + | | | | + | |-- run experiment | | + | | (GPU training) | | + | | | | + |<-- 200 {val_bpb, metrics} ---| | | +``` + +### ERC-3009 Authorization Structure + +Each payment authorization contains: + +| Field | Type | Description | +|---|---|---| +| `from` | address | Coordinator's wallet (agent wallet from remote-signer) | +| `to` | address | Worker's `payTo` address (from 402 response) | +| `value` | uint256 | Payment amount in USDC micro-units | +| `validAfter` | uint256 | Unix timestamp (0 = immediately valid) | +| `validBefore` | uint256 | Unix timestamp (current time + 1 hour) | +| `nonce` | bytes32 | Random 32-byte nonce (single-use, prevents replay) | + +### X-PAYMENT Header Format + +```json +{ + "signature": "0x...", + "authorization": { + "from": "0xCoordinatorWallet", + "to": "0xWorkerPayTo", + "value": "1000", + "validAfter": "0", + "validBefore": "1741784400", + "nonce": "0xrandom32bytes..." + }, + "chain": "base-sepolia", + "token": "0x036CbD53842c5426634e7929541eC2318f3dCF7e" +} +``` + +## Leaderboard Metadata Format + +Workers publish their best results in the `.well-known/agent-registration.json` metadata section. The coordinator aggregates these via 8004scan queries. 
+ +### Required Fields + +| Field | Type | Description | +|---|---|---| +| `metadata.best_val_bpb` | float | Best validation bits-per-byte achieved | +| `metadata.total_experiments` | int | Total experiments processed by this worker | +| `metadata.updated` | string | ISO 8601 timestamp of last result update | + +### Optional Fields + +| Field | Type | Description | +|---|---|---| +| `metadata.gpu` | string | GPU model (e.g., `A100-80GB`, `H100`) | +| `metadata.framework` | string | Training framework (e.g., `pytorch`, `jax`) | +| `metadata.best_experiment_hash` | string | SHA-256 hash of the train.py that produced the best result | +| `metadata.avg_experiment_time` | float | Average seconds per experiment | + +### Leaderboard Ranking + +The coordinator ranks workers by `metadata.best_val_bpb` in ascending order (lower is better). When querying the leaderboard: + +1. Fetch all workers with `machine_learning/model_optimization` skill from 8004scan +2. For each worker, fetch their registration JSON +3. Extract `metadata.best_val_bpb` (skip workers without this field) +4. Sort ascending by `val_bpb` +5. Display rank, score, agent name, and last update time + +### Local Results Format + +The coordinator also maintains a local `results.jsonl` file for provenance tracking. Each line is a JSON object: + +```json +{ + "experiment_id": "exp-20260312-a1b2c3", + "train_hash": "sha256:abcdef1234567890...", + "val_bpb": 1.234, + "worker_endpoint": "https://worker.example.com/services/autoresearch", + "worker_agent_id": 42, + "timestamp": "2026-03-12T10:30:00Z", + "chain": "base-sepolia", + "raw_result": { "...worker response..." } +} +``` + +The `experiment_id` format is `exp-YYYYMMDD-XXXXXX` where `XXXXXX` is 6 random hex chars. The `train_hash` is the SHA-256 of the exact `train.py` source submitted, providing an immutable reference for reproducibility. 
diff --git a/internal/embed/skills/autoresearch-coordinator/scripts/coordinate.py b/internal/embed/skills/autoresearch-coordinator/scripts/coordinate.py new file mode 100644 index 00000000..19959ade --- /dev/null +++ b/internal/embed/skills/autoresearch-coordinator/scripts/coordinate.py @@ -0,0 +1,949 @@ +#!/usr/bin/env python3 +"""coordinate.py -- Distributed autoresearch coordinator via obol-stack. + +Discovers GPU workers registered on ERC-8004 via the 8004scan API, probes +their x402 pricing, submits experiments with micropayments, and tracks +results with local provenance metadata. + +Replaces the Ensue-based shared-memory coordinator from autoresearch-at-home +with decentralised discovery (ERC-8004) and payment (x402). + +Usage: + python3 coordinate.py [args] + +Commands: + discover [--limit N] List GPU workers from 8004scan + probe Check x402 pricing for a worker + submit [--config JSON] Submit experiment with payment + leaderboard [--limit N] Global rankings by val_bpb + loop [--prefer URL] [--rounds N] Continuous experiment loop +""" + +import argparse +import hashlib +import json +import os +import sys +import time +import urllib.error +import urllib.request +from datetime import datetime, timezone + +# --------------------------------------------------------------------------- +# Import shared helpers from sibling skills +# --------------------------------------------------------------------------- + +SKILL_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +SIGNER_SCRIPTS = os.path.join(os.path.dirname(SKILL_DIR), "ethereum-local-wallet", "scripts") +sys.path.insert(0, SIGNER_SCRIPTS) + +try: + from signer import _signer_get, _signer_post # noqa: E402 + HAS_SIGNER = True +except ImportError: + HAS_SIGNER = False + +# --------------------------------------------------------------------------- +# Constants +# --------------------------------------------------------------------------- + +SCAN_API_URL = os.environ.get( + "SCAN_API_URL", 
"https://www.8004scan.io/api/v1/public" +) +REMOTE_SIGNER_URL = os.environ.get("REMOTE_SIGNER_URL", "http://remote-signer:9000") +ERPC_URL = os.environ.get("ERPC_URL", "http://erpc.erpc.svc.cluster.local:4000/rpc") +DEFAULT_CHAIN = os.environ.get("ERPC_NETWORK", "base-sepolia") +DATA_DIR = os.environ.get("DATA_DIR", "/data") + +RESULTS_DIR = os.path.join(DATA_DIR, "autoresearch") +RESULTS_FILE = os.path.join(RESULTS_DIR, "results.jsonl") + +OASF_SKILL_FILTER = "machine_learning/model_optimization" + +CHAIN_IDS = { + "base-sepolia": 84532, + "base": 8453, + "ethereum": 1, + "mainnet": 1, + "sepolia": 11155111, +} + +USDC_CONTRACTS = { + "base-sepolia": "0x036CbD53842c5426634e7929541eC2318f3dCF7e", + "base": "0x833589fCD6eDb6E08f4c7C32D4f71b54bdA02913", + "ethereum": "0xA0b86991c6218b36c1d19D4a2e9Eb0cE3606eB48", +} + +USDC_DOMAIN_NAME = "USDC" +USDC_DOMAIN_VERSION = "2" + +DEFAULT_LOOP_DELAY = 60 # seconds between loop iterations + + +# --------------------------------------------------------------------------- +# HTTP helpers +# --------------------------------------------------------------------------- + +def _http_get(url, headers=None, timeout=30): + """GET request returning parsed JSON.""" + hdrs = {"Accept": "application/json", "User-Agent": "obol-autoresearch/1.0"} + if headers: + hdrs.update(headers) + req = urllib.request.Request(url, headers=hdrs, method="GET") + with urllib.request.urlopen(req, timeout=timeout) as resp: + return json.loads(resp.read()) + + +def _http_post(url, body, headers=None, timeout=120): + """POST request returning (status_code, headers_dict, body_bytes).""" + hdrs = {"Content-Type": "application/json", "User-Agent": "obol-autoresearch/1.0"} + if headers: + hdrs.update(headers) + data = json.dumps(body).encode() if isinstance(body, dict) else body + req = urllib.request.Request(url, data=data, headers=hdrs, method="POST") + try: + with urllib.request.urlopen(req, timeout=timeout) as resp: + return resp.status, dict(resp.headers), 
resp.read() + except urllib.error.HTTPError as e: + return e.code, dict(e.headers), e.read() + + +# --------------------------------------------------------------------------- +# 8004scan API +# --------------------------------------------------------------------------- + +def query_8004scan(protocol="OASF", search=None, limit=20, chain_id=None, + sort_by=None, owner_address=None): + """Query the 8004scan public API for registered agents. + + Supports filtering by protocol (MCP/A2A/OASF/Web/Email), keyword search, + chainId, ownerAddress, and sorting. + + Returns list of agent registration objects. + """ + params = [f"limit={limit}"] + if protocol: + params.append(f"protocol={protocol}") + if search: + params.append(f"search={urllib.request.quote(search)}") + if chain_id: + params.append(f"chainId={chain_id}") + if sort_by: + params.append(f"sortBy={sort_by}") + if owner_address: + params.append(f"ownerAddress={owner_address}") + + url = f"{SCAN_API_URL}?{'&'.join(params)}" + try: + result = _http_get(url) + # API may return {data: [...]} or directly [...] + if isinstance(result, dict): + return result.get("data", result.get("items", [])) + if isinstance(result, list): + return result + return [] + except (urllib.error.URLError, urllib.error.HTTPError, json.JSONDecodeError) as e: + print(f"Error querying 8004scan: {e}", file=sys.stderr) + return [] + + +def fetch_registration_json(uri): + """Fetch the .well-known/agent-registration.json from a worker's URI.""" + if not uri: + return None + if not (uri.startswith("http://") or uri.startswith("https://")): + return None + try: + return _http_get(uri, timeout=15) + except (urllib.error.URLError, urllib.error.HTTPError, json.JSONDecodeError) as e: + print(f" Warning: Failed to fetch {uri}: {e}", file=sys.stderr) + return None + + +def extract_worker_endpoint(registration): + """Extract the experiment submission endpoint from a registration JSON. 
+ + Looks for a service with x402Support and an endpoint path containing + '/services/' or '/experiment'. + """ + if not registration: + return None + + services = registration.get("services", []) + for svc in services: + endpoint = svc.get("endpoint", "") + if "/services/" in endpoint or "/experiment" in endpoint: + return endpoint + + # Fallback: if x402Support is true, try constructing from the first service + if registration.get("x402Support") and services: + return services[0].get("endpoint") + + return None + + +def extract_oasf_skills(registration): + """Extract OASF skill domains from registration metadata.""" + skills = [] + oasf = registration.get("oasf", registration.get("skills", [])) + if isinstance(oasf, list): + for s in oasf: + if isinstance(s, dict): + skills.append(s.get("domain", s.get("name", str(s)))) + elif isinstance(s, str): + skills.append(s) + return skills + + +# --------------------------------------------------------------------------- +# x402 payment helpers +# --------------------------------------------------------------------------- + +def parse_402_pricing(headers, body): + """Parse pricing info from a 402 Payment Required response. + + Returns dict with: payTo, network, maxAmountRequired, facilitatorURL, + or None if unparseable. 
+ """ + try: + data = json.loads(body) if isinstance(body, bytes) else body + except (json.JSONDecodeError, TypeError): + data = {} + + # x402 pricing can be in response body or headers + pricing = {} + + # Try body fields + for key in ("payTo", "network", "maxAmountRequired", "facilitatorURL", + "price", "priceModel", "description"): + if key in data: + pricing[key] = data[key] + + # Try x402-specific headers + if "X-Payment-PayTo" in headers: + pricing["payTo"] = headers["X-Payment-PayTo"] + if "X-Payment-Network" in headers: + pricing["network"] = headers["X-Payment-Network"] + if "X-Payment-Amount" in headers: + pricing["maxAmountRequired"] = headers["X-Payment-Amount"] + + # Also check for nested pricing structure + if "pricing" in data and isinstance(data["pricing"], dict): + pricing.update(data["pricing"]) + + if not pricing.get("payTo") and not pricing.get("maxAmountRequired"): + return None + + return pricing + + +def sign_erc3009_auth(pay_to, amount, chain=None): + """Sign an ERC-3009 TransferWithAuthorization via the remote-signer. + + Returns the signed authorization dict suitable for an X-PAYMENT header, + or None on failure. 
+ """ + if not HAS_SIGNER: + print("Error: remote-signer helpers not available", file=sys.stderr) + return None + + network = chain or DEFAULT_CHAIN + chain_id = CHAIN_IDS.get(network) + usdc = USDC_CONTRACTS.get(network) + if not chain_id or not usdc: + print(f"Error: unsupported chain '{network}' for payment", file=sys.stderr) + return None + + # Get signer address + try: + info = _signer_get("/api/v1/eth2/publicKeys") + if not info: + print("Error: no keys in remote-signer", file=sys.stderr) + return None + except Exception as e: + print(f"Error contacting remote-signer: {e}", file=sys.stderr) + return None + + # Generate random nonce (32 bytes) + nonce = "0x" + os.urandom(32).hex() + + # Valid for 1 hour + valid_after = 0 + valid_before = int(time.time()) + 3600 + + # EIP-712 typed data for TransferWithAuthorization + typed_data = { + "types": { + "EIP712Domain": [ + {"name": "name", "type": "string"}, + {"name": "version", "type": "string"}, + {"name": "chainId", "type": "uint256"}, + {"name": "verifyingContract", "type": "address"}, + ], + "TransferWithAuthorization": [ + {"name": "from", "type": "address"}, + {"name": "to", "type": "address"}, + {"name": "value", "type": "uint256"}, + {"name": "validAfter", "type": "uint256"}, + {"name": "validBefore", "type": "uint256"}, + {"name": "nonce", "type": "bytes32"}, + ], + }, + "primaryType": "TransferWithAuthorization", + "domain": { + "name": USDC_DOMAIN_NAME, + "version": USDC_DOMAIN_VERSION, + "chainId": chain_id, + "verifyingContract": usdc, + }, + "message": { + "from": info[0] if isinstance(info, list) else info, + "to": pay_to, + "value": str(amount), + "validAfter": str(valid_after), + "validBefore": str(valid_before), + "nonce": nonce, + }, + } + + try: + sig = _signer_post("/api/v1/eth2/sign", typed_data) + if not sig: + print("Error: remote-signer returned empty signature", file=sys.stderr) + return None + return { + "signature": sig, + "authorization": typed_data["message"], + "chain": network, + 
"token": usdc, + } + except Exception as e: + print(f"Error signing authorization: {e}", file=sys.stderr) + return None + + +def build_x_payment_header(signed_auth): + """Encode a signed authorization as an X-PAYMENT header value (JSON).""" + if not signed_auth: + return None + return json.dumps(signed_auth) + + +# --------------------------------------------------------------------------- +# Result provenance +# --------------------------------------------------------------------------- + +def _ensure_results_dir(): + """Create the results directory if it does not exist.""" + os.makedirs(RESULTS_DIR, exist_ok=True) + + +def save_result(result): + """Append an experiment result to the local results.jsonl.""" + _ensure_results_dir() + with open(RESULTS_FILE, "a") as f: + f.write(json.dumps(result) + "\n") + + +def load_results(): + """Load all experiment results from results.jsonl.""" + if not os.path.exists(RESULTS_FILE): + return [] + results = [] + with open(RESULTS_FILE, "r") as f: + for line in f: + line = line.strip() + if line: + try: + results.append(json.loads(line)) + except json.JSONDecodeError: + continue + return results + + +def compute_train_hash(train_py_path): + """Compute SHA-256 hash of a train.py file.""" + h = hashlib.sha256() + with open(train_py_path, "rb") as f: + for chunk in iter(lambda: f.read(8192), b""): + h.update(chunk) + return f"sha256:{h.hexdigest()}" + + +def generate_experiment_id(): + """Generate a unique experiment ID.""" + ts = datetime.now(timezone.utc).strftime("%Y%m%d") + suffix = os.urandom(3).hex() + return f"exp-{ts}-{suffix}" + + +# --------------------------------------------------------------------------- +# ObolCoordinator +# --------------------------------------------------------------------------- + +class ObolCoordinator: + """Coordinates distributed autoresearch experiments via obol-stack. 
+ + Discovers GPU workers through 8004scan, pays per-experiment via x402, + and tracks results with local provenance metadata. + """ + + def __init__(self, chain=None): + self.chain = chain or DEFAULT_CHAIN + + def discover_workers(self, limit=20): + """Query 8004scan for workers advertising machine_learning/model_optimization. + + Returns list of dicts with keys: name, endpoint, uri, agent_id, skills, x402. + """ + agents = query_8004scan( + protocol="OASF", + search=OASF_SKILL_FILTER, + limit=limit, + ) + + workers = [] + for agent in agents: + uri = agent.get("uri", agent.get("tokenURI", "")) + name = agent.get("name", f"agent-{agent.get('agentId', '?')}") + agent_id = agent.get("agentId", agent.get("id")) + + # Fetch full registration to get endpoint + registration = fetch_registration_json(uri) if uri else None + endpoint = extract_worker_endpoint(registration) if registration else None + skills = extract_oasf_skills(registration) if registration else [] + x402 = (registration or {}).get("x402Support", False) + + workers.append({ + "name": name, + "endpoint": endpoint, + "uri": uri, + "agent_id": agent_id, + "skills": skills, + "x402": x402, + "registration": registration, + }) + + return workers + + def probe_worker(self, endpoint): + """Send unauthenticated request to worker, parse 402 for pricing. + + Returns pricing dict or None if the worker is not x402-gated. 
+ """ + # Send a minimal experiment probe (empty body triggers 402 before processing) + probe_body = {"probe": True} + status, headers, body = _http_post( + endpoint.rstrip("/") + "/experiment", + probe_body, + timeout=30, + ) + + if status == 402: + pricing = parse_402_pricing(headers, body) + if pricing: + pricing["status"] = 402 + pricing["endpoint"] = endpoint + return pricing + print(f" Got 402 but could not parse pricing from {endpoint}", file=sys.stderr) + return None + + if 200 <= status < 300: + print(f" Worker at {endpoint} returned {status} (no payment gate)") + return {"status": status, "endpoint": endpoint, "free": True} + + print(f" Worker at {endpoint} returned unexpected status {status}", file=sys.stderr) + return None + + def submit_experiment(self, endpoint, train_py_source, config=None): + """Submit an experiment to a worker with x402 payment. + + Args: + endpoint: Worker's base endpoint URL + train_py_source: Contents of train.py as a string + config: Optional dict of config overrides for the experiment + + Returns result dict from the worker, or None on failure. 
+ """ + experiment_url = endpoint.rstrip("/") + "/experiment" + + # Step 1: Probe for pricing + probe_body = {"probe": True} + status, headers, body = _http_post(experiment_url, probe_body, timeout=30) + + if status != 402: + if 200 <= status < 300: + # No payment gate -- submit directly + print(" Worker has no payment gate, submitting directly...") + return self._submit_direct(experiment_url, train_py_source, config) + print(f" Probe failed with status {status}", file=sys.stderr) + return None + + # Step 2: Parse pricing + pricing = parse_402_pricing(headers, body) + if not pricing: + print(" Could not parse 402 pricing", file=sys.stderr) + return None + + pay_to = pricing.get("payTo") + amount = pricing.get("maxAmountRequired", pricing.get("price")) + if not pay_to or not amount: + print(" Pricing missing payTo or amount", file=sys.stderr) + return None + + print(f" Price: {amount} USDC micro-units to {pay_to}") + + # Step 3: Sign ERC-3009 authorization + signed_auth = sign_erc3009_auth(pay_to, int(amount), self.chain) + if not signed_auth: + print(" Failed to sign payment authorization", file=sys.stderr) + return None + + # Step 4: Submit with payment + payment_header = build_x_payment_header(signed_auth) + submit_body = { + "train_py": train_py_source, + "config": config or {}, + } + submit_headers = {"X-PAYMENT": payment_header} + + print(" Submitting experiment with payment...") + status2, headers2, body2 = _http_post( + experiment_url, submit_body, headers=submit_headers, timeout=600 + ) + + if 200 <= status2 < 300: + try: + return json.loads(body2) + except json.JSONDecodeError: + return {"raw": body2.decode("utf-8", errors="replace"), "status": status2} + + print(f" Submission failed with status {status2}", file=sys.stderr) + try: + err = json.loads(body2) + print(f" Response: {json.dumps(err, indent=2)}", file=sys.stderr) + except (json.JSONDecodeError, TypeError): + print(f" Response: {body2[:500]}", file=sys.stderr) + return None + + def 
_submit_direct(self, url, train_py_source, config=None): + """Submit experiment without payment (free worker).""" + submit_body = { + "train_py": train_py_source, + "config": config or {}, + } + status, _, body = _http_post(url, submit_body, timeout=600) + if 200 <= status < 300: + try: + return json.loads(body) + except json.JSONDecodeError: + return {"raw": body.decode("utf-8", errors="replace"), "status": status} + return None + + def publish_result(self, val_bpb, experiment_id, train_hash, + worker_endpoint=None, worker_agent_id=None, extra=None): + """Store experiment result with provenance metadata locally.""" + result = { + "experiment_id": experiment_id, + "train_hash": train_hash, + "val_bpb": val_bpb, + "worker_endpoint": worker_endpoint, + "worker_agent_id": worker_agent_id, + "timestamp": datetime.now(timezone.utc).isoformat(), + "chain": self.chain, + } + if extra: + result.update(extra) + save_result(result) + return result + + def get_leaderboard(self, limit=20): + """Query 8004scan for autoresearch workers and rank by best val_bpb. + + Fetches worker registration metadata and extracts reported val_bpb + scores from their .well-known metadata. 
+ """ + agents = query_8004scan( + protocol="OASF", + search=OASF_SKILL_FILTER, + limit=limit * 2, # fetch extra to account for workers without results + ) + + entries = [] + for agent in agents: + uri = agent.get("uri", agent.get("tokenURI", "")) + name = agent.get("name", f"agent-{agent.get('agentId', '?')}") + registration = fetch_registration_json(uri) if uri else None + if not registration: + continue + + # Look for autoresearch results in metadata + meta = registration.get("metadata", registration.get("autoresearch", {})) + if isinstance(meta, dict): + val_bpb = meta.get("best_val_bpb", meta.get("val_bpb")) + if val_bpb is not None: + entries.append({ + "name": name, + "agent_id": agent.get("agentId", agent.get("id")), + "val_bpb": float(val_bpb), + "uri": uri, + "updated": meta.get("updated", ""), + }) + + # Sort by val_bpb ascending (lower is better) + entries.sort(key=lambda e: e["val_bpb"]) + return entries[:limit] + + def run_loop(self, train_py_path, prefer_endpoint=None, max_rounds=None): + """Run the continuous THINK/CLAIM/RUN/PUBLISH loop. 
+ + Args: + train_py_path: Path to the train.py file + prefer_endpoint: Optional preferred worker endpoint + max_rounds: Max iterations (None = infinite) + """ + if not os.path.exists(train_py_path): + print(f"Error: {train_py_path} not found", file=sys.stderr) + return + + train_hash = compute_train_hash(train_py_path) + with open(train_py_path, "r") as f: + train_source = f.read() + + round_num = 0 + while max_rounds is None or round_num < max_rounds: + round_num += 1 + print(f"\n{'='*60}") + print(f"Round {round_num}") + print(f"{'='*60}") + + # THINK: Review current state + results = load_results() + best = min((r["val_bpb"] for r in results if "val_bpb" in r), default=None) + if best is not None: + print(f" Current best val_bpb: {best:.4f} ({len(results)} experiments)") + else: + print(" No previous results") + + # CLAIM: Discover and select worker + print("\n Discovering workers...") + if prefer_endpoint: + # Use preferred endpoint directly + print(f" Using preferred worker: {prefer_endpoint}") + endpoint = prefer_endpoint + else: + workers = self.discover_workers(limit=10) + available = [w for w in workers if w.get("endpoint") and w.get("x402")] + if not available: + print(" No available workers found. Waiting before retry...") + time.sleep(DEFAULT_LOOP_DELAY) + continue + # Pick first available (could be enhanced with pricing comparison) + worker = available[0] + endpoint = worker["endpoint"] + print(f" Selected worker: {worker['name']} at {endpoint}") + + # Probe pricing + print("\n Probing pricing...") + pricing = self.probe_worker(endpoint) + if not pricing: + print(" Could not get pricing. 
Skipping this round.") + time.sleep(DEFAULT_LOOP_DELAY) + continue + if not pricing.get("free"): + print(f" Cost: {pricing.get('maxAmountRequired', pricing.get('price', '?'))} USDC micro-units") + + # RUN: Submit experiment + print("\n Submitting experiment...") + experiment_id = generate_experiment_id() + result = self.submit_experiment(endpoint, train_source) + if not result: + print(" Experiment submission failed. Trying next round.") + time.sleep(DEFAULT_LOOP_DELAY) + continue + + # PUBLISH: Record result + val_bpb = result.get("val_bpb", result.get("metrics", {}).get("val_bpb")) + if val_bpb is not None: + published = self.publish_result( + val_bpb=float(val_bpb), + experiment_id=experiment_id, + train_hash=train_hash, + worker_endpoint=endpoint, + extra={"raw_result": result}, + ) + print(f"\n Result: val_bpb = {val_bpb:.4f}") + print(f" Saved as {experiment_id}") + if best is not None and float(val_bpb) < best: + print(f" NEW BEST! (improved from {best:.4f})") + else: + print(f"\n Experiment completed but no val_bpb in result:") + print(f" {json.dumps(result, indent=2)[:500]}") + # Still save for provenance + self.publish_result( + val_bpb=None, + experiment_id=experiment_id, + train_hash=train_hash, + worker_endpoint=endpoint, + extra={"raw_result": result, "note": "no val_bpb returned"}, + ) + + if max_rounds is not None and round_num >= max_rounds: + print(f"\n Completed {max_rounds} rounds.") + break + + print(f"\n Waiting {DEFAULT_LOOP_DELAY}s before next round...") + time.sleep(DEFAULT_LOOP_DELAY) + + +# --------------------------------------------------------------------------- +# CLI commands +# --------------------------------------------------------------------------- + +def cmd_discover(args): + """List available GPU workers from 8004scan.""" + limit = args.limit or 20 + print(f"Discovering GPU workers with OASF skill '{OASF_SKILL_FILTER}'...") + print(f" API: {SCAN_API_URL}") + print() + + coordinator = ObolCoordinator(chain=args.chain) + workers 
= coordinator.discover_workers(limit=limit) + + if not workers: + print("No workers found.") + return + + print(f"Found {len(workers)} worker(s):\n") + print(f"{'Name':30} {'Agent ID':>10} {'x402':>5} Endpoint") + print(f"{'-'*30} {'-'*10} {'-'*5} {'-'*50}") + + for w in workers: + x402_str = "yes" if w.get("x402") else "no" + endpoint = w.get("endpoint") or "(none)" + name = (w.get("name") or "?")[:30] + agent_id = w.get("agent_id", "?") + print(f"{name:30} {str(agent_id):>10} {x402_str:>5} {endpoint}") + + if w.get("skills"): + print(f"{'':30} {'':>10} {'':>5} Skills: {', '.join(w['skills'][:5])}") + + +def cmd_probe(args): + """Probe a worker endpoint for x402 pricing.""" + endpoint = args.endpoint.rstrip("/") + print(f"Probing {endpoint} ...") + print() + + coordinator = ObolCoordinator(chain=args.chain) + pricing = coordinator.probe_worker(endpoint) + + if not pricing: + print("Could not get pricing info from this endpoint.") + sys.exit(1) + + if pricing.get("free"): + print("This worker has no payment gate (free access).") + return + + print("x402 Pricing:") + print(f" Status: {pricing.get('status', '?')}") + print(f" Pay To: {pricing.get('payTo', '?')}") + print(f" Network: {pricing.get('network', '?')}") + print(f" Amount: {pricing.get('maxAmountRequired', pricing.get('price', '?'))} USDC micro-units") + if pricing.get("priceModel"): + print(f" Price Model: {pricing['priceModel']}") + if pricing.get("description"): + print(f" Description: {pricing['description']}") + if pricing.get("facilitatorURL"): + print(f" Facilitator: {pricing['facilitatorURL']}") + + +def cmd_submit(args): + """Submit a single experiment to a worker.""" + endpoint = args.endpoint.rstrip("/") + train_py_path = args.train_py + + if not os.path.exists(train_py_path): + print(f"Error: {train_py_path} not found", file=sys.stderr) + sys.exit(1) + + config = None + if args.config: + try: + config = json.loads(args.config) + except json.JSONDecodeError as e: + print(f"Error: invalid JSON 
config: {e}", file=sys.stderr) + sys.exit(1) + + with open(train_py_path, "r") as f: + train_source = f.read() + + train_hash = compute_train_hash(train_py_path) + experiment_id = generate_experiment_id() + + print(f"Submitting experiment to {endpoint}") + print(f" train.py: {train_py_path}") + print(f" train hash: {train_hash}") + print(f" experiment ID: {experiment_id}") + if config: + print(f" config: {json.dumps(config)}") + print() + + coordinator = ObolCoordinator(chain=args.chain) + result = coordinator.submit_experiment(endpoint, train_source, config) + + if not result: + print("Experiment submission failed.", file=sys.stderr) + sys.exit(1) + + val_bpb = result.get("val_bpb", result.get("metrics", {}).get("val_bpb")) + + # Publish provenance + coordinator.publish_result( + val_bpb=float(val_bpb) if val_bpb is not None else None, + experiment_id=experiment_id, + train_hash=train_hash, + worker_endpoint=endpoint, + extra={"raw_result": result}, + ) + + print("Experiment completed!") + print(f" Result: {json.dumps(result, indent=2)[:1000]}") + if val_bpb is not None: + print(f" val_bpb: {val_bpb}") + print(f" Saved to {RESULTS_FILE}") + + +def cmd_leaderboard(args): + """Show global leaderboard from 8004scan worker metadata.""" + limit = args.limit or 20 + print(f"Fetching global autoresearch leaderboard...") + print() + + coordinator = ObolCoordinator(chain=args.chain) + entries = coordinator.get_leaderboard(limit=limit) + + if not entries: + # Fall back to local results + print("No leaderboard data from 8004scan. 
Showing local results:\n") + results = load_results() + if not results: + print("No local results either.") + return + + # Group by worker, show best per worker + by_worker = {} + for r in results: + key = r.get("worker_endpoint", "local") + if r.get("val_bpb") is not None: + if key not in by_worker or r["val_bpb"] < by_worker[key]["val_bpb"]: + by_worker[key] = r + + sorted_workers = sorted(by_worker.values(), key=lambda r: r["val_bpb"]) + print(f"{'Rank':>5} {'val_bpb':>10} {'Experiment':20} Worker") + print(f"{'-'*5} {'-'*10} {'-'*20} {'-'*40}") + for i, r in enumerate(sorted_workers[:limit], 1): + print(f"{i:>5} {r['val_bpb']:>10.4f} {r.get('experiment_id', '?'):20} {r.get('worker_endpoint', '?')}") + return + + print(f"{'Rank':>5} {'val_bpb':>10} {'Agent ID':>10} {'Name':30} Updated") + print(f"{'-'*5} {'-'*10} {'-'*10} {'-'*30} {'-'*20}") + for i, e in enumerate(entries, 1): + name = (e.get("name") or "?")[:30] + print(f"{i:>5} {e['val_bpb']:>10.4f} {str(e.get('agent_id', '?')):>10} {name:30} {e.get('updated', '?')}") + + +def cmd_loop(args): + """Run the continuous experiment loop.""" + train_py_path = args.train_py + prefer = args.prefer + rounds = args.rounds + + if not os.path.exists(train_py_path): + print(f"Error: {train_py_path} not found", file=sys.stderr) + sys.exit(1) + + coordinator = ObolCoordinator(chain=args.chain) + print(f"Starting experiment loop") + print(f" train.py: {train_py_path}") + print(f" chain: {coordinator.chain}") + if prefer: + print(f" prefer: {prefer}") + if rounds: + print(f" rounds: {rounds}") + print() + + try: + coordinator.run_loop(train_py_path, prefer_endpoint=prefer, max_rounds=rounds) + except KeyboardInterrupt: + print("\n\nLoop interrupted by user.") + results = load_results() + if results: + best = min((r["val_bpb"] for r in results if r.get("val_bpb") is not None), default=None) + print(f"Total experiments: {len(results)}") + if best is not None: + print(f"Best val_bpb: {best:.4f}") + + +# 
--------------------------------------------------------------------------- +# Main +# --------------------------------------------------------------------------- + +def main(): + parser = argparse.ArgumentParser( + description="Coordinate distributed autoresearch experiments via obol-stack" + ) + parser.add_argument("--chain", default=None, help="Chain/network for payments (default: base-sepolia)") + sub = parser.add_subparsers(dest="command", help="Command to run") + + # discover + p_discover = sub.add_parser("discover", help="List available GPU workers") + p_discover.add_argument("--limit", type=int, default=20, help="Max results (default: 20)") + + # probe + p_probe = sub.add_parser("probe", help="Check x402 pricing for a worker") + p_probe.add_argument("endpoint", help="Worker endpoint URL") + + # submit + p_submit = sub.add_parser("submit", help="Submit experiment to a worker") + p_submit.add_argument("endpoint", help="Worker endpoint URL") + p_submit.add_argument("train_py", help="Path to train.py") + p_submit.add_argument("--config", default=None, help="JSON config overrides") + + # leaderboard + p_leader = sub.add_parser("leaderboard", help="Show global rankings") + p_leader.add_argument("--limit", type=int, default=20, help="Max results (default: 20)") + + # loop + p_loop = sub.add_parser("loop", help="Run continuous experiment loop") + p_loop.add_argument("train_py", help="Path to train.py") + p_loop.add_argument("--prefer", default=None, help="Preferred worker endpoint URL") + p_loop.add_argument("--rounds", type=int, default=None, help="Max rounds (default: infinite)") + + args = parser.parse_args() + + if not args.command: + parser.print_help() + sys.exit(1) + + commands = { + "discover": cmd_discover, + "probe": cmd_probe, + "submit": cmd_submit, + "leaderboard": cmd_leaderboard, + "loop": cmd_loop, + } + + try: + commands[args.command](args) + except KeyboardInterrupt: + print("\nInterrupted.", file=sys.stderr) + sys.exit(130) + except 
(urllib.error.URLError, urllib.error.HTTPError, OSError) as e: + print(f"Network error: {e}", file=sys.stderr) + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/internal/embed/skills/autoresearch/SKILL.md b/internal/embed/skills/autoresearch/SKILL.md new file mode 100644 index 00000000..e1a589cb --- /dev/null +++ b/internal/embed/skills/autoresearch/SKILL.md @@ -0,0 +1,132 @@ +--- +name: autoresearch +description: "Run autonomous LLM optimization experiments (autoresearch) and publish optimized models for paid inference via x402." +metadata: { "openclaw": { "emoji": "πŸ§ͺ", "requires": { "bins": ["python3", "uv"] } } } +--- + +# Autoresearch + +Autonomous LLM optimization: the agent iterates on `train.py`, runs 5-minute GPU experiments, measures validation bits-per-byte (val_bpb), and publishes the best checkpoint as a sellable Ollama model. + +## When to Use + +- Optimizing a base model for a specific domain or task +- Running automated training experiments to improve val_bpb +- Publishing an optimized model checkpoint to Ollama +- Selling an optimized model via x402 payment-gated inference + +## When NOT to Use + +- Selling an existing model without optimization β€” use `sell` +- Buying remote inference β€” use `buy-inference` +- Cluster diagnostics β€” use `obol-stack` + +## Quick Start + +### 1. Prepare Data + +Place your training and validation data in the autoresearch working directory: + +``` +autoresearch/ + train.bin # training data (tokenized) + val.bin # validation data (tokenized) + train.py # training script (agent modifies this) + results.tsv # experiment log (appended by each run) +``` + +### 2. Run Experiments + +The agent modifies `train.py` and runs experiments in a loop. 
Each experiment: + +- Has a **5-minute time budget** on GPU +- Produces a checkpoint and a val_bpb measurement +- Is tracked as a git commit with status (keep/discard) in `results.tsv` + +The `results.tsv` file is tab-separated with columns: + +``` +commit_hash val_bpb status description +a1b2c3d 1.042 keep baseline transformer +e4f5a6b 1.038 keep added RMSNorm +c7d8e9f 1.051 discard unstable lr schedule +``` + +### 3. Publish the Best Model + +Once experiments are complete, use `publish.py` to find the best checkpoint, register it with Ollama, and optionally sell it: + +```bash +# Publish to Ollama only +python3 scripts/publish.py /path/to/autoresearch + +# Publish and sell via x402 +python3 scripts/publish.py /path/to/autoresearch \ + --sell \ + --wallet 0xYourWalletAddress \ + --price 0.002 \ + --chain base-sepolia +``` + +## Commands + +| Command | Description | +|---------|-------------| +| `publish.py <dir>` | Find best experiment, create Ollama model, generate provenance | +| `publish.py <dir> --sell --wallet <addr> --price

 <price> --chain <chain>` | Publish and sell via `obol sell inference` | + +## How It Works + +1. **Experiment loop**: The agent edits `train.py`, runs training for up to 5 minutes, measures val_bpb on the validation set, and commits the result with a keep/discard verdict. + +2. **Selection**: `publish.py` reads `results.tsv`, filters for `status=keep`, and selects the experiment with the lowest val_bpb (lower is better — fewer bits per byte means better compression / prediction). + +3. **Provenance**: A JSON provenance file is generated with: + - `framework`: training framework used + - `metric`: `val_bpb` and its value + - `trainHash`: SHA-256 of the `train.py` at the winning commit + - `paramCount`: model parameter count (from checkpoint metadata) + - `experimentId`: git commit hash of the winning experiment + +4. **Ollama registration**: A Modelfile is generated from the checkpoint and `ollama create` registers the model locally. + +5. **Sell (optional)**: If `--sell` is passed, runs `obol sell inference` with the `--provenance-file` flag pointing at the provenance JSON so buyers can verify optimization lineage. 
+ +## Architecture + +``` +Agent (autoresearch loop) + | + +-- edit train.py + +-- run experiment (5-min budget) + +-- measure val_bpb + +-- commit results.tsv + | + v +publish.py + | + +-- read results.tsv β†’ best experiment + +-- git show :train.py β†’ SHA-256 trainHash + +-- generate provenance.json + +-- generate Modelfile β†’ ollama create + +-- (optional) obol sell inference --provenance-file +``` + +## Constraints + +- **Python stdlib + uv** β€” no pip install; uv for environment management +- **5-minute time budget** β€” each experiment must complete within 5 minutes +- **GPU required** β€” training runs on local GPU (Ollama must have GPU access) +- **Git repo required** β€” autoresearch directory must be a git repository for commit tracking +- **results.tsv format** β€” tab-separated: `commit_hash`, `val_bpb`, `status`, `description` + +## OASF Registration + +When registering an autoresearch-optimized model on-chain via ERC-8004: + +- **Skills**: `machine_learning/model_optimization` +- **Domains**: `technology/artificial_intelligence/research` + +## References + +- `references/autoresearch-overview.md` β€” val_bpb metric, time budget, and the train.py modification loop diff --git a/internal/embed/skills/autoresearch/references/autoresearch-overview.md b/internal/embed/skills/autoresearch/references/autoresearch-overview.md new file mode 100644 index 00000000..b2baad07 --- /dev/null +++ b/internal/embed/skills/autoresearch/references/autoresearch-overview.md @@ -0,0 +1,95 @@ +# Autoresearch Overview + +## What Is Autoresearch? + +Autoresearch is an autonomous LLM optimization methodology where an AI agent iteratively modifies a training script (`train.py`), runs short experiments on GPU, and measures improvement using a single metric: **validation bits per byte (val_bpb)**. + +The agent operates in a tight loop: + +1. Analyze previous experiment results +2. Hypothesize an improvement to `train.py` +3. Commit the change (git) +4. 
Run training for up to 5 minutes on GPU +5. Measure val_bpb on the held-out validation set +6. Record the result in `results.tsv` with a keep/discard verdict +7. Repeat + +## val_bpb Metric + +**Validation bits per byte** measures how many bits the model needs, on average, to encode each byte of the validation set. It is derived from cross-entropy loss: + +``` +val_bpb = cross_entropy_loss / ln(2) * (tokens / bytes) +``` + +**Interpretation:** + +- **Lower is better** β€” fewer bits per byte means the model compresses / predicts the validation data more efficiently +- Typical range for small LLMs: 0.8 - 1.2 bpb +- A 0.01 improvement in val_bpb is meaningful at small scale +- The metric is data-dependent β€” only comparable across runs on the same validation set + +## 5-Minute Time Budget + +Each experiment is capped at **5 minutes of GPU wall-clock time**. This constraint: + +- Forces the agent to make small, testable changes +- Prevents runaway training jobs +- Enables rapid iteration (dozens of experiments per hour) +- Makes the search tractable on consumer GPUs + +The time budget is enforced by the training harness. If training does not converge within 5 minutes, the checkpoint at timeout is evaluated. + +## The train.py Modification Loop + +The agent treats `train.py` as the single artifact to optimize. Modifications include: + +- **Architecture changes**: layer count, hidden dimensions, attention heads, normalization +- **Training hyperparameters**: learning rate, batch size, warmup schedule, weight decay +- **Optimization tricks**: gradient accumulation, mixed precision, curriculum +- **Regularization**: dropout, data augmentation, label smoothing +- **Novel ideas**: the agent can try unconventional approaches + +Each modification is a git commit so the exact code for every experiment is reproducible. 
+ +## results.tsv Format + +Tab-separated file appended after each experiment: + +| Column | Type | Description | +|--------|------|-------------| +| `commit_hash` | string | Git commit SHA of the experiment | +| `val_bpb` | float | Validation bits per byte (lower is better) | +| `status` | string | `keep` or `discard` | +| `description` | string | Brief description of what changed | + +Example: + +``` +commit_hash val_bpb status description +a1b2c3d 1.042 keep baseline transformer +e4f5g6h 1.038 keep added RMSNorm pre-norm +i7j8k9l 1.051 discard unstable cosine lr schedule +m0n1o2p 1.031 keep increased hidden dim to 512 +``` + +## OASF Registration + +When publishing an autoresearch-optimized model on-chain via ERC-8004, use these OASF classifications: + +- **Skills**: `machine_learning/model_optimization` +- **Domains**: `technology/artificial_intelligence/research` + +These tags help buyers discover optimized models through the agent discovery protocol and understand the provenance of the offering. + +## Provenance + +The `publish.py` script generates a provenance JSON file that records: + +- **framework**: `autoresearch` +- **metric**: `val_bpb` and its value +- **trainHash**: SHA-256 of `train.py` at the winning commit (reproducibility proof) +- **paramCount**: model parameter count (when available from checkpoint metadata) +- **experimentId**: git commit hash of the winning experiment + +This provenance file can be passed to `obol sell inference --provenance-file` so that buyers can verify the optimization lineage of the model they are purchasing. 
 diff --git a/internal/embed/skills/autoresearch/references/deploy-app-prompt.md b/internal/embed/skills/autoresearch/references/deploy-app-prompt.md new file mode 100644 index 00000000..7e2be0d3 --- /dev/null +++ b/internal/embed/skills/autoresearch/references/deploy-app-prompt.md @@ -0,0 +1,218 @@ +# Deploy x402-Gated Web App — Agent Prompt Template + +Paste this into the obol-agent chat, customising the variables at the top. + +## Key architecture decisions + +- **Namespace: `llm`** — deploy alongside LiteLLM so the Deployment can mount `litellm-secrets` directly (Secrets are namespace-scoped, can't cross namespaces) +- **Image: `python:3.12-slim`** — public image, no build needed. k3d pulls it automatically. +- **App code in ConfigMap** — agent writes Python source to a ConfigMap, mounts it into the container at `/app`. Same pattern as the `.well-known/agent-registration.json` busybox httpd. +- **LiteLLM auth** — the Deployment reads `LITELLM_MASTER_KEY` from Secret `litellm-secrets` in `llm` namespace via `secretKeyRef`. Internal calls to LiteLLM don't go through x402. +- **x402 gating** — handled by the ServiceOffer CR. monetize.py creates the Traefik ForwardAuth Middleware + HTTPRoute at `/services/<name>/*`. Traefik strips the prefix (`ReplacePrefixMatch: /`) before forwarding, so the app just serves at `/`. +- **RBAC** — the agent already has cluster-wide RBAC for Deployments, Services, ConfigMaps, and ServiceOffers. No changes needed. + +--- + +## Prompt + +``` +Deploy a payment-gated web application into the cluster. Follow these steps exactly. + +### Step 1: Create ConfigMap with app code + +Create a ConfigMap named `cv-enhancer-app` in namespace `llm`. + +It must contain a single key `app.py` — a Python HTTP server (stdlib only: http.server, urllib, json, os, sys).
The server must: + +**GET /** +Render a dark-themed HTML page with: +- Title: "CV Enhancer" with a green "x402" badge +- Subtitle: "Upload your resume and receive a polished, professional version" +- A