diff --git a/cmd/obol/sell.go b/cmd/obol/sell.go index 1e99ebc6..e2d6ef9d 100644 --- a/cmd/obol/sell.go +++ b/cmd/obol/sell.go @@ -1,6 +1,7 @@ package main import ( + "bytes" "context" "encoding/hex" "encoding/json" @@ -142,6 +143,10 @@ Examples: Usage: "SHA-256 of model weights for TEE attestation (required with --tee)", Sources: cli.EnvVars("OBOL_MODEL_HASH"), }, + &cli.StringFlag{ + Name: "provenance-file", + Usage: "Path to JSON file with provenance metadata (e.g. autoresearch experiment results)", + }, }, Action: func(ctx context.Context, cmd *cli.Command) error { name := cmd.Args().First() @@ -200,6 +205,16 @@ Examples: TEEType: teeType, ModelHash: modelHash, } + + if pf := cmd.String("provenance-file"); pf != "" { + prov, err := loadProvenance(pf) + if err != nil { + return fmt.Errorf("load provenance: %w", err) + } + d.Provenance = prov + fmt.Printf("Loaded provenance: %s (metric %s=%s, params %s)\n", + prov.Framework, prov.MetricName, prov.MetricValue, prov.ParamCount) + } if priceTable.PerMTok != "" { d.ApproxTokensPerRequest = schemas.ApproxTokensPerRequest } @@ -313,6 +328,14 @@ Examples: Name: "register-domains", Usage: "OASF domains for discovery (e.g. technology/artificial_intelligence)", }, + &cli.StringSliceFlag{ + Name: "register-metadata", + Usage: "Additional registration metadata as key=value pairs (repeatable, e.g. gpu=A100-80GB)", + }, + &cli.StringFlag{ + Name: "provenance-file", + Usage: "Path to JSON file with provenance metadata (e.g. 
// parseMetadataPairs converts the repeated --register-metadata flag values
// (each of the form key=value) into a map.
//
// Only the first "=" separates key from value, so values may themselves
// contain "=". Keys and values are trimmed of surrounding whitespace; an empty
// value is allowed, an empty key is not. A key that appears more than once
// (after trimming) is rejected instead of silently overwriting the earlier
// value, because a repeated key on the command line is almost always a typo.
//
// It returns a non-nil (possibly empty) map on success and a nil map together
// with a descriptive error on the first malformed entry.
func parseMetadataPairs(values []string) (map[string]string, error) {
	meta := make(map[string]string, len(values))
	for _, raw := range values {
		key, value, ok := strings.Cut(raw, "=")
		key = strings.TrimSpace(key)
		if !ok || key == "" {
			return nil, fmt.Errorf("invalid --register-metadata value %q: expected key=value", raw)
		}
		if _, dup := meta[key]; dup {
			return nil, fmt.Errorf("invalid --register-metadata value %q: duplicate key %q", raw, key)
		}
		meta[key] = strings.TrimSpace(value)
	}
	return meta, nil
}
allowPerHour bool) (schemas.PriceTable, error) { perRequest := cmd.String("price") if perRequest == "" { @@ -979,6 +1050,9 @@ func resolvePriceTable(cmd *cli.Command, allowPerHour bool) (schemas.PriceTable, } return schemas.PriceTable{PerMTok: perMTok}, nil case perHour != "": + if _, err := schemas.ApproximateRequestPriceFromPerHour(perHour); err != nil { + return schemas.PriceTable{}, fmt.Errorf("invalid --per-hour value %q: %w", perHour, err) + } return schemas.PriceTable{PerHour: perHour}, nil default: if allowPerHour { @@ -999,7 +1073,11 @@ func formatPriceTableSummary(priceTable schemas.PriceTable) string { schemas.ApproxTokensPerRequest, ) case priceTable.PerHour != "": - return fmt.Sprintf("%s USDC/hour", priceTable.PerHour) + return fmt.Sprintf("%s USDC/request (approx from %s USDC/hour @ %d min/request)", + priceTable.EffectiveRequestPrice(), + priceTable.PerHour, + schemas.ApproxMinutesPerRequest, + ) default: return "0 USDC/request" } @@ -1024,6 +1102,21 @@ func formatInferencePriceSummary(d *inference.Deployment) string { return fmt.Sprintf("%s USDC/request", d.PricePerRequest) } +// loadProvenance reads a provenance JSON file and returns the parsed struct. +func loadProvenance(path string) (*inference.Provenance, error) { + data, err := os.ReadFile(path) + if err != nil { + return nil, fmt.Errorf("read %s: %w", path, err) + } + var prov inference.Provenance + dec := json.NewDecoder(bytes.NewReader(data)) + dec.DisallowUnknownFields() + if err := dec.Decode(&prov); err != nil { + return nil, fmt.Errorf("parse %s: %w", path, err) + } + return &prov, nil +} + // removePricingRoute removes the x402-verifier pricing route for the given offer. 
func removePricingRoute(cfg *config.Config, name string) { urlPath := fmt.Sprintf("/services/%s", name) diff --git a/internal/embed/infrastructure/base/templates/serviceoffer-crd.yaml b/internal/embed/infrastructure/base/templates/serviceoffer-crd.yaml index ee7314fc..af92693d 100644 --- a/internal/embed/infrastructure/base/templates/serviceoffer-crd.yaml +++ b/internal/embed/infrastructure/base/templates/serviceoffer-crd.yaml @@ -154,6 +154,32 @@ spec: perEpoch: type: string description: "Per-training-epoch price in USDC. Fine-tuning only." + provenance: + type: object + description: >- + Optional provenance metadata for the service. Tracks how the + model or service was produced (e.g. autoresearch experiment data). + Included in the ERC-8004 registration document when present. + properties: + framework: + type: string + description: "Optimization framework (e.g. autoresearch)." + metricName: + type: string + description: "Name of the primary quality metric (e.g. val_bpb)." + default: "val_bpb" + metricValue: + type: string + description: "Primary quality metric value (e.g. 0.9973)." + experimentId: + type: string + description: "Experiment or commit identifier." + trainHash: + type: string + description: "SHA-256 hash of the training code that produced this model." + paramCount: + type: string + description: "Model parameter count (e.g. 50M, 1.3B)." path: type: string description: "URL path prefix for the HTTPRoute, defaults to /services/." @@ -201,6 +227,14 @@ spec: Valid values: reputation, crypto-economic, tee-attestation. items: type: string + metadata: + type: object + description: >- + Additional registration metadata published into the generated + agent-registration.json for discovery and ranking (for example: + gpu, framework, best_val_bpb, total_experiments). 
+ additionalProperties: + type: string status: type: object properties: diff --git a/internal/embed/skills/autoresearch-coordinator/SKILL.md b/internal/embed/skills/autoresearch-coordinator/SKILL.md new file mode 100644 index 00000000..ee7a4353 --- /dev/null +++ b/internal/embed/skills/autoresearch-coordinator/SKILL.md @@ -0,0 +1,179 @@ +--- +name: autoresearch-coordinator +description: "Coordinate distributed autoresearch experiments across GPU workers discovered via ERC-8004 and paid via x402 micropayments." +metadata: { "openclaw": { "emoji": "\ud83d\udd2c", "requires": { "bins": ["python3", "curl"] } } } +--- + +# Autoresearch Coordinator + +Coordinate distributed autoresearch experiments across GPU workers discovered on-chain via ERC-8004 and paid per-experiment via x402 micropayments. This replaces the Ensue-based shared-memory coordinator from autoresearch-at-home with a fully decentralised discovery and payment loop built on obol-stack primitives. + +## When to Use + +- Discovering GPU workers advertising `machine_learning/model_optimization` capabilities via 8004scan +- Probing worker endpoints for x402 pricing before submitting experiments +- Submitting `train.py` experiments to remote GPU workers through x402 payment gates +- Running the continuous THINK/CLAIM/RUN/PUBLISH experiment loop +- Viewing the global leaderboard of autoresearch results from worker metadata +- Coordinating multi-worker experiment campaigns + +## When NOT to Use + +- Selling your own GPU as a worker -- use `autoresearch-worker` (then monetize it with `obol sell http`) +- Buying generic inference (chat completions) -- use `buy-inference` +- Discovering agents without running experiments -- use `discovery` +- Signing transactions directly -- use `ethereum-local-wallet` +- Cluster diagnostics -- use `obol-stack` + +## Quick Start + +```bash +# Discover available GPU workers on 8004scan +python3 scripts/coordinate.py discover + +# Discover with custom limit +python3 
scripts/coordinate.py discover --limit 5 + +# Probe a specific worker for pricing +python3 scripts/coordinate.py probe https://worker.example.com/services/autoresearch-worker + +# Submit a single experiment to a worker +python3 scripts/coordinate.py submit https://worker.example.com/services/autoresearch-worker train.py + +# Submit with custom config overrides +python3 scripts/coordinate.py submit https://worker.example.com/services/autoresearch-worker train.py \ + --config '{"batch_size": 64, "learning_rate": 0.001}' + +# View global leaderboard (best val_bpb across all workers) +python3 scripts/coordinate.py leaderboard + +# Run continuous experiment loop (discover -> pick -> submit -> publish) +python3 scripts/coordinate.py loop train.py + +# Loop with worker preference and max rounds +python3 scripts/coordinate.py loop train.py --prefer https://worker.example.com/services/autoresearch-worker --rounds 10 +``` + +## Commands + +| Command | Description | +|---------|-------------| +| `discover [--limit N]` | Query 8004scan for GPU workers with `machine_learning/model_optimization` skill | +| `probe <endpoint>` | Send unauthenticated request to parse 402 pricing from the worker | +| `submit <endpoint> <train.py> [--config JSON]` | Submit experiment with x402 payment (pre-sign ERC-3009, attach X-PAYMENT) | +| `leaderboard [--limit N]` | Query 8004scan for all autoresearch workers, rank by best `val_bpb` | +| `loop <train.py> [--prefer URL] [--rounds N]` | Continuous loop: discover, pick best worker, submit, collect, publish | + +## The Experiment Loop + +The coordinator implements a THINK/CLAIM/RUN/PUBLISH loop: + +1. **THINK** -- Read current `train.py`, review leaderboard results, decide on next experiment variation +2. **CLAIM** -- Discover workers via 8004scan, probe pricing, select best worker (cheapest or preferred) +3. **RUN** -- Submit `train.py` + config to worker via POST `/experiment` through the x402 payment gate +4. 
**PUBLISH** -- Record result locally with provenance metadata (val_bpb, experiment_id, train.py hash, worker endpoint) + +Each step is atomic and idempotent. If a worker fails mid-experiment, the coordinator retries with the next available worker. + +## How Discovery Works + +Workers register on-chain via ERC-8004 and advertise capabilities through OASF (Open Agent Skills Framework) metadata. The coordinator queries the 8004scan public API: + +``` +GET https://www.8004scan.io/api/v1/public/agents + ?protocol=OASF + &search=machine_learning/model_optimization + &limit=20 +``` + +The API returns agent summary objects. The coordinator then prefers the embedded registration document in `raw_metadata.offchain_content` and falls back to the off-chain registration URI when needed. + +From the registration document it extracts: +- service endpoints (where to POST experiments) +- x402 support flag (payment-gated access) +- OASF capability metadata from the `services[]` entry with `name: OASF` +- leaderboard metadata such as `best_val_bpb` + +## How Payment Works + +Experiment submission uses the same x402 payment flow as `buy-inference`: + +1. **Probe** -- Send unauthenticated POST to worker endpoint, receive `402 Payment Required` with pricing +2. **Sign** -- Pre-sign an ERC-3009 `TransferWithAuthorization` voucher via the current remote-signer API (`GET /api/v1/keys`, `POST /api/v1/sign/
/typed-data`) +3. **Submit** -- Re-send the POST with the `X-PAYMENT` header containing the signed voucher +4. **Settle** -- Worker's x402 verifier validates payment via the facilitator, forwards request to GPU + +Payment is per-experiment (not per-token). The 402 response includes `maxAmountRequired` which is the cost for one experiment run. + +## How Results are Published + +After collecting a result from a worker, the coordinator stores provenance metadata locally: + +```json +{ + "experiment_id": "exp-20260312-a1b2c3", + "train_hash": "sha256:abcdef...", + "val_bpb": 1.234, + "worker_endpoint": "https://worker.example.com/services/autoresearch", + "worker_agent_id": 42, + "timestamp": "2026-03-12T10:30:00Z", + "payment_tx": "0xdeadbeef..." +} +``` + +Results are appended to `$DATA_DIR/autoresearch/results.jsonl` (one JSON object per line). + +## Environment Variables + +| Variable | Default | Description | +|----------|---------|-------------| +| `SCAN_API_URL` | `https://www.8004scan.io/api/v1/public` | 8004scan public API base URL; the coordinator queries `/agents` under this base | +| `REMOTE_SIGNER_URL` | `http://remote-signer:9000` | Remote-signer REST API for payment signing | +| `ERPC_URL` | `http://erpc.erpc.svc.cluster.local:4000/rpc` | eRPC gateway base URL | +| `ERPC_NETWORK` | `base-sepolia` | Default chain for payment | +| `DATA_DIR` | `/data` | Base directory for result storage | + +## Architecture + +``` +coordinate.py + | + +-- discover: 8004scan API (OASF filter) + | | + | v + | Worker .well-known/agent-registration.json + | + +-- probe: POST /experiment (no payment) + | | + | v + | 402 Payment Required (pricing JSON) + | + +-- submit: ERC-3009 sign via remote-signer + | | + | +-- POST /experiment + X-PAYMENT header + | | | + | | v + | | x402 verifier -> facilitator -> GPU worker + | | | + | | v + | | 200 + experiment result + | | + | v + +-- publish: results.jsonl (local provenance) +``` + +## Constraints + +- **Requires remote-signer** -- 
must have agent wallet provisioned via `obol openclaw onboard` +- **Requires network access** -- 8004scan API and worker endpoints must be reachable +- **Python stdlib only** -- uses `urllib`; no third-party Python dependencies required +- **Per-experiment payment** -- each submission costs one x402 payment; monitor balance via `buy-inference balance` +- **Worker availability** -- workers may go offline between discovery and submission; coordinator retries automatically +- **Result storage is local** -- provenance metadata stored on-disk, not on-chain (future: IPFS/on-chain attestations) + +## References + +- `references/coordination-protocol.md` -- Ensue-to-obol mapping, discovery flow, payment flow, leaderboard format +- See also: `discovery` skill for raw ERC-8004 registry queries +- See also: `buy-inference` skill for the x402 buyer sidecar architecture +- See also: `sell` skill for running a GPU worker (sell-side) diff --git a/internal/embed/skills/autoresearch-coordinator/references/coordination-protocol.md b/internal/embed/skills/autoresearch-coordinator/references/coordination-protocol.md new file mode 100644 index 00000000..063f8881 --- /dev/null +++ b/internal/embed/skills/autoresearch-coordinator/references/coordination-protocol.md @@ -0,0 +1,192 @@ +# Coordination Protocol Reference + +How the autoresearch coordinator maps from the original Ensue-based shared-memory model to obol-stack's decentralised primitives. 
+ +## Ensue to obol-stack Mapping + +| Ensue Concept | obol-stack Equivalent | Notes | +|---|---|---| +| Shared memory (Redis/filesystem) | ERC-8004 on-chain registry + 8004scan API | Workers register capabilities on-chain; coordinator discovers via API | +| Task queue (Ensue scheduler) | Direct HTTP POST to worker `/experiment` endpoint | No central queue; coordinator submits directly to chosen worker | +| Worker discovery (static config) | 8004scan OASF query (`machine_learning/model_optimization`) | Dynamic discovery; workers join/leave without coordinator restart | +| Payment (none / trust-based) | x402 micropayments (USDC via ERC-3009 pre-signed auths) | Per-experiment payment; no credit accounts or invoicing | +| Result aggregation (shared DB) | Local `results.jsonl` + worker `.well-known` metadata | Coordinator stores locally; workers publish best scores in registration | +| Leaderboard (centralized) | 8004scan metadata aggregation | Workers self-report best `val_bpb` in their registration JSON | +| Worker health checks (heartbeat) | x402 probe (402 = alive, timeout = dead) | Payment gate doubles as health check | +| Experiment versioning (git) | SHA-256 hash of `train.py` (`train_hash` field) | Immutable reference to exact code submitted | + +## Discovery Flow + +``` +Coordinator 8004scan Worker + | | | + |-- GET /api/v1/public/agents ------>| | + | ?protocol=OASF | | + | &search=machine_learning/ | | + | model_optimization | | + | &limit=20 | | + | | | + |<-- {data: [agent summaries]} ------| | + | | | + | (prefer raw_metadata.offchain_content when present) | + | | | + |-- GET (fallback only) ---------------------------->| + | | + |<-- {services: [...], x402Support: true, metadata: {...}} ---------| + | | + | (extract endpoint, verify x402 + OASF service entry) | +``` + +### 8004scan API Parameters + +| Parameter | Type | Description | +|---|---|---| +| `protocol` | string | Filter by protocol: `OASF`, `MCP`, `A2A`, `Web`, `Email` | +| `search` | string | 
Keyword search across name, description, skills | +| `chainId` | int | Filter by chain (e.g., 84532 for Base Sepolia) | +| `ownerAddress` | address | Filter by registration owner | +| `sortBy` | string | Sort field (e.g., `registeredAt`) | +| `limit` | int | Max results to return | + +### Worker Registration JSON + +Workers advertise capabilities in their `.well-known/agent-registration.json`: + +```json +{ + "type": "https://eips.ethereum.org/EIPS/eip-8004#registration-v1", + "name": "GPU Worker Alpha", + "description": "A100 GPU worker for autoresearch experiments", + "services": [ + { + "name": "web", + "endpoint": "https://worker.example.com/services/autoresearch-worker", + "version": "1.0.0" + }, + { + "name": "OASF", + "version": "0.8", + "skills": ["machine_learning/model_optimization"], + "domains": ["technology/artificial_intelligence/research"] + } + ], + "x402Support": true, + "metadata": { + "gpu": "A100-80GB", + "framework": "pytorch", + "best_val_bpb": "1.234", + "total_experiments": "42", + "updated": "2026-03-12T10:30:00Z" + }, + "active": true +} +``` + +## Payment Flow + +``` +Coordinator Worker (x402 gate) Facilitator Chain + | | | | + |-- POST /experiment --------->| | | + | (no X-PAYMENT header) | | | + | | | | + |<-- 402 Payment Required -----| | | + | {payTo, network, | | | + | maxAmountRequired} | | | + | | | | + |-- sign ERC-3009 auth --------|---------------------------|----------------->| + | (GET /api/v1/keys + | | | + | POST /api/v1/sign//typed-data) | | + | | | | + |-- POST /experiment --------->| | | + | X-PAYMENT: {signature, | | | + | authorization, chain, |-- verify payment -------->| | + | token} | |-- settle USDC -->| + | body: {train_py, config} |<-- 200 OK (valid) -------| | + | | | | + | |-- run experiment | | + | | (GPU training) | | + | | | | + |<-- 200 {val_bpb, metrics} ---| | | +``` + +### ERC-3009 Authorization Structure + +Each payment authorization contains: + +| Field | Type | Description | +|---|---|---| +| `from` | 
address | Coordinator's wallet (agent wallet from remote-signer) | +| `to` | address | Worker's `payTo` address (from 402 response) | +| `value` | uint256 | Payment amount in USDC micro-units | +| `validAfter` | uint256 | Unix timestamp (0 = immediately valid) | +| `validBefore` | uint256 | Unix timestamp (current time + 1 hour) | +| `nonce` | bytes32 | Random 32-byte nonce (single-use, prevents replay) | + +### X-PAYMENT Header Format + +```json +{ + "signature": "0x...", + "authorization": { + "from": "0xCoordinatorWallet", + "to": "0xWorkerPayTo", + "value": "1000", + "validAfter": "0", + "validBefore": "1741784400", + "nonce": "0xrandom32bytes..." + }, + "chain": "base-sepolia", + "token": "0x036CbD53842c5426634e7929541eC2318f3dCF7e" +} +``` + +## Leaderboard Metadata Format + +Workers publish their best results in the `.well-known/agent-registration.json` metadata section. The coordinator aggregates these via 8004scan queries. + +### Required Fields + +| Field | Type | Description | +|---|---|---| +| `metadata.best_val_bpb` | float | Best validation bits-per-byte achieved | +| `metadata.total_experiments` | int | Total experiments processed by this worker | +| `metadata.updated` | string | ISO 8601 timestamp of last result update | + +### Optional Fields + +| Field | Type | Description | +|---|---|---| +| `metadata.gpu` | string | GPU model (e.g., `A100-80GB`, `H100`) | +| `metadata.framework` | string | Training framework (e.g., `pytorch`, `jax`) | +| `metadata.best_experiment_hash` | string | SHA-256 hash of the train.py that produced the best result | +| `metadata.avg_experiment_time` | float | Average seconds per experiment | + +### Leaderboard Ranking + +The coordinator ranks workers by `metadata.best_val_bpb` in ascending order (lower is better). When querying the leaderboard: + +1. Fetch all workers with `machine_learning/model_optimization` skill from 8004scan +2. For each worker, fetch their registration JSON +3. 
Extract `metadata.best_val_bpb` (skip workers without this field) +4. Sort ascending by `val_bpb` +5. Display rank, score, agent name, and last update time + +### Local Results Format + +The coordinator also maintains a local `results.jsonl` file for provenance tracking. Each line is a JSON object: + +```json +{ + "experiment_id": "exp-20260312-a1b2c3", + "train_hash": "sha256:abcdef1234567890...", + "val_bpb": 1.234, + "worker_endpoint": "https://worker.example.com/services/autoresearch", + "worker_agent_id": 42, + "timestamp": "2026-03-12T10:30:00Z", + "chain": "base-sepolia", + "raw_result": { "...worker response..." } +} +``` + +The `experiment_id` format is `exp-YYYYMMDD-XXXXXX` where `XXXXXX` is 6 random hex chars. The `train_hash` is the SHA-256 of the exact `train.py` source submitted, providing an immutable reference for reproducibility. diff --git a/internal/embed/skills/autoresearch-coordinator/scripts/coordinate.py b/internal/embed/skills/autoresearch-coordinator/scripts/coordinate.py new file mode 100644 index 00000000..5f4bc716 --- /dev/null +++ b/internal/embed/skills/autoresearch-coordinator/scripts/coordinate.py @@ -0,0 +1,1031 @@ +#!/usr/bin/env python3 +"""coordinate.py -- Distributed autoresearch coordinator via obol-stack. + +Discovers GPU workers registered on ERC-8004 via the 8004scan API, probes +their x402 pricing, submits experiments with micropayments, and tracks +results with local provenance metadata. + +Replaces the Ensue-based shared-memory coordinator from autoresearch-at-home +with decentralised discovery (ERC-8004) and payment (x402). 
# ---------------------------------------------------------------------------
# Import shared helpers from sibling skills
# ---------------------------------------------------------------------------

# The signer helpers live in the sibling "ethereum-local-wallet" skill; make
# its scripts directory importable before attempting the import.
SKILL_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
SIGNER_SCRIPTS = os.path.join(os.path.dirname(SKILL_DIR), "ethereum-local-wallet", "scripts")
sys.path.insert(0, SIGNER_SCRIPTS)

try:
    from signer import _signer_get, _signer_post  # noqa: E402
    HAS_SIGNER = True
except ImportError:
    # Payment signing is unavailable; callers check HAS_SIGNER before use.
    HAS_SIGNER = False

# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------

SCAN_API_URL = os.environ.get(
    "SCAN_API_URL", "https://www.8004scan.io/api/v1/public"
)
REMOTE_SIGNER_URL = os.environ.get("REMOTE_SIGNER_URL", "http://remote-signer:9000")
ERPC_URL = os.environ.get("ERPC_URL", "http://erpc.erpc.svc.cluster.local:4000/rpc")
DEFAULT_CHAIN = os.environ.get("ERPC_NETWORK", "base-sepolia")
DATA_DIR = os.environ.get("DATA_DIR", "/data")

RESULTS_DIR = os.path.join(DATA_DIR, "autoresearch")
RESULTS_FILE = os.path.join(RESULTS_DIR, "results.jsonl")

OASF_SKILL_FILTER = "machine_learning/model_optimization"

CHAIN_IDS = {
    "base-sepolia": 84532,
    "base": 8453,
    "ethereum": 1,
    "mainnet": 1,
    "sepolia": 11155111,
}

USDC_CONTRACTS = {
    "base-sepolia": "0x036CbD53842c5426634e7929541eC2318f3dCF7e",
    "base": "0x833589fCD6eDb6E08f4c7C32D4f71b54bdA02913",
    "ethereum": "0xA0b86991c6218b36c1d19D4a2e9Eb0cE3606eB48",
}

USDC_DOMAIN_NAME = "USDC"
USDC_DOMAIN_VERSION = "2"

DEFAULT_LOOP_DELAY = 60  # seconds between loop iterations


# ---------------------------------------------------------------------------
# HTTP helpers
# ---------------------------------------------------------------------------

def _http_get(url, headers=None, timeout=30):
    """GET ``url`` and return the response body parsed as JSON.

    ``headers`` are merged over the default Accept/User-Agent headers.
    Raises whatever ``urllib`` raises on transport failure (``OSError``
    subclasses, including ``URLError``/``HTTPError`` and read timeouts) and
    ``ValueError`` when the body is not valid UTF-8 JSON.
    """
    hdrs = {"Accept": "application/json", "User-Agent": "obol-autoresearch/1.0"}
    if headers:
        hdrs.update(headers)
    req = urllib.request.Request(url, headers=hdrs, method="GET")
    with urllib.request.urlopen(req, timeout=timeout) as resp:
        return json.loads(resp.read())


def _http_post(url, body, headers=None, timeout=120):
    """POST ``body`` to ``url`` and return ``(status_code, headers_dict, body_bytes)``.

    A ``dict`` or ``list`` body is JSON-encoded; anything else (e.g. ``bytes``)
    is sent as-is. HTTP error statuses (such as 402) are returned like normal
    responses instead of being raised, so callers can inspect them.
    """
    hdrs = {"Content-Type": "application/json", "User-Agent": "obol-autoresearch/1.0"}
    if headers:
        hdrs.update(headers)
    data = json.dumps(body).encode() if isinstance(body, (dict, list)) else body
    req = urllib.request.Request(url, data=data, headers=hdrs, method="POST")
    try:
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            return resp.status, dict(resp.headers), resp.read()
    except urllib.error.HTTPError as e:
        # HTTPError doubles as a response object; close it once read.
        with e:
            return e.code, dict(e.headers), e.read()


# ---------------------------------------------------------------------------
# 8004scan API
# ---------------------------------------------------------------------------

def build_scan_api_url(protocol="OASF", search=None, limit=20, chain_id=None,
                       sort_by=None, owner_address=None):
    """Build the 8004scan ``/agents`` query URL under ``SCAN_API_URL``.

    ``limit`` is always sent; every other filter is added only when truthy.
    """
    params = {"limit": limit}
    if protocol:
        params["protocol"] = protocol
    if search:
        params["search"] = search
    if chain_id:
        params["chainId"] = chain_id
    if sort_by:
        params["sortBy"] = sort_by
    if owner_address:
        params["ownerAddress"] = owner_address
    base = SCAN_API_URL.rstrip("/")
    return f"{base}/agents?{urllib.parse.urlencode(params)}"


def _agents_from_response(result):
    """Return the agent list from a scan API response, or ``[]``.

    Accepts either a bare JSON list or an object carrying the list under
    ``data`` (preferred) or ``items``.
    """
    if isinstance(result, list):
        return result
    if isinstance(result, dict):
        data = result.get("data", result.get("items", []))
        if isinstance(data, list):
            return data
    return []


def query_8004scan(protocol="OASF", search=None, limit=20, chain_id=None,
                   sort_by=None, owner_address=None):
    """Query the 8004scan public API for registered agents.

    Supports filtering by protocol (MCP/A2A/OASF/Web/Email), keyword search,
    chainId, ownerAddress, and sorting. When a keyword search yields nothing,
    the query is retried without the keyword so callers can filter locally.

    Returns a list of agent summary objects; on any transport or decoding
    failure the error is printed to stderr and ``[]`` is returned.
    """
    try:
        agents = _agents_from_response(
            _http_get(build_scan_api_url(protocol, search, limit, chain_id,
                                         sort_by, owner_address))
        )
        if not agents and search:
            # Fall back to protocol-only listing if the keyword search is too strict.
            agents = _agents_from_response(
                _http_get(build_scan_api_url(protocol, None, limit, chain_id,
                                             sort_by, owner_address))
            )
        return agents
    except (OSError, ValueError) as e:
        # OSError covers URLError/HTTPError *and* socket read timeouts, which
        # urllib does not wrap; ValueError covers JSON and UTF-8 decode errors.
        print(f"Error querying 8004scan: {e}", file=sys.stderr)
        return []


def fetch_registration_json(uri):
    """Resolve a registration reference to a registration document.

    ``uri`` may be an already-decoded dict (returned unchanged), a
    ``data:application/json;base64,`` URI, or an http(s) URL. Anything else
    (empty value, non-string, unsupported scheme) yields ``None``.

    Returns the decoded document when it is a JSON object, otherwise ``None``;
    fetch and decode failures are reported on stderr and also yield ``None``.
    """
    if not uri:
        return None
    if isinstance(uri, dict):
        return uri
    if not isinstance(uri, str):
        return None

    if uri.startswith("data:application/json;base64,"):
        try:
            raw = uri.split(",", 1)[1]
            doc = json.loads(base64.b64decode(raw).decode("utf-8"))
        except (ValueError, json.JSONDecodeError) as e:
            print(f" Warning: Failed to decode registration data URI: {e}", file=sys.stderr)
            return None
        # Callers use dict methods on the result, so reject non-object JSON.
        return doc if isinstance(doc, dict) else None

    if not uri.startswith(("http://", "https://")):
        return None
    try:
        doc = _http_get(uri, timeout=15)
    except (OSError, ValueError) as e:
        print(f" Warning: Failed to fetch {uri}: {e}", file=sys.stderr)
        return None
    return doc if isinstance(doc, dict) else None


def extract_registration_from_agent(agent):
    """Extract the registration document from an 8004scan agent summary.

    Prefers the embedded ``raw_metadata.offchain_content`` object. Otherwise
    the first non-empty of ``uri``, ``tokenURI``, ``raw_metadata.offchain_uri``
    and ``agent_url`` is resolved via ``fetch_registration_json``.

    Returns ``None`` when ``agent`` is not a dict or nothing can be resolved.
    """
    if not isinstance(agent, dict):
        return None

    raw = agent.get("raw_metadata", {})
    if not isinstance(raw, dict):
        raw = {}

    offchain = raw.get("offchain_content")
    if isinstance(offchain, dict):
        return offchain

    uri = (
        agent.get("uri")
        or agent.get("tokenURI")
        or raw.get("offchain_uri")
        or agent.get("agent_url")
    )
    return fetch_registration_json(uri)


def extract_worker_endpoint(registration):
    """Extract the experiment submission endpoint from a registration document.

    Returns the first service endpoint whose URL contains ``/services/`` or
    ``/experiment``. If none matches and the registration sets
    ``x402Support``, the first usable service endpoint is returned instead.
    Malformed service entries (non-dicts, missing or non-string endpoints)
    are ignored. Returns ``None`` when no endpoint can be determined.
    """
    if not isinstance(registration, dict):
        return None

    services = registration.get("services", [])
    if not isinstance(services, list):
        return None

    endpoints = [
        svc.get("endpoint")
        for svc in services
        if isinstance(svc, dict)
    ]
    endpoints = [ep for ep in endpoints if isinstance(ep, str) and ep]

    for endpoint in endpoints:
        if "/services/" in endpoint or "/experiment" in endpoint:
            return endpoint

    # Fallback: a payment-gated registration with no conventional path.
    if registration.get("x402Support") and endpoints:
        return endpoints[0]

    return None


def _skills_and_domains(entry):
    """Return the stringified ``skills`` then ``domains`` items of a dict entry."""
    items = []
    for key in ("skills", "domains"):
        value = entry.get(key, [])
        if isinstance(value, list):
            items.extend(str(item) for item in value if item is not None)
    return items


def extract_oasf_skills(registration):
    """Extract OASF skills/domains from a registration document.

    The ``services[]`` entries named ``OASF`` (case-insensitive) are read
    first. Only when they yield nothing is the top-level ``oasf`` list (or,
    failing that, ``skills``) consulted; its entries may be plain strings or
    dicts carrying ``skills``/``domains`` lists and ``domain``/``name`` values.

    Returns a list of strings (possibly empty).
    """
    if not isinstance(registration, dict):
        return []

    found = []
    services = registration.get("services", [])
    if isinstance(services, list):
        for svc in services:
            if isinstance(svc, dict) and str(svc.get("name", "")).upper() == "OASF":
                found.extend(_skills_and_domains(svc))
    if found:
        return found

    oasf = registration.get("oasf", registration.get("skills", []))
    if isinstance(oasf, list):
        for entry in oasf:
            if isinstance(entry, dict):
                found.extend(_skills_and_domains(entry))
                for key in ("domain", "name"):
                    value = entry.get(key)
                    if value is not None:
                        found.append(str(value))
            elif isinstance(entry, str):
                found.append(entry)
    return found
# ---------------------------------------------------------------------------
# x402 payment helpers
# ---------------------------------------------------------------------------

# Pricing fields copied from a 402 response body (or an ``accepts[]`` entry).
_PRICING_KEYS = ("payTo", "network", "maxAmountRequired", "facilitatorURL",
                 "price", "priceModel", "description")

# Lower-cased response header -> pricing field it overrides.
_PRICING_HEADERS = (
    ("x-payment-payto", "payTo"),
    ("x-payment-network", "network"),
    ("x-payment-amount", "maxAmountRequired"),
)

# EIP-712 domain ``name`` of the USDC contract on each supported chain.
# NOTE(review): values follow the x402 reference network configuration
# ("USDC" on Base Sepolia, "USD Coin" on Base and Ethereum mainnet); confirm
# against each token contract before relying on mainnet payments. A mismatch
# makes the signature fail verification.
USDC_DOMAIN_NAMES = {
    "base-sepolia": "USDC",
    "base": "USD Coin",
    "ethereum": "USD Coin",
}


def usdc_domain_name(network):
    """Return the EIP-712 domain name to sign with for ``network``.

    Falls back to the module-wide ``USDC_DOMAIN_NAME`` for chains without a
    dedicated entry in ``USDC_DOMAIN_NAMES``.
    """
    name = USDC_DOMAIN_NAMES.get(network)
    return name if name else USDC_DOMAIN_NAME


def parse_402_pricing(headers, body):
    """Parse pricing info from a 402 Payment Required response.

    ``body`` may be ``bytes``/``str`` JSON or an already-decoded dict;
    ``headers`` is a mapping of response headers (names matched
    case-insensitively) or ``None``. Sources are applied in this order, later
    ones overriding earlier ones:

    1. top-level body fields,
    2. the first object in the body's ``accepts`` list (fills gaps only),
    3. ``X-Payment-PayTo`` / ``X-Payment-Network`` / ``X-Payment-Amount`` headers,
    4. a nested ``pricing`` object in the body.

    Returns a dict that may contain payTo, network, maxAmountRequired,
    facilitatorURL, price, priceModel and description, or ``None`` when
    neither ``payTo`` nor ``maxAmountRequired`` could be determined.
    """
    data = body
    if isinstance(body, (bytes, bytearray, str)):
        try:
            data = json.loads(body)
        except ValueError:  # includes JSONDecodeError and bad UTF-8
            data = {}
    if not isinstance(data, dict):
        # Lists, scalars and None carry no pricing fields.
        data = {}

    pricing = {key: data[key] for key in _PRICING_KEYS if key in data}

    # Standard x402 responses list payment requirements under "accepts".
    # NOTE(review): the first entry is used; confirm whether selection by
    # network/scheme is needed when a worker offers several.
    accepts = data.get("accepts")
    if isinstance(accepts, list):
        offer = next((a for a in accepts if isinstance(a, dict)), None)
        if offer is not None:
            for key in _PRICING_KEYS:
                if key in offer:
                    pricing.setdefault(key, offer[key])

    # Header names are case-insensitive and proxies may re-case them.
    lowered = {str(name).lower(): value for name, value in dict(headers or {}).items()}
    for header, key in _PRICING_HEADERS:
        if header in lowered:
            pricing[key] = lowered[header]

    nested = data.get("pricing")
    if isinstance(nested, dict):
        pricing.update(nested)

    if not pricing.get("payTo") and not pricing.get("maxAmountRequired"):
        return None

    return pricing


def sign_erc3009_auth(pay_to, amount, chain=None):
    """Sign an ERC-3009 TransferWithAuthorization via the remote-signer.

    ``pay_to`` is the recipient address, ``amount`` the transfer value as a
    non-negative integer (int or decimal string) and ``chain`` a network name
    from ``CHAIN_IDS`` (defaults to ``DEFAULT_CHAIN``). The authorization is
    valid immediately and expires one hour after signing.

    Returns the signed authorization dict suitable for an X-PAYMENT header,
    or None on failure (the reason is printed to stderr).
    """
    if not HAS_SIGNER:
        print("Error: remote-signer helpers not available", file=sys.stderr)
        return None

    network = chain or DEFAULT_CHAIN
    chain_id = CHAIN_IDS.get(network)
    usdc = USDC_CONTRACTS.get(network)
    if not chain_id or not usdc:
        print(f"Error: unsupported chain '{network}' for payment", file=sys.stderr)
        return None

    # The typed-data field is a uint256; reject anything that is not a plain
    # non-negative integer before asking the signer to sign it.
    value = str(amount).strip()
    if not (value.isascii() and value.isdigit()):
        print(f"Error: payment amount {amount!r} is not a non-negative integer",
              file=sys.stderr)
        return None

    # Get signer address using the same API as ethereum-local-wallet.
    try:
        info = _signer_get("/api/v1/keys")
        if isinstance(info, dict):
            keys = info.get("keys", [])
            signer_address = keys[0] if keys else None
        elif isinstance(info, list):
            signer_address = info[0] if info else None
        else:
            signer_address = None
        if not signer_address:
            print("Error: no keys in remote-signer", file=sys.stderr)
            return None
    except Exception as e:
        print(f"Error contacting remote-signer: {e}", file=sys.stderr)
        return None

    # Generate random nonce (32 bytes)
    nonce = "0x" + os.urandom(32).hex()

    # Valid for 1 hour
    valid_after = 0
    valid_before = int(time.time()) + 3600

    # EIP-712 typed data for TransferWithAuthorization
    typed_data = {
        "types": {
            "EIP712Domain": [
                {"name": "name", "type": "string"},
                {"name": "version", "type": "string"},
                {"name": "chainId", "type": "uint256"},
                {"name": "verifyingContract", "type": "address"},
            ],
            "TransferWithAuthorization": [
                {"name": "from", "type": "address"},
                {"name": "to", "type": "address"},
                {"name": "value", "type": "uint256"},
                {"name": "validAfter", "type": "uint256"},
                {"name": "validBefore", "type": "uint256"},
                {"name": "nonce", "type": "bytes32"},
            ],
        },
        "primaryType": "TransferWithAuthorization",
        "domain": {
            # The domain name differs per chain; a wrong name invalidates the signature.
            "name": usdc_domain_name(network),
            "version": USDC_DOMAIN_VERSION,
            "chainId": chain_id,
            "verifyingContract": usdc,
        },
        "message": {
            "from": signer_address,
            "to": pay_to,
            "value": value,
            "validAfter": str(valid_after),
            "validBefore": str(valid_before),
            "nonce": nonce,
        },
    }

    try:
        sig_data = _signer_post(f"/api/v1/sign/{signer_address}/typed-data", typed_data)
        signature = sig_data.get("signature") if isinstance(sig_data, dict) else sig_data
        if not signature:
            print("Error: remote-signer returned empty signature", file=sys.stderr)
            return None
        return {
            "signature": signature,
            "authorization": typed_data["message"],
            "chain": network,
            "token": usdc,
        }
    except Exception as e:
        print(f"Error signing authorization: {e}", file=sys.stderr)
        return None


def build_x_payment_header(signed_auth):
    """Encode a signed authorization as an X-PAYMENT header value (JSON).

    Returns ``None`` when ``signed_auth`` is empty or ``None``.
    """
    if not signed_auth:
        return None
    return json.dumps(signed_auth)


# ---------------------------------------------------------------------------
# Result provenance
# ---------------------------------------------------------------------------

def _ensure_results_dir():
    """Create the results directory if it does not exist."""
    os.makedirs(RESULTS_DIR, exist_ok=True)


def save_result(result):
    """Append an experiment result to the local results.jsonl (one JSON object per line)."""
    _ensure_results_dir()
    with open(RESULTS_FILE, "a", encoding="utf-8") as f:
        f.write(json.dumps(result) + "\n")


def load_results():
    """Load all experiment results from results.jsonl.

    Returns ``[]`` when the file does not exist. Blank lines and lines that
    are not valid JSON are skipped so one corrupt record does not hide the rest.
    """
    if not os.path.exists(RESULTS_FILE):
        return []
    results = []
    with open(RESULTS_FILE, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            try:
                results.append(json.loads(line))
            except json.JSONDecodeError:
                continue
    return results


def compute_train_hash(train_py_path):
    """Compute the SHA-256 of a train.py file, returned as ``sha256:<hex digest>``.

    The file is read in 8 KiB chunks so large files are not loaded at once.
    """
    h = hashlib.sha256()
    with open(train_py_path, "rb") as f:
        for chunk in iter(lambda: f.read(8192), b""):
            h.update(chunk)
    return f"sha256:{h.hexdigest()}"


def generate_experiment_id():
    """Generate an experiment ID of the form ``exp-YYYYMMDD-xxxxxx``.

    The date is the current UTC date; the suffix is 3 random bytes in hex.
    """
    ts = datetime.now(timezone.utc).strftime("%Y%m%d")
    suffix = os.urandom(3).hex()
    return f"exp-{ts}-{suffix}"
--------------------------------------------------------------------------- +# ObolCoordinator +# --------------------------------------------------------------------------- + +class ObolCoordinator: + """Coordinates distributed autoresearch experiments via obol-stack. + + Discovers GPU workers through 8004scan, pays per-experiment via x402, + and tracks results with local provenance metadata. + """ + + def __init__(self, chain=None): + self.chain = chain or DEFAULT_CHAIN + + def discover_workers(self, limit=20): + """Query 8004scan for workers advertising machine_learning/model_optimization. + + Returns list of dicts with keys: name, endpoint, uri, agent_id, skills, x402. + """ + agents = query_8004scan( + protocol="OASF", + search=OASF_SKILL_FILTER, + limit=limit, + ) + + workers = [] + for agent in agents: + raw = agent.get("raw_metadata", {}) if isinstance(agent, dict) else {} + uri = agent.get("uri", agent.get("tokenURI", "")) or (raw.get("offchain_uri", "") if isinstance(raw, dict) else "") + agent_id = agent.get("agentId") or agent.get("agent_id") or agent.get("id") or agent.get("token_id") + name = agent.get("name", f"agent-{agent_id or '?'}") + + registration = extract_registration_from_agent(agent) + endpoint = extract_worker_endpoint(registration) if registration else None + skills = extract_oasf_skills(registration) if registration else [] + x402 = bool((registration or {}).get("x402Support", agent.get("x402_supported", False))) + + workers.append({ + "name": name, + "endpoint": endpoint, + "uri": uri, + "agent_id": agent_id, + "skills": skills, + "x402": x402, + "registration": registration, + }) + + return workers + + def probe_worker(self, endpoint): + """Send unauthenticated request to worker, parse 402 for pricing. + + Returns pricing dict or None if the worker is not x402-gated. 
+ """ + # Send a minimal experiment probe (empty body triggers 402 before processing) + probe_body = {"probe": True} + status, headers, body = _http_post( + endpoint.rstrip("/") + "/experiment", + probe_body, + timeout=30, + ) + + if status == 402: + pricing = parse_402_pricing(headers, body) + if pricing: + pricing["status"] = 402 + pricing["endpoint"] = endpoint + return pricing + print(f" Got 402 but could not parse pricing from {endpoint}", file=sys.stderr) + return None + + if 200 <= status < 300: + print(f" Worker at {endpoint} returned {status} (no payment gate)") + return {"status": status, "endpoint": endpoint, "free": True} + + print(f" Worker at {endpoint} returned unexpected status {status}", file=sys.stderr) + return None + + def submit_experiment(self, endpoint, train_py_source, config=None): + """Submit an experiment to a worker with x402 payment. + + Args: + endpoint: Worker's base endpoint URL + train_py_source: Contents of train.py as a string + config: Optional dict of config overrides for the experiment + + Returns result dict from the worker, or None on failure. 
+ """ + experiment_url = endpoint.rstrip("/") + "/experiment" + + # Step 1: Probe for pricing + probe_body = {"probe": True} + status, headers, body = _http_post(experiment_url, probe_body, timeout=30) + + if status != 402: + if 200 <= status < 300: + # No payment gate -- submit directly + print(" Worker has no payment gate, submitting directly...") + return self._submit_direct(experiment_url, train_py_source, config) + print(f" Probe failed with status {status}", file=sys.stderr) + return None + + # Step 2: Parse pricing + pricing = parse_402_pricing(headers, body) + if not pricing: + print(" Could not parse 402 pricing", file=sys.stderr) + return None + + pay_to = pricing.get("payTo") + amount = pricing.get("maxAmountRequired", pricing.get("price")) + if not pay_to or not amount: + print(" Pricing missing payTo or amount", file=sys.stderr) + return None + + print(f" Price: {amount} USDC micro-units to {pay_to}") + + # Step 3: Sign ERC-3009 authorization + signed_auth = sign_erc3009_auth(pay_to, int(amount), self.chain) + if not signed_auth: + print(" Failed to sign payment authorization", file=sys.stderr) + return None + + # Step 4: Submit with payment + payment_header = build_x_payment_header(signed_auth) + submit_body = { + "train_py": train_py_source, + "config": config or {}, + } + submit_headers = {"X-PAYMENT": payment_header} + + print(" Submitting experiment with payment...") + status2, headers2, body2 = _http_post( + experiment_url, submit_body, headers=submit_headers, timeout=600 + ) + + if 200 <= status2 < 300: + try: + return json.loads(body2) + except json.JSONDecodeError: + return {"raw": body2.decode("utf-8", errors="replace"), "status": status2} + + print(f" Submission failed with status {status2}", file=sys.stderr) + try: + err = json.loads(body2) + print(f" Response: {json.dumps(err, indent=2)}", file=sys.stderr) + except (json.JSONDecodeError, TypeError): + print(f" Response: {body2[:500]}", file=sys.stderr) + return None + + def 
_submit_direct(self, url, train_py_source, config=None): + """Submit experiment without payment (free worker).""" + submit_body = { + "train_py": train_py_source, + "config": config or {}, + } + status, _, body = _http_post(url, submit_body, timeout=600) + if 200 <= status < 300: + try: + return json.loads(body) + except json.JSONDecodeError: + return {"raw": body.decode("utf-8", errors="replace"), "status": status} + return None + + def publish_result(self, val_bpb, experiment_id, train_hash, + worker_endpoint=None, worker_agent_id=None, extra=None): + """Store experiment result with provenance metadata locally.""" + result = { + "experiment_id": experiment_id, + "train_hash": train_hash, + "val_bpb": val_bpb, + "worker_endpoint": worker_endpoint, + "worker_agent_id": worker_agent_id, + "timestamp": datetime.now(timezone.utc).isoformat(), + "chain": self.chain, + } + if extra: + result.update(extra) + save_result(result) + return result + + def get_leaderboard(self, limit=20): + """Query 8004scan for autoresearch workers and rank by best val_bpb. + + Fetches worker registration metadata and extracts reported val_bpb + scores from their .well-known metadata. + """ + agents = query_8004scan( + protocol="OASF", + search=OASF_SKILL_FILTER, + limit=limit * 2, # fetch extra to account for workers without results + ) + + entries = [] + for agent in agents: + raw = agent.get("raw_metadata", {}) if isinstance(agent, dict) else {} + uri = agent.get("uri", agent.get("tokenURI", "")) or (raw.get("offchain_uri", "") if isinstance(raw, dict) else "") + agent_id = agent.get("agentId") or agent.get("agent_id") or agent.get("id") or agent.get("token_id") + name = agent.get("name", f"agent-{agent_id or '?'}") + registration = extract_registration_from_agent(agent) + if not registration: + continue + + # Look for autoresearch results in registration metadata. 
+ meta = registration.get("metadata", registration.get("autoresearch", {})) + if isinstance(meta, dict): + val_bpb = meta.get("best_val_bpb", meta.get("val_bpb")) + if val_bpb is not None: + entries.append({ + "name": name, + "agent_id": agent_id, + "val_bpb": float(val_bpb), + "uri": uri, + "updated": meta.get("updated", agent.get("updated_at", "")), + }) + + # Sort by val_bpb ascending (lower is better) + entries.sort(key=lambda e: e["val_bpb"]) + return entries[:limit] + + def run_loop(self, train_py_path, prefer_endpoint=None, max_rounds=None): + """Run the continuous THINK/CLAIM/RUN/PUBLISH loop. + + Args: + train_py_path: Path to the train.py file + prefer_endpoint: Optional preferred worker endpoint + max_rounds: Max iterations (None = infinite) + """ + if not os.path.exists(train_py_path): + print(f"Error: {train_py_path} not found", file=sys.stderr) + return + + train_hash = compute_train_hash(train_py_path) + with open(train_py_path, "r") as f: + train_source = f.read() + + round_num = 0 + while max_rounds is None or round_num < max_rounds: + round_num += 1 + print(f"\n{'='*60}") + print(f"Round {round_num}") + print(f"{'='*60}") + + # THINK: Review current state + results = load_results() + best = min((r["val_bpb"] for r in results if "val_bpb" in r), default=None) + if best is not None: + print(f" Current best val_bpb: {best:.4f} ({len(results)} experiments)") + else: + print(" No previous results") + + # CLAIM: Discover and select worker + print("\n Discovering workers...") + if prefer_endpoint: + # Use preferred endpoint directly + print(f" Using preferred worker: {prefer_endpoint}") + endpoint = prefer_endpoint + else: + workers = self.discover_workers(limit=10) + available = [w for w in workers if w.get("endpoint") and w.get("x402")] + if not available: + print(" No available workers found. 
Waiting before retry...") + time.sleep(DEFAULT_LOOP_DELAY) + continue + # Pick first available (could be enhanced with pricing comparison) + worker = available[0] + endpoint = worker["endpoint"] + print(f" Selected worker: {worker['name']} at {endpoint}") + + # Probe pricing + print("\n Probing pricing...") + pricing = self.probe_worker(endpoint) + if not pricing: + print(" Could not get pricing. Skipping this round.") + time.sleep(DEFAULT_LOOP_DELAY) + continue + if not pricing.get("free"): + print(f" Cost: {pricing.get('maxAmountRequired', pricing.get('price', '?'))} USDC micro-units") + + # RUN: Submit experiment + print("\n Submitting experiment...") + experiment_id = generate_experiment_id() + result = self.submit_experiment(endpoint, train_source) + if not result: + print(" Experiment submission failed. Trying next round.") + time.sleep(DEFAULT_LOOP_DELAY) + continue + + # PUBLISH: Record result + val_bpb = result.get("val_bpb", result.get("metrics", {}).get("val_bpb")) + if val_bpb is not None: + published = self.publish_result( + val_bpb=float(val_bpb), + experiment_id=experiment_id, + train_hash=train_hash, + worker_endpoint=endpoint, + extra={"raw_result": result}, + ) + print(f"\n Result: val_bpb = {val_bpb:.4f}") + print(f" Saved as {experiment_id}") + if best is not None and float(val_bpb) < best: + print(f" NEW BEST! 
(improved from {best:.4f})") + else: + print(f"\n Experiment completed but no val_bpb in result:") + print(f" {json.dumps(result, indent=2)[:500]}") + # Still save for provenance + self.publish_result( + val_bpb=None, + experiment_id=experiment_id, + train_hash=train_hash, + worker_endpoint=endpoint, + extra={"raw_result": result, "note": "no val_bpb returned"}, + ) + + if max_rounds is not None and round_num >= max_rounds: + print(f"\n Completed {max_rounds} rounds.") + break + + print(f"\n Waiting {DEFAULT_LOOP_DELAY}s before next round...") + time.sleep(DEFAULT_LOOP_DELAY) + + +# --------------------------------------------------------------------------- +# CLI commands +# --------------------------------------------------------------------------- + +def cmd_discover(args): + """List available GPU workers from 8004scan.""" + limit = args.limit or 20 + print(f"Discovering GPU workers with OASF skill '{OASF_SKILL_FILTER}'...") + print(f" API: {SCAN_API_URL}") + print() + + coordinator = ObolCoordinator(chain=args.chain) + workers = coordinator.discover_workers(limit=limit) + + if not workers: + print("No workers found.") + return + + print(f"Found {len(workers)} worker(s):\n") + print(f"{'Name':30} {'Agent ID':>10} {'x402':>5} Endpoint") + print(f"{'-'*30} {'-'*10} {'-'*5} {'-'*50}") + + for w in workers: + x402_str = "yes" if w.get("x402") else "no" + endpoint = w.get("endpoint") or "(none)" + name = (w.get("name") or "?")[:30] + agent_id = w.get("agent_id", "?") + print(f"{name:30} {str(agent_id):>10} {x402_str:>5} {endpoint}") + + if w.get("skills"): + print(f"{'':30} {'':>10} {'':>5} Skills: {', '.join(w['skills'][:5])}") + + +def cmd_probe(args): + """Probe a worker endpoint for x402 pricing.""" + endpoint = args.endpoint.rstrip("/") + print(f"Probing {endpoint} ...") + print() + + coordinator = ObolCoordinator(chain=args.chain) + pricing = coordinator.probe_worker(endpoint) + + if not pricing: + print("Could not get pricing info from this endpoint.") + 
sys.exit(1) + + if pricing.get("free"): + print("This worker has no payment gate (free access).") + return + + print("x402 Pricing:") + print(f" Status: {pricing.get('status', '?')}") + print(f" Pay To: {pricing.get('payTo', '?')}") + print(f" Network: {pricing.get('network', '?')}") + print(f" Amount: {pricing.get('maxAmountRequired', pricing.get('price', '?'))} USDC micro-units") + if pricing.get("priceModel"): + print(f" Price Model: {pricing['priceModel']}") + if pricing.get("description"): + print(f" Description: {pricing['description']}") + if pricing.get("facilitatorURL"): + print(f" Facilitator: {pricing['facilitatorURL']}") + + +def cmd_submit(args): + """Submit a single experiment to a worker.""" + endpoint = args.endpoint.rstrip("/") + train_py_path = args.train_py + + if not os.path.exists(train_py_path): + print(f"Error: {train_py_path} not found", file=sys.stderr) + sys.exit(1) + + config = None + if args.config: + try: + config = json.loads(args.config) + except json.JSONDecodeError as e: + print(f"Error: invalid JSON config: {e}", file=sys.stderr) + sys.exit(1) + + with open(train_py_path, "r") as f: + train_source = f.read() + + train_hash = compute_train_hash(train_py_path) + experiment_id = generate_experiment_id() + + print(f"Submitting experiment to {endpoint}") + print(f" train.py: {train_py_path}") + print(f" train hash: {train_hash}") + print(f" experiment ID: {experiment_id}") + if config: + print(f" config: {json.dumps(config)}") + print() + + coordinator = ObolCoordinator(chain=args.chain) + result = coordinator.submit_experiment(endpoint, train_source, config) + + if not result: + print("Experiment submission failed.", file=sys.stderr) + sys.exit(1) + + val_bpb = result.get("val_bpb", result.get("metrics", {}).get("val_bpb")) + + # Publish provenance + coordinator.publish_result( + val_bpb=float(val_bpb) if val_bpb is not None else None, + experiment_id=experiment_id, + train_hash=train_hash, + worker_endpoint=endpoint, + 
extra={"raw_result": result}, + ) + + print("Experiment completed!") + print(f" Result: {json.dumps(result, indent=2)[:1000]}") + if val_bpb is not None: + print(f" val_bpb: {val_bpb}") + print(f" Saved to {RESULTS_FILE}") + + +def cmd_leaderboard(args): + """Show global leaderboard from 8004scan worker metadata.""" + limit = args.limit or 20 + print(f"Fetching global autoresearch leaderboard...") + print() + + coordinator = ObolCoordinator(chain=args.chain) + entries = coordinator.get_leaderboard(limit=limit) + + if not entries: + # Fall back to local results + print("No leaderboard data from 8004scan. Showing local results:\n") + results = load_results() + if not results: + print("No local results either.") + return + + # Group by worker, show best per worker + by_worker = {} + for r in results: + key = r.get("worker_endpoint", "local") + if r.get("val_bpb") is not None: + if key not in by_worker or r["val_bpb"] < by_worker[key]["val_bpb"]: + by_worker[key] = r + + sorted_workers = sorted(by_worker.values(), key=lambda r: r["val_bpb"]) + print(f"{'Rank':>5} {'val_bpb':>10} {'Experiment':20} Worker") + print(f"{'-'*5} {'-'*10} {'-'*20} {'-'*40}") + for i, r in enumerate(sorted_workers[:limit], 1): + print(f"{i:>5} {r['val_bpb']:>10.4f} {r.get('experiment_id', '?'):20} {r.get('worker_endpoint', '?')}") + return + + print(f"{'Rank':>5} {'val_bpb':>10} {'Agent ID':>10} {'Name':30} Updated") + print(f"{'-'*5} {'-'*10} {'-'*10} {'-'*30} {'-'*20}") + for i, e in enumerate(entries, 1): + name = (e.get("name") or "?")[:30] + print(f"{i:>5} {e['val_bpb']:>10.4f} {str(e.get('agent_id', '?')):>10} {name:30} {e.get('updated', '?')}") + + +def cmd_loop(args): + """Run the continuous experiment loop.""" + train_py_path = args.train_py + prefer = args.prefer + rounds = args.rounds + + if not os.path.exists(train_py_path): + print(f"Error: {train_py_path} not found", file=sys.stderr) + sys.exit(1) + + coordinator = ObolCoordinator(chain=args.chain) + print(f"Starting experiment 
loop") + print(f" train.py: {train_py_path}") + print(f" chain: {coordinator.chain}") + if prefer: + print(f" prefer: {prefer}") + if rounds: + print(f" rounds: {rounds}") + print() + + try: + coordinator.run_loop(train_py_path, prefer_endpoint=prefer, max_rounds=rounds) + except KeyboardInterrupt: + print("\n\nLoop interrupted by user.") + results = load_results() + if results: + best = min((r["val_bpb"] for r in results if r.get("val_bpb") is not None), default=None) + print(f"Total experiments: {len(results)}") + if best is not None: + print(f"Best val_bpb: {best:.4f}") + + +# --------------------------------------------------------------------------- +# Main +# --------------------------------------------------------------------------- + +def main(): + parser = argparse.ArgumentParser( + description="Coordinate distributed autoresearch experiments via obol-stack" + ) + parser.add_argument("--chain", default=None, help="Chain/network for payments (default: base-sepolia)") + sub = parser.add_subparsers(dest="command", help="Command to run") + + # discover + p_discover = sub.add_parser("discover", help="List available GPU workers") + p_discover.add_argument("--limit", type=int, default=20, help="Max results (default: 20)") + + # probe + p_probe = sub.add_parser("probe", help="Check x402 pricing for a worker") + p_probe.add_argument("endpoint", help="Worker endpoint URL") + + # submit + p_submit = sub.add_parser("submit", help="Submit experiment to a worker") + p_submit.add_argument("endpoint", help="Worker endpoint URL") + p_submit.add_argument("train_py", help="Path to train.py") + p_submit.add_argument("--config", default=None, help="JSON config overrides") + + # leaderboard + p_leader = sub.add_parser("leaderboard", help="Show global rankings") + p_leader.add_argument("--limit", type=int, default=20, help="Max results (default: 20)") + + # loop + p_loop = sub.add_parser("loop", help="Run continuous experiment loop") + p_loop.add_argument("train_py", help="Path 
to train.py") + p_loop.add_argument("--prefer", default=None, help="Preferred worker endpoint URL") + p_loop.add_argument("--rounds", type=int, default=None, help="Max rounds (default: infinite)") + + args = parser.parse_args() + + if not args.command: + parser.print_help() + sys.exit(1) + + commands = { + "discover": cmd_discover, + "probe": cmd_probe, + "submit": cmd_submit, + "leaderboard": cmd_leaderboard, + "loop": cmd_loop, + } + + try: + commands[args.command](args) + except KeyboardInterrupt: + print("\nInterrupted.", file=sys.stderr) + sys.exit(130) + except (urllib.error.URLError, urllib.error.HTTPError, OSError) as e: + print(f"Network error: {e}", file=sys.stderr) + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/internal/embed/skills/autoresearch-worker/SKILL.md b/internal/embed/skills/autoresearch-worker/SKILL.md new file mode 100644 index 00000000..034d177f --- /dev/null +++ b/internal/embed/skills/autoresearch-worker/SKILL.md @@ -0,0 +1,128 @@ +--- +name: autoresearch-worker +description: "Run a GPU worker that accepts paid autoresearch experiments over HTTP and monetize it through obol sell http." +metadata: { "openclaw": { "emoji": "🖥️", "requires": { "bins": ["python3", "uv"] } } } +--- + +# Autoresearch Worker + +Run a single-GPU worker that accepts `train.py` experiments over HTTP, executes them one at a time, stores results on disk, and exposes a simple API that can be gated with x402 using `obol sell http`. 
+ +## When to Use + +- Selling GPU-backed autoresearch experiment execution +- Exposing a remote `POST /experiment` endpoint for the autoresearch coordinator +- Running a worker on a Linux GPU host with `k3s` +- Testing the sell-side of the GPU marketplace locally before exposing it publicly + +## When NOT to Use + +- Coordinating experiments across many workers — use `autoresearch-coordinator` +- Publishing optimized checkpoints as inference — use `autoresearch` +- Selling a generic HTTP app — use `sell` + +## What the Worker Exposes + +- `GET /health` / `GET /healthz` — worker health +- `GET /status` — busy/idle plus current, last, and best result +- `GET /best` — best known result +- `GET /experiments/<id>` — fetch a stored result by experiment ID +- `POST /experiment` — submit a `train.py` experiment + +The worker is intentionally simple: +- one experiment at a time +- one GPU worker process +- local disk for logs/results +- no distributed queue or scheduler inside the worker + +## Quick Start + +### 1. Start the worker API + +```bash +python3 scripts/worker_api.py serve \ + --repo /path/to/autoresearch \ + --data-dir /data \ + --host 0.0.0.0 \ + --port 8080 \ + --timeout 300 +``` + +The repo path should point at a prepared autoresearch workdir/repo with the dependencies already available via `uv`. + +### 2. Verify the worker locally + +```bash +curl -s http://127.0.0.1:8080/health | jq . +curl -s http://127.0.0.1:8080/status | jq . +``` + +### 3. Deploy the worker behind a Kubernetes Service + +For production GPU sellers, prefer `k3s` on the GPU host. The monetization path is cluster-based, so the worker should be reachable as a Kubernetes Service. + +### 4. 
Monetize it with x402 + +```bash +obol sell http autoresearch-worker \ + --namespace autoresearch \ + --upstream autoresearch-worker \ + --port 8080 \ + --health-path /health \ + --wallet 0xYourWalletAddress \ + --chain base-sepolia \ + --per-hour 0.50 \ + --path /services/autoresearch-worker \ + --register \ + --register-name "GPU Worker Alpha" \ + --register-description "A GPU worker for paid autoresearch experiments" \ + --register-skills machine_learning/model_optimization \ + --register-domains technology/artificial_intelligence/research \ + --register-metadata gpu=A100-80GB \ + --register-metadata framework=pytorch \ + --register-metadata best_val_bpb=1.234 \ + --register-metadata total_experiments=42 +``` + +This creates a `ServiceOffer` that: +- health-checks the worker +- creates a payment-gated public route +- optionally registers the worker on ERC-8004 for discovery + +## Request Format + +Submit an experiment with: + +```json +{ + "train_py": "print('hello world')", + "config": { + "batch_size": 64, + "learning_rate": 0.001 + } +} +``` + +If the request makes it through the x402 gate, the worker runs the experiment synchronously and returns a result JSON document. 
+ +## Important Constraints + +- **Single-flight** — one experiment at a time; concurrent submissions return `409` +- **Arbitrary code execution** — submitted `train.py` is executed, so run this only on infrastructure dedicated to the worker +- **Local persistence** — results are stored under `$DATA_DIR/autoresearch-worker/results/` +- **Approx pricing** — `--per-hour` is converted into a request price using the current 5-minute experiment budget assumption +- **Cluster-based monetization** — `obol sell http` expects a Kubernetes Service, so `k3s` is the preferred deployment target for GPU sellers + +## Files + +- `scripts/worker_api.py` — HTTP worker service +- `references/worker-api.md` — endpoint and deployment details +- `docker/Dockerfile` — container image for the worker + +## References + +- `references/worker-api.md` — worker endpoints, result schema, and deployment notes +- `references/k3s-gpu-worker.md` — minimal k3s Deployment + Service example +- `references/claude-code-seller-prompt.md` — prompt to turn a plain autoresearch@home join flow into a real seller-oriented flow +- See also: `sell` for ServiceOffer monetization +- See also: `autoresearch-coordinator` for the buyer/coordinator side of remote experiments diff --git a/internal/embed/skills/autoresearch-worker/docker/Dockerfile b/internal/embed/skills/autoresearch-worker/docker/Dockerfile new file mode 100644 index 00000000..3df8ff2d --- /dev/null +++ b/internal/embed/skills/autoresearch-worker/docker/Dockerfile @@ -0,0 +1,32 @@ +FROM nvidia/cuda:12.4.1-runtime-ubuntu22.04 + +ENV DEBIAN_FRONTEND=noninteractive + +RUN apt-get update && apt-get install -y --no-install-recommends \ + ca-certificates \ + curl \ + git \ + python3 \ + python3-pip \ + python3-venv \ + && rm -rf /var/lib/apt/lists/* + +# Install uv for running the autoresearch repo environment. 
+RUN curl -LsSf https://astral.sh/uv/install.sh | sh \ + && mv /root/.local/bin/uv /usr/local/bin/uv + +WORKDIR /app + +# Copy the worker API script. Build context must be the skill directory, so run from there: +# docker build -f docker/Dockerfile -t autoresearch-worker . +COPY scripts/worker_api.py /app/worker_api.py + +ENV DATA_DIR=/data \ + AUTORESEARCH_REPO=/data/autoresearch \ + EXPERIMENT_TIMEOUT_SECONDS=300 \ + TRAIN_COMMAND="uv run train.py" + +VOLUME ["/data"] +EXPOSE 8080 + +ENTRYPOINT ["python3", "/app/worker_api.py", "serve", "--repo", "/data/autoresearch", "--data-dir", "/data", "--host", "0.0.0.0", "--port", "8080"] diff --git a/internal/embed/skills/autoresearch-worker/references/claude-code-seller-prompt.md b/internal/embed/skills/autoresearch-worker/references/claude-code-seller-prompt.md new file mode 100644 index 00000000..5607223e --- /dev/null +++ b/internal/embed/skills/autoresearch-worker/references/claude-code-seller-prompt.md @@ -0,0 +1,199 @@ +# Claude Code seller onboarding prompt + +Use this prompt when a GPU owner opens `mutable-state-inc/autoresearch-at-home` in Claude Code and you want Claude to do more than merely join the Ensue swarm. + +The key distinction is: + +- **joining autoresearch@home** => collaborative contributor mode +- **selling GPU through Obol Stack** => contributor mode **plus** worker API **plus** x402-gated seller exposure + +## Prompt + +```text +Read this repository carefully and do NOT stop at merely joining the autoresearch@home swarm. + +Primary goal: +Turn this machine into a real autoresearch contributor AND, if possible on this machine, a paid GPU seller for remote autoresearch experiments via Obol Stack/x402. + +You must optimize for actual working state, not just partial setup. + +Success criteria, in order: +1. Verify this machine has a usable CUDA GPU and can run autoresearch experiments. +2. Join the autoresearch@home Ensue swarm correctly. +3. 
Start a working autoresearch worker API on this machine that can accept remote experiments. +4. If Obol Stack is available on this machine, expose that worker as a paid x402-gated service and register it with useful metadata for discovery. +5. Leave behind a concise status report with: + - chosen codename + - swarm join status + - local worker status + - paid seller status + - public/cluster endpoint(s) + - exact blockers if anything could not be completed + - exact next command the human should run + +Important: +- Do not confuse “joined autoresearch@home” with “selling GPU”. +- Joining the swarm is necessary but NOT sufficient. +- Paid seller behavior requires a worker endpoint plus Obol/x402 exposure. +- Be honest about what is actually verified. +- Do not claim success for any step you did not verify. +- Work autonomously and do not ask for confirmation unless blocked by a true human-only step. +- Human-only steps include: + - Ensue claim_url / email verification + - entering third-party tokens if not already available + - cloud account login +- If blocked, print a minimal, exact unblock request and otherwise keep going. + +Read these files fully first: +- README.md +- program.md +- collab.md +- coordinator.py +- prepare.py +- train.py + +Then execute this plan: + +PHASE 1 — GPU + local autoresearch readiness +1. Run `nvidia-smi`. + - If no GPU is available, stop the paid-seller path and print: + “BLOCKED: no CUDA GPU available” + - If GPU exists, record GPU model and VRAM. +2. Run `uv sync`. +3. Check whether `~/.cache/autoresearch/` already contains prepared data/tokenizer artifacts. + - If missing, run `uv run prepare.py`. +4. Ensure `results.tsv` exists with the correct header if needed. +5. Create or switch to a fresh branch for this run if appropriate. + +PHASE 2 — Join autoresearch@home correctly +6. Check for `ENSUE_API_KEY` or `.autoresearch-key`. +7. 
If neither exists: + - pick 3 good single-word codename suggestions + - ask the human to choose one if required + - register the agent using the Ensue API + - save the `api_key` to `.autoresearch-key` + - show the human the exact `claim_url` and verification code + - pause only until this human verification step is complete +8. Initialize the coordinator: + - `from coordinator import Coordinator` + - `coord = Coordinator()` + - set `coord.agent_id` to the chosen codename +9. Join the hub with the invite token from this repo. +10. Run `coord.announce()`. +11. Pull the best config for this hardware tier: + - prefer `coord.pull_best_config_for_tier()` + - fall back to the global best if needed +12. If the pulled config is better than local baseline, adopt it into `train.py` and commit that change. + +PHASE 3 — Do not stop at contributor mode; build seller mode +13. Determine whether this machine can support Obol seller flow: + - check whether `obol` CLI exists + - check whether an Obol Stack cluster is running + - check whether `obol sell http` is available +14. If Obol Stack is not available: + - still get the local worker API running + - print clearly that paid seller mode is blocked by missing Obol Stack + - continue contributing to the swarm if possible +15. If Obol Stack is available, continue to full seller setup. + +PHASE 4 — Get a real worker API running +16. You need a worker API that accepts remote experiments. +17. If this repo already has a working worker API, use it. +18. If not, fetch the current implementation from Obol Stack: + - inspect `ObolNetwork/obol-stack` + - prefer main if merged + - otherwise use the current feature work if needed + - specifically look for the autoresearch worker implementation and current seller flow +19. Start a worker API on port 8080 that exposes: + - GET /health or /healthz + - GET /status + - GET /best + - GET /experiments/<id> + - POST /experiment +20. 
Verify locally with real requests that: + - health endpoint works + - a trivial experiment submission works + - the worker stores results and returns `val_bpb` if available +21. If the worker cannot be started, print the exact blocker and stop seller mode. + +PHASE 5 — Paid seller mode through Obol Stack +22. If the worker runs on the same machine as Obol Stack, choose the simplest viable path to expose it to the cluster. +23. If `obol sell http` requires an in-cluster Service, create the smallest safe relay/proxy needed so the cluster can reach the worker. +24. Monetize the worker with `obol sell http`. + Use a path like `/services/autoresearch-worker`. +25. Register useful metadata, including at minimum: + - GPU model + - framework=autoresearch + - runtime or source indicating this machine + - optionally current best_val_bpb if known +26. Use OASF discovery metadata appropriate for model optimization. +27. Verify: + - ServiceOffer exists + - ServiceOffer reaches Ready if possible + - registration JSON exists + - registration JSON includes x402Support + - registration JSON includes a service endpoint + - registration JSON includes OASF skills/domains + - registration JSON includes metadata fields +28. If a tunnel/public URL is available, record it. +29. If no public URL is available, still record the cluster-local seller state and exact blocker to public access. + +PHASE 6 — Start contributing for real +30. Once worker/seller setup is done or blocked clearly, begin the real collaborative experiment loop: + - THINK + - CLAIM + - RUN + - PUBLISH +31. Do at least one real experiment cycle if feasible. +32. 
Publish: + - result + - insight + - hypothesis + +OUTPUT REQUIREMENTS +At the end, create a short file in the repo root called `SELLER_STATUS.md` containing: +- machine GPU info +- Ensue join status +- codename +- worker API status +- paid seller status +- commands that were run +- URLs/endpoints created +- blockers and next steps + +Also print a concise terminal summary: +- CONTRIBUTOR: READY / BLOCKED +- WORKER API: READY / BLOCKED +- PAID SELLER: READY / BLOCKED +- NEXT ACTION REQUIRED FROM HUMAN: + +Behavior constraints: +- Do not stop after “join autoresearch@home”. +- Do not just explain what should be done; do it. +- If a step fails, debug it before giving up. +- If you cannot fully complete paid seller mode, still leave the machine in the best possible partial state and explain the exact remaining blocker. +``` + +## Why this prompt exists + +The default upstream collaborative flow is optimized for joining the Ensue swarm and contributing experiments. That is useful, but it does not automatically turn the machine into a paid seller. 
+ +This prompt forces the agent to: +- distinguish contributor mode from seller mode +- stand up a worker API +- attempt Obol/x402 exposure when possible +- verify what actually works +- leave behind a concrete status report + +## Best use cases + +Use this prompt when: +- a GPU owner wants to join `autoresearch-at-home` +- you also want that machine to become a paid worker if possible +- you want Claude Code to operate autonomously and not stop at partial onboarding + +## Related references + +- `worker-api.md` — what the worker must expose +- `k3s-gpu-worker.md` — cluster deployment pattern for sellers +- `../SKILL.md` — overall autoresearch-worker operator guidance diff --git a/internal/embed/skills/autoresearch-worker/references/k3s-gpu-worker.md b/internal/embed/skills/autoresearch-worker/references/k3s-gpu-worker.md new file mode 100644 index 00000000..e2606094 --- /dev/null +++ b/internal/embed/skills/autoresearch-worker/references/k3s-gpu-worker.md @@ -0,0 +1,76 @@ +# k3s GPU Worker Deployment Example + +This example shows the minimal shape for running the autoresearch worker inside a `k3s` cluster on a GPU host. 
+ +```yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: autoresearch-worker + namespace: autoresearch +spec: + replicas: 1 + selector: + matchLabels: + app: autoresearch-worker + template: + metadata: + labels: + app: autoresearch-worker + spec: + containers: + - name: worker + image: autoresearch-worker:dev + imagePullPolicy: IfNotPresent + ports: + - containerPort: 8080 + env: + - name: DATA_DIR + value: /data + - name: AUTORESEARCH_REPO + value: /data/autoresearch + - name: EXPERIMENT_TIMEOUT_SECONDS + value: "300" + resources: + limits: + nvidia.com/gpu: 1 + volumeMounts: + - name: worker-data + mountPath: /data + volumes: + - name: worker-data + persistentVolumeClaim: + claimName: autoresearch-worker-data +--- +apiVersion: v1 +kind: Service +metadata: + name: autoresearch-worker + namespace: autoresearch +spec: + selector: + app: autoresearch-worker + ports: + - name: http + port: 8080 + targetPort: 8080 +``` + +After the Service exists, expose it with: + +```bash +obol sell http autoresearch-worker \ + --namespace autoresearch \ + --upstream autoresearch-worker \ + --port 8080 \ + --health-path /health \ + --wallet 0xYourWalletAddress \ + --chain base-sepolia \ + --per-hour 0.50 \ + --path /services/autoresearch-worker \ + --register \ + --register-name "GPU Worker Alpha" \ + --register-description "A GPU worker for paid autoresearch experiments" \ + --register-skills machine_learning/model_optimization \ + --register-domains technology/artificial_intelligence/research +``` diff --git a/internal/embed/skills/autoresearch-worker/references/worker-api.md b/internal/embed/skills/autoresearch-worker/references/worker-api.md new file mode 100644 index 00000000..def52c71 --- /dev/null +++ b/internal/embed/skills/autoresearch-worker/references/worker-api.md @@ -0,0 +1,145 @@ +# Autoresearch Worker API + +The worker API is a small synchronous HTTP service for selling GPU-backed autoresearch experiments. 
+ +## Endpoints + +### `GET /health` and `GET /healthz` + +Returns basic health/config status. + +Example response: + +```json +{ + "status": "ok", + "busy": false, + "repo": "/data/autoresearch", + "timeoutSeconds": 300 +} +``` + +### `GET /status` + +Returns the live worker status including current, last, and best results. + +### `GET /best` + +Returns the best result seen so far, or `404` if no completed result exists yet. + +### `GET /experiments/<experiment_id>` + +Returns the stored JSON result for one experiment, or `404` if no result is stored under that ID. + +### `POST /experiment` + +Runs an experiment. Expected request body: + +```json +{ + "train_py": "print('training logic here')", + "config": { + "batch_size": 64, + "learning_rate": 0.001 + }, + "experiment_id": "optional-custom-id" +} +``` + +Special-case probe body: + +```json +{ + "probe": true +} +``` + +This returns `200` with a small readiness payload when the request reaches the worker directly. In the x402-gated flow, unauthenticated probe requests should usually be intercepted before the worker and turned into `402 Payment Required`. 
+ +## Result Shape + +Example response: + +```json +{ + "experiment_id": "exp-20260312-deadbeef", + "status": "completed", + "return_code": 0, + "val_bpb": 1.0234, + "train_hash": "sha256:...", + "artifact_path": "/data/autoresearch-worker/results/exp-20260312-deadbeef/work/model.gguf", + "log_path": "/data/autoresearch-worker/results/exp-20260312-deadbeef/run.log", + "startedAt": "2026-03-12T12:00:00+00:00", + "finishedAt": "2026-03-12T12:05:00+00:00", + "durationSeconds": 300.0, + "config": {} +} +``` + +`status` may be: +- `completed` +- `failed` +- `timeout` + +## Data Layout + +The worker stores state under: + +```text +$DATA_DIR/autoresearch-worker/ + best.json + results.jsonl + results/ + <experiment_id>/ + config.json + train.py + run.log + result.json + work/ +``` + +## Deployment Notes + +## Recommended: k3s on the GPU host + +This is the cleanest production path because: +- the worker is reachable through a Kubernetes Service +- `obol sell http` can point at that service directly +- GPU access can be provided via the host's Kubernetes GPU setup + +### Minimal deployment pattern + +1. Build the image: + +```bash +cd internal/embed/skills/autoresearch-worker +docker build -f docker/Dockerfile -t autoresearch-worker:dev . +``` + +2. Run it on a GPU host with the autoresearch repo mounted at `/data/autoresearch`. + +3. Expose it as a Kubernetes Service named `autoresearch-worker` in namespace `autoresearch`. + +4. 
Monetize it with: + +```bash +obol sell http autoresearch-worker \ + --namespace autoresearch \ + --upstream autoresearch-worker \ + --port 8080 \ + --health-path /health \ + --wallet 0xYourWalletAddress \ + --chain base-sepolia \ + --per-hour 0.50 \ + --path /services/autoresearch-worker \ + --register \ + --register-name "GPU Worker Alpha" \ + --register-description "A GPU worker for paid autoresearch experiments" \ + --register-skills machine_learning/model_optimization \ + --register-domains technology/artificial_intelligence/research +``` + +## Security Note + +This API executes submitted `train.py` code. Treat the worker as dedicated, untrusted-code infrastructure. +Do not run it on a machine that also hosts unrelated workloads or sensitive data. diff --git a/internal/embed/skills/autoresearch-worker/scripts/worker_api.py b/internal/embed/skills/autoresearch-worker/scripts/worker_api.py new file mode 100644 index 00000000..619b877c --- /dev/null +++ b/internal/embed/skills/autoresearch-worker/scripts/worker_api.py @@ -0,0 +1,471 @@ +#!/usr/bin/env python3 +"""worker_api.py -- GPU worker HTTP API for autoresearch experiments. + +Runs submitted train.py experiments one at a time, stores results on disk, and +exposes a minimal HTTP API suitable for x402-gated selling via `obol sell http`. 
def extract_val_bpb(log_text: str, workdir: Path | None = None) -> float | None:
    """Extract the validation bits-per-byte (``val_bpb``) reported by a run.

    Sources are tried in this order, and the first usable value wins:

    1. ``log_text`` parsed as one JSON document, reading ``val_bpb`` from the
       top-level object or from a nested ``metrics`` object.
    2. The first textual match of ``val_bpb``, ``best_val_bpb`` or a quoted
       ``"val_bpb"`` key followed by ``:``/``=`` and a number. Plain decimals
       and scientific notation (``9.8e-01``) are both accepted.
    3. The smallest ``val_bpb`` value found in ``<workdir>/results.tsv``.

    Non-finite values (NaN, +/-inf) and booleans are never returned: a NaN
    would otherwise win every "smaller than" comparison by default and could
    not be serialised as standard JSON.

    Args:
        log_text: Captured output of the training command (may be empty).
        workdir: Experiment working directory to search for ``results.tsv``;
            ``None`` skips the file fallback.

    Returns:
        The extracted value, or ``None`` when no source yields a finite number.
    """
    import math  # local import keeps this block self-contained

    def finite(value: Any) -> float | None:
        """Convert ``value`` to a finite float; raise like ``float()`` on junk."""
        if isinstance(value, bool):
            # JSON ``true``/``false`` would silently become 1.0/0.0.
            return None
        number = float(value)
        return number if math.isfinite(number) else None

    stripped = (log_text or "").strip()
    if stripped:
        # 1. Whole log is a single JSON payload.
        try:
            data = json.loads(stripped)
            if isinstance(data, dict):
                candidates = [data.get("val_bpb")]
                metrics = data.get("metrics")
                if isinstance(metrics, dict):
                    candidates.append(metrics.get("val_bpb"))
                for candidate in candidates:
                    if candidate is None:
                        continue
                    parsed = finite(candidate)
                    if parsed is not None:
                        return parsed
        except (json.JSONDecodeError, ValueError, TypeError):
            pass

        # 2. Free-form text. The numeric group accepts an optional fraction
        #    and exponent so "9.8e-01" is read as 0.98 rather than 9.8.
        number_re = r"((?:[0-9]+(?:\.[0-9]*)?|\.[0-9]+)(?:[eE][-+]?[0-9]+)?)"
        patterns = [
            r"\bval_bpb\b\s*[:=]\s*" + number_re,
            r"\bbest[_ ]val[_ ]bpb\b\s*[:=]\s*" + number_re,
            r'"val_bpb"\s*:\s*' + number_re,
        ]
        for pattern in patterns:
            match = re.search(pattern, stripped, flags=re.IGNORECASE)
            if not match:
                continue
            try:
                parsed = finite(match.group(1))
            except ValueError:
                continue
            if parsed is not None:
                return parsed

    # 3. Fall back to the experiment's own results.tsv, if it wrote one.
    if workdir is not None:
        results_path = Path(workdir) / "results.tsv"
        if results_path.exists():
            best: float | None = None
            with open(results_path, "r", encoding="utf-8") as f:
                for row in csv.DictReader(f, delimiter="\t"):
                    value = row.get("val_bpb")
                    if not value:
                        continue
                    try:
                        parsed = finite(value)
                    except (ValueError, TypeError):
                        continue
                    if parsed is None:
                        continue
                    if best is None or parsed < best:
                        best = parsed
            return best

    return None
class WorkerState:
    """Serialises experiment runs and persists their results under ``data_dir``.

    At most one experiment runs at a time. ``busy``/``current`` describe the
    run in flight; ``last_result``/``best_result`` hold the most recent result
    and the one selected by ``choose_best_result``. Files live below
    ``<data_dir>/autoresearch-worker``: ``best.json``, ``results.jsonl`` and
    ``results/<experiment_id>/`` with ``config.json``, ``train.py``,
    ``run.log``, ``result.json`` and the ``work/`` directory.
    """

    # Experiment IDs become directory names below ``results_dir``. Restricting
    # caller-supplied IDs to this alphabet (first character alphanumeric) rules
    # out separators, "." and "..", so an ID cannot point outside that tree.
    _EXPERIMENT_ID_RE = re.compile(r"[A-Za-z0-9][A-Za-z0-9._-]{0,127}")

    def __init__(self, config: WorkerConfig):
        self.config = config
        self.lock = threading.Lock()
        self.busy = False
        self.current: dict[str, Any] | None = None
        self.last_result: dict[str, Any] | None = None
        self.base_dir = config.data_dir / "autoresearch-worker"
        self.results_dir = self.base_dir / "results"
        self.best_path = self.base_dir / "best.json"
        self.history_path = self.base_dir / "results.jsonl"
        self.base_dir.mkdir(parents=True, exist_ok=True)
        self.results_dir.mkdir(parents=True, exist_ok=True)
        self.best_result = self._load_json(self.best_path)

    @staticmethod
    def _as_text(stream: str | bytes | None) -> str:
        """Normalise captured process output to ``str``.

        ``subprocess.TimeoutExpired`` carries bytes (or ``None``) even when the
        process was started with ``text=True``, so the timeout path cannot
        assume ``str``.
        """
        if stream is None:
            return ""
        if isinstance(stream, bytes):
            return stream.decode("utf-8", errors="replace")
        return stream

    def _load_json(self, path: Path) -> dict[str, Any] | None:
        """Return the JSON object stored at ``path``, or ``None``.

        Missing, unreadable, malformed or non-object files all yield ``None``
        so a damaged ``best.json`` cannot break start-up.
        """
        if not path.exists():
            return None
        try:
            data = json.loads(path.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            return None
        return data if isinstance(data, dict) else None

    def snapshot(self) -> dict[str, Any]:
        """Return a consistent view of worker status for the HTTP layer."""
        with self.lock:
            return {
                "status": "busy" if self.busy else "idle",
                "current": self.current,
                "last": self.last_result,
                "best": self.best_result,
                "repo": str(self.config.repo),
                "command": self.config.command,
                "timeoutSeconds": self.config.timeout_seconds,
            }

    def record_result(self, result: dict[str, Any]) -> None:
        """Persist ``result`` and update the last/best pointers.

        Writes ``results/<id>/result.json``, appends one line to
        ``results.jsonl`` and rewrites ``best.json`` when ``result`` becomes
        the new best.
        """
        exp_dir = self.results_dir / result["experiment_id"]
        exp_dir.mkdir(parents=True, exist_ok=True)
        (exp_dir / "result.json").write_text(json.dumps(result, indent=2), encoding="utf-8")
        with open(self.history_path, "a", encoding="utf-8") as f:
            f.write(json.dumps(result) + "\n")

        best = choose_best_result(self.best_result, result)
        is_new_best = best is not self.best_result and best is not None
        if is_new_best:
            self.best_path.write_text(json.dumps(best, indent=2), encoding="utf-8")
        # Publish the in-memory pointers under the same lock snapshot() holds,
        # so readers never observe a half-updated state.
        with self.lock:
            if is_new_best:
                self.best_result = best
            self.last_result = result

    def run_experiment(self, train_py_source: str, config_overrides: dict[str, Any] | None = None, experiment_id: str | None = None) -> dict[str, Any]:
        """Run one experiment synchronously and return its result record.

        Args:
            train_py_source: Full text of the ``train.py`` to execute.
            config_overrides: Arbitrary JSON object stored as ``config.json``.
            experiment_id: Optional caller-chosen ID; generated when omitted.

        Raises:
            ValueError: Empty source, an ID that is not a safe directory name,
                or an ID whose results directory already exists.
            BusyError: Another experiment is currently running.
        """
        if not train_py_source.strip():
            raise ValueError("train_py is required")
        if experiment_id and not self._EXPERIMENT_ID_RE.fullmatch(experiment_id):
            raise ValueError(
                "experiment_id must be 1-128 characters from [A-Za-z0-9._-] "
                "and start with a letter or digit"
            )

        with self.lock:
            if self.busy:
                raise BusyError("worker is already running an experiment")
            exp_id = experiment_id or generate_experiment_id()
            # Refuse to clobber the stored artefacts of an earlier run.
            if (self.results_dir / exp_id).exists():
                raise ValueError(f"experiment {exp_id} already exists")
            self.busy = True
            self.current = {
                "experiment_id": exp_id,
                "startedAt": utc_now(),
            }

        try:
            result = self._run_experiment_impl(exp_id, train_py_source, config_overrides or {})
            self.record_result(result)
            return result
        finally:
            with self.lock:
                self.busy = False
                self.current = None

    def _run_experiment_impl(self, experiment_id: str, train_py_source: str, config_overrides: dict[str, Any]) -> dict[str, Any]:
        """Materialise the workdir, run the training command and build the result."""
        exp_dir = self.results_dir / experiment_id
        workdir = exp_dir / "work"
        exp_dir.mkdir(parents=True, exist_ok=True)

        started = time.time()
        started_at = utc_now()
        train_hash = sha256_text(train_py_source)
        status = "completed"
        return_code = 0
        stdout = ""
        stderr = ""
        note = None

        # Each experiment gets its own copy of the template repo (if any).
        if self.config.repo.exists():
            shutil.copytree(self.config.repo, workdir, ignore=COPY_IGNORE)
        else:
            workdir.mkdir(parents=True, exist_ok=True)

        # The submitted script replaces the template's train.py; a second copy
        # is kept next to the result for later inspection.
        train_path = workdir / "train.py"
        train_path.write_text(train_py_source, encoding="utf-8")
        (exp_dir / "train.py").write_text(train_py_source, encoding="utf-8")

        config_path = exp_dir / "config.json"
        config_path.write_text(json.dumps(config_overrides, indent=2), encoding="utf-8")

        env = os.environ.copy()
        env.setdefault("PYTHONUNBUFFERED", "1")
        env["AUTORESEARCH_EXPERIMENT_ID"] = experiment_id
        env["AUTORESEARCH_EXPERIMENT_DIR"] = str(exp_dir)
        env["AUTORESEARCH_EXPERIMENT_CONFIG"] = str(config_path)

        # NOTE(review): on timeout subprocess.run kills only the direct child;
        # whether grandchildren started by the command are also stopped is not
        # visible from here -- confirm for the configured TRAIN_COMMAND.
        try:
            completed = subprocess.run(
                self.config.command,
                cwd=workdir,
                env=env,
                capture_output=True,
                text=True,
                timeout=self.config.timeout_seconds,
                check=False,
            )
            stdout = completed.stdout
            stderr = completed.stderr
            return_code = completed.returncode
            if completed.returncode != 0:
                status = "failed"
                note = f"command exited with status {completed.returncode}"
        except subprocess.TimeoutExpired as exc:
            # Partial output arrives as bytes here regardless of text=True.
            stdout = self._as_text(exc.stdout)
            stderr = self._as_text(exc.stderr)
            return_code = -1
            status = "timeout"
            note = f"experiment exceeded timeout of {self.config.timeout_seconds}s"
        except OSError as exc:
            stderr = str(exc)
            return_code = -1
            status = "failed"
            note = f"failed to execute command: {exc}"

        log_text = stdout
        if stderr:
            log_text += ("\n--- stderr ---\n" if log_text else "") + stderr
        log_path = exp_dir / "run.log"
        log_path.write_text(log_text, encoding="utf-8")

        val_bpb = extract_val_bpb(log_text, workdir)
        artifact = find_artifact(workdir)

        result = {
            "experiment_id": experiment_id,
            "status": status,
            "return_code": return_code,
            "val_bpb": val_bpb,
            "train_hash": train_hash,
            "artifact_path": str(artifact) if artifact else None,
            "log_path": str(log_path),
            "startedAt": started_at,
            "finishedAt": utc_now(),
            "durationSeconds": round(time.time() - started, 3),
            "config": config_overrides,
        }
        if note:
            result["note"] = note
        return result
+ raw = self.rfile.read(length) + if not raw: + return {} + try: + data = json.loads(raw) + except json.JSONDecodeError as exc: + raise ValueError(f"invalid JSON body: {exc}") from exc + if not isinstance(data, dict): + raise ValueError("JSON body must be an object") + return data + + def do_GET(self) -> None: + assert self.state is not None + path = urlparse(self.path).path + if path in ("/health", "/healthz"): + self._json(200, { + "status": "ok", + "busy": self.state.snapshot()["status"] == "busy", + "repo": str(self.state.config.repo), + "timeoutSeconds": self.state.config.timeout_seconds, + }) + return + if path == "/status": + self._json(200, self.state.snapshot()) + return + if path == "/best": + if self.state.best_result is None: + self._json(404, {"error": "no completed experiments yet"}) + return + self._json(200, self.state.best_result) + return + if path.startswith("/experiments/"): + exp_id = path.rsplit("/", 1)[-1] + result_path = self.state.results_dir / exp_id / "result.json" + if not result_path.exists(): + self._json(404, {"error": f"experiment {exp_id} not found"}) + return + self._json(200, json.loads(result_path.read_text(encoding="utf-8"))) + return + self._json(404, {"error": f"unknown endpoint: {path}"}) + + def do_POST(self) -> None: + assert self.state is not None + path = urlparse(self.path).path + if path != "/experiment": + self._json(404, {"error": f"unknown endpoint: {path}"}) + return + + try: + payload = self._read_json() + except ValueError as exc: + self._json(400, {"error": str(exc)}) + return + + if payload.get("probe") is True: + self._json(200, { + "status": "ok", + "probe": True, + "busy": self.state.snapshot()["status"] == "busy", + "timeoutSeconds": self.state.config.timeout_seconds, + }) + return + + train_py = payload.get("train_py", payload.get("trainPy")) + if not isinstance(train_py, str) or not train_py.strip(): + self._json(400, {"error": "train_py string is required"}) + return + + config_overrides = 
def parse_args() -> argparse.Namespace:
    """Parse the worker's command-line arguments.

    Environment variables supply the defaults for ``--repo``
    (``AUTORESEARCH_REPO``), ``--data-dir`` (``DATA_DIR``), ``--timeout``
    (``EXPERIMENT_TIMEOUT_SECONDS``) and ``--train-command``
    (``TRAIN_COMMAND``); explicit flags always win.

    Returns:
        Namespace with ``command``, ``repo``, ``data_dir``, ``host``, ``port``,
        ``timeout`` (positive int, seconds) and ``train_command``.

    Exits with status 2 via argparse when a value is invalid, including a
    malformed ``EXPERIMENT_TIMEOUT_SECONDS`` that is not overridden by
    ``--timeout``.
    """

    def positive_seconds(raw: str) -> int:
        """argparse ``type`` callback: a strictly positive whole number."""
        try:
            seconds = int(raw)
        except ValueError:
            raise argparse.ArgumentTypeError(f"expected a whole number of seconds, got {raw!r}") from None
        if seconds <= 0:
            raise argparse.ArgumentTypeError(f"timeout must be positive, got {seconds}")
        return seconds

    parser = argparse.ArgumentParser(description="Run the autoresearch GPU worker API.")
    parser.add_argument("command", nargs="?", default="serve", choices=["serve"], help="Command to run (default: serve)")
    parser.add_argument("--repo", default=os.environ.get("AUTORESEARCH_REPO", "/data/autoresearch"), help="Path to the base autoresearch repo/workdir")
    parser.add_argument("--data-dir", default=os.environ.get("DATA_DIR", "/data"), help="Directory for worker state and results")
    parser.add_argument("--host", default="0.0.0.0", help="Listen host")
    parser.add_argument("--port", type=int, default=8080, help="Listen port")
    # The default stays a string on purpose: argparse converts string defaults
    # with ``type`` only when the flag is absent, so a bad environment value
    # is reported as a usage error and never blocks an explicit --timeout.
    parser.add_argument(
        "--timeout",
        type=positive_seconds,
        default=os.environ.get("EXPERIMENT_TIMEOUT_SECONDS", "300"),
        help="Max experiment runtime in seconds",
    )
    parser.add_argument("--train-command", default=os.environ.get("TRAIN_COMMAND", "uv run train.py"), help="Command used to run train.py")
    return parser.parse_args()
def main() -> None:
    """Entry point: build the worker from CLI arguments and serve until interrupted.

    Prints the listen address and effective configuration, then blocks in
    ``serve_forever()``. Ctrl-C stops the loop; the listening socket is closed
    on every exit path.
    """
    cli = parse_args()
    settings = WorkerConfig(
        repo=Path(cli.repo),
        data_dir=Path(cli.data_dir),
        command=shlex.split(cli.train_command),
        timeout_seconds=cli.timeout,
    )
    httpd = make_server(WorkerState(settings), cli.host, cli.port)

    rendered_command = " ".join(settings.command)
    print(f"autoresearch-worker listening on http://{cli.host}:{cli.port}")
    print(f"repo={settings.repo} data_dir={settings.data_dir} timeout={settings.timeout_seconds}s command={rendered_command}")

    try:
        httpd.serve_forever()
    except KeyboardInterrupt:
        print("\nshutting down worker")
    finally:
        httpd.server_close()
Prepare Data + +Place your training and validation data in the autoresearch working directory: + +``` +autoresearch/ + train.bin # training data (tokenized) + val.bin # validation data (tokenized) + train.py # training script (agent modifies this) + results.tsv # experiment log (appended by each run) +``` + +### 2. Run Experiments + +The agent modifies `train.py` and runs experiments in a loop. Each experiment: + +- Has a **5-minute time budget** on GPU +- Produces a checkpoint and a val_bpb measurement +- Is tracked as a git commit with status (keep/discard) in `results.tsv` + +The `results.tsv` file is tab-separated with columns: + +``` +commit_hash val_bpb status description +a1b2c3d 1.042 keep baseline transformer +e4f5g6h 1.038 keep added RMSNorm +i7j8k9l 1.051 discard unstable lr schedule +``` + +### 3. Publish the Best Model + +Once experiments are complete, use `publish.py` to find the best checkpoint, register it with Ollama, and optionally sell it: + +```bash +# Publish to Ollama only +python3 scripts/publish.py /path/to/autoresearch + +# Publish and sell via x402 +python3 scripts/publish.py /path/to/autoresearch \ + --sell \ + --wallet 0xYourWalletAddress \ + --price 0.002 \ + --chain base-sepolia +``` + +## Commands + +| Command | Description | +|---------|-------------| +| `publish.py <path>` | Find best experiment, create Ollama model, generate provenance | +| `publish.py <path> --sell --wallet <address> --price <price> --chain <chain>` | Publish and sell via `obol sell inference` | + +## How It Works + +1. **Experiment loop**: The agent edits `train.py`, runs training for up to 5 minutes, measures val_bpb on the validation set, and commits the result with a keep/discard verdict. + +2. **Selection**: `publish.py` reads `results.tsv`, filters for `status=keep`, and selects the experiment with the lowest val_bpb (lower is better — fewer bits per byte means better compression / prediction). + +3. **Provenance**: A JSON provenance file is generated with: + - `framework`: training framework used + - `metricName`: metric identifier (`val_bpb`) + - `metricValue`: winning metric value as a string + - `trainHash`: `sha256:<hex>` hash of `train.py` at the winning commit + - `paramCount`: model parameter count as a string + - `experimentId`: git commit hash of the winning experiment + +4. **Ollama registration**: A Modelfile is generated from the checkpoint and `ollama create` registers the model locally. + +5. **Sell (optional)**: If `--sell` is passed, runs `obol sell inference` with the `--provenance-file` flag pointing at the provenance JSON so buyers can verify optimization lineage. 
+ +## Architecture + +``` +Agent (autoresearch loop) + | + +-- edit train.py + +-- run experiment (5-min budget) + +-- measure val_bpb + +-- commit results.tsv + | + v +publish.py + | + +-- read results.tsv → best experiment + +-- git show <commit>:train.py → SHA-256 trainHash + +-- generate provenance.json + +-- generate Modelfile → ollama create + +-- (optional) obol sell inference --provenance-file +``` + +## Constraints + +- **Python stdlib + uv** — no pip install; uv for environment management +- **5-minute time budget** — each experiment must complete within 5 minutes +- **GPU required** — training runs on local GPU (Ollama must have GPU access) +- **Git repo required** — autoresearch directory must be a git repository for commit tracking +- **results.tsv format** — tab-separated: `commit_hash`, `val_bpb`, `status`, `description` + +## OASF Registration + +When registering an autoresearch-optimized model on-chain via ERC-8004: + +- **Skills**: `machine_learning/model_optimization` +- **Domains**: `technology/artificial_intelligence/research` + +## References + +- `references/autoresearch-overview.md` — val_bpb metric, time budget, and the train.py modification loop diff --git a/internal/embed/skills/autoresearch/references/autoresearch-overview.md b/internal/embed/skills/autoresearch/references/autoresearch-overview.md new file mode 100644 index 00000000..0b5645c7 --- /dev/null +++ b/internal/embed/skills/autoresearch/references/autoresearch-overview.md @@ -0,0 +1,96 @@ +# Autoresearch Overview + +## What Is Autoresearch? + +Autoresearch is an autonomous LLM optimization methodology where an AI agent iteratively modifies a training script (`train.py`), runs short experiments on GPU, and measures improvement using a single metric: **validation bits per byte (val_bpb)**. + +The agent operates in a tight loop: + +1. Analyze previous experiment results +2. Hypothesize an improvement to `train.py` +3. Commit the change (git) +4. Run training for up to 5 minutes on GPU +5. 
Measure val_bpb on the held-out validation set +6. Record the result in `results.tsv` with a keep/discard verdict +7. Repeat + +## val_bpb Metric + +**Validation bits per byte** measures how many bits the model needs, on average, to encode each byte of the validation set. It is derived from cross-entropy loss: + +``` +val_bpb = cross_entropy_loss / ln(2) * (tokens / bytes) +``` + +**Interpretation:** + +- **Lower is better** — fewer bits per byte means the model compresses / predicts the validation data more efficiently +- Typical range for small LLMs: 0.8 - 1.2 bpb +- A 0.01 improvement in val_bpb is meaningful at small scale +- The metric is data-dependent — only comparable across runs on the same validation set + +## 5-Minute Time Budget + +Each experiment is capped at **5 minutes of GPU wall-clock time**. This constraint: + +- Forces the agent to make small, testable changes +- Prevents runaway training jobs +- Enables rapid iteration (dozens of experiments per hour) +- Makes the search tractable on consumer GPUs + +The time budget is enforced by the training harness. If training does not converge within 5 minutes, the checkpoint at timeout is evaluated. + +## The train.py Modification Loop + +The agent treats `train.py` as the single artifact to optimize. Modifications include: + +- **Architecture changes**: layer count, hidden dimensions, attention heads, normalization +- **Training hyperparameters**: learning rate, batch size, warmup schedule, weight decay +- **Optimization tricks**: gradient accumulation, mixed precision, curriculum +- **Regularization**: dropout, data augmentation, label smoothing +- **Novel ideas**: the agent can try unconventional approaches + +Each modification is a git commit so the exact code for every experiment is reproducible. 
+ +## results.tsv Format + +Tab-separated file appended after each experiment: + +| Column | Type | Description | +|--------|------|-------------| +| `commit_hash` | string | Git commit SHA of the experiment | +| `val_bpb` | float | Validation bits per byte (lower is better) | +| `status` | string | `keep` or `discard` | +| `description` | string | Brief description of what changed | + +Example: + +``` +commit_hash val_bpb status description +a1b2c3d 1.042 keep baseline transformer +e4f5g6h 1.038 keep added RMSNorm pre-norm +i7j8k9l 1.051 discard unstable cosine lr schedule +m0n1o2p 1.031 keep increased hidden dim to 512 +``` + +## OASF Registration + +When publishing an autoresearch-optimized model on-chain via ERC-8004, use these OASF classifications: + +- **Skills**: `machine_learning/model_optimization` +- **Domains**: `technology/artificial_intelligence/research` + +These tags help buyers discover optimized models through the agent discovery protocol and understand the provenance of the offering. + +## Provenance + +The `publish.py` script generates a provenance JSON file that records: + +- **framework**: `autoresearch` +- **metricName**: `val_bpb` +- **metricValue**: the winning `val_bpb` value as a string +- **trainHash**: `sha256:` hash of `train.py` at the winning commit (reproducibility proof) +- **paramCount**: model parameter count as a string (when available from checkpoint metadata) +- **experimentId**: git commit hash of the winning experiment + +This provenance file can be passed to `obol sell inference --provenance-file` so that buyers can verify the optimization lineage of the model they are purchasing. 
diff --git a/internal/embed/skills/autoresearch/references/deploy-app-prompt.md b/internal/embed/skills/autoresearch/references/deploy-app-prompt.md new file mode 100644 index 00000000..7e2be0d3 --- /dev/null +++ b/internal/embed/skills/autoresearch/references/deploy-app-prompt.md @@ -0,0 +1,218 @@ +# Deploy x402-Gated Web App — Agent Prompt Template + +Paste this into the obol-agent chat, customising the variables at the top. + +## Key architecture decisions + +- **Namespace: `llm`** — deploy alongside LiteLLM so the Deployment can mount `litellm-secrets` directly (Secrets are namespace-scoped, can't cross namespaces) +- **Image: `python:3.12-slim`** — public image, no build needed. k3d pulls it automatically. +- **App code in ConfigMap** — agent writes Python source to a ConfigMap, mounts it into the container at `/app`. Same pattern as the `.well-known/agent-registration.json` busybox httpd. +- **LiteLLM auth** — the Deployment reads `LITELLM_MASTER_KEY` from Secret `litellm-secrets` in `llm` namespace via `secretKeyRef`. Internal calls to LiteLLM don't go through x402. +- **x402 gating** — handled by the ServiceOffer CR. monetize.py creates the Traefik ForwardAuth Middleware + HTTPRoute at `/services/<name>/*`. Traefik strips the prefix (`ReplacePrefixMatch: /`) before forwarding, so the app just serves at `/`. +- **RBAC** — the agent already has cluster-wide RBAC for Deployments, Services, ConfigMaps, and ServiceOffers. No changes needed. + +--- + +## Prompt + +``` +Deploy a payment-gated web application into the cluster. Follow these steps exactly. + +### Step 1: Create ConfigMap with app code + +Create a ConfigMap named `cv-enhancer-app` in namespace `llm`. + +It must contain a single key `app.py` — a Python HTTP server (stdlib only: http.server, urllib, json, os, sys). 
The server must: + +**GET /** +Render a dark-themed HTML page with: +- Title: "CV Enhancer" with a green "x402" badge +- Subtitle: "Upload your resume and receive a polished, professional version" +- A