From cb6dc197ce3a879240a65b724997d905e33b4ab5 Mon Sep 17 00:00:00 2001 From: Jack Zhuang <277994282+os-zhuang@users.noreply.github.com> Date: Tue, 2 Jun 2026 12:32:02 +0800 Subject: [PATCH 1/2] =?UTF-8?q?feat(expression):=20ADR-0032=20phase=201=20?= =?UTF-8?q?=E2=80=94=20validate-by-default=20expression=20layer,=20no=20si?= =?UTF-8?q?lent=20failure?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes the #1491 class: a malformed predicate (notably the `{record.x}` template-brace-in-CEL mistake) was silently swallowed to `false`, so a flow "fired" with success but did nothing. - service-automation: evaluateCondition throws an attributed/corrective error instead of returning false; registerFlow parse-validates every predicate at registration (loud, located). - formula: shared validator (validateExpression / introspectScope / CEL_STDLIB_FUNCTIONS) with schema-aware field-existence + did-you-mean; the {{ }} template engine gains a formatter whitelist with defined value→string semantics (plain {{ path }} stays back-compatible; arbitrary-logic holes rejected). - cli: objectstack compile validates flow / validation-rule / field-formula predicates against the resolved object schema, failing the build with located corrective messages. - service-ai: agent-callable validate_expression tool for author-time self-correction. - spec: fix FlowSchema JSDoc example that taught the bad single-brace form. Tests: formula 74, service-automation 149, service-ai 412, cli 152, spec 6608, objectql 484, runtime 334 — all green. Co-Authored-By: Claude Opus 4.8 --- .changeset/adr-0032-expression-validation.md | 32 +++ .../app-crm/src/flows/convert-lead.flow.ts | 2 +- packages/cli/package.json | 1 + packages/cli/src/commands/compile.ts | 22 ++ .../src/utils/validate-expressions.test.ts | 69 ++++++ .../cli/src/utils/validate-expressions.ts | 114 +++++++++ packages/formula/src/index.ts | 6 +- packages/formula/src/template-engine.ts | 230 ++++++++++++++---- .../formula/src/template-formatters.test.ts | 55 +++++ packages/formula/src/validate.test.ts | 73 ++++++ packages/formula/src/validate.ts | 207 ++++++++++++++++ packages/services/service-ai/package.json | 1 + .../src/__tests__/metadata-tools.test.ts | 16 +- .../services/service-ai/src/tools/index.ts | 1 + .../service-ai/src/tools/metadata-tools.ts | 41 ++++ .../src/tools/validate-expression.tool.ts | 50 ++++ .../service-automation/src/engine.test.ts | 33 +++ .../services/service-automation/src/engine.ts | 74 +++++- packages/spec/src/automation/flow.zod.ts | 7 +- pnpm-lock.yaml | 6 + 20 files changed, 983 insertions(+), 57 deletions(-) create mode 100644 .changeset/adr-0032-expression-validation.md create mode 100644 packages/cli/src/utils/validate-expressions.test.ts create mode 100644 packages/cli/src/utils/validate-expressions.ts create mode 100644 packages/formula/src/template-formatters.test.ts create mode 100644 packages/formula/src/validate.test.ts create mode 100644 packages/formula/src/validate.ts create mode 100644 packages/services/service-ai/src/tools/validate-expression.tool.ts diff --git a/.changeset/adr-0032-expression-validation.md b/.changeset/adr-0032-expression-validation.md new file mode 100644 index 000000000..837f6df33 --- /dev/null +++ b/.changeset/adr-0032-expression-validation.md @@ -0,0 +1,32 @@ +--- +"@objectstack/formula": minor +"@objectstack/service-automation": minor +"@objectstack/service-ai": minor +"@objectstack/cli": minor +"@objectstack/spec": patch +--- + +ADR-0032 (phase 1): validate-by-default expression layer — no silent failure. + +Kills the #1491 class where a malformed predicate (e.g. the `{record.x}` +template-brace-in-CEL mistake) silently evaluated to `false` and made a flow +"fire" with no effect: + +- **service-automation**: flow `evaluateCondition` no longer swallows CEL + failures to `false` — it throws an attributed, corrective error; and + `registerFlow` now parse-validates every predicate (start/decision/edge + condition) at registration, failing loudly with the offending location + + source + the fix. +- **formula**: new shared validator — `validateExpression(role, src, schema?)`, + `introspectScope`, `CEL_STDLIB_FUNCTIONS` — with schema-aware field-existence + + did-you-mean. The `{{ }}` template engine gains a formatter whitelist + (`currency`/`number`/`percent`/`date`/`datetime`/`truncate`/`upper`/`lower`/ + `default`/…) with defined value→string semantics; arbitrary logic in holes is + rejected. Plain `{{ path }}` stays back-compatible. +- **cli**: `objectstack compile` validates every flow / validation-rule / + field-formula predicate against the resolved object schema and fails the + build with located, corrective messages. +- **service-ai**: new agent-callable `validate_expression` tool so authoring + agents self-correct before committing. +- **spec**: fix the `FlowSchema` JSDoc example that taught the bad + `condition: "{amount} < 500"` single-brace form. diff --git a/examples/app-crm/src/flows/convert-lead.flow.ts b/examples/app-crm/src/flows/convert-lead.flow.ts index 14a43727b..7ad40973f 100644 --- a/examples/app-crm/src/flows/convert-lead.flow.ts +++ b/examples/app-crm/src/flows/convert-lead.flow.ts @@ -206,7 +206,7 @@ export const ConvertLeadScreenFlow = defineFlow({ { id: 'e1', source: 'start', target: 'get_lead', type: 'default' }, { id: 'e2', source: 'get_lead', target: 'check_converted', type: 'default' }, // guard branches - { id: 'e3a', source: 'check_converted', target: 'screen_already_converted', type: 'default', condition: "{lead_record.status} == 'converted'", label: 'Yes' }, + { id: 'e3a', source: 'check_converted', target: 'screen_already_converted', type: 'default', condition: "lead_record.status == 'converted'", label: 'Yes' }, { id: 'e3b', source: 'check_converted', target: 'screen_qualify', type: 'default', label: 'No' }, { id: 'e3c', source: 'screen_already_converted', target: 'end', type: 'default' }, // main path diff --git a/packages/cli/package.json b/packages/cli/package.json index 2f8d8a541..0ba35b794 100644 --- a/packages/cli/package.json +++ b/packages/cli/package.json @@ -48,6 +48,7 @@ "@objectstack/driver-mongodb": "workspace:^", "@objectstack/driver-sql": "workspace:^", "@objectstack/driver-sqlite-wasm": "workspace:^", + "@objectstack/formula": "workspace:*", "@objectstack/objectql": "workspace:^", "@objectstack/observability": "workspace:^", "@objectstack/platform-objects": "workspace:*", diff --git a/packages/cli/src/commands/compile.ts b/packages/cli/src/commands/compile.ts index fcaa0f28b..b13ead6c1 100644 --- a/packages/cli/src/commands/compile.ts +++ b/packages/cli/src/commands/compile.ts @@ -8,6 +8,7 @@ import { ZodError } from 'zod'; import { ObjectStackDefinitionSchema, normalizeStackInput } from '@objectstack/spec'; import { loadConfig } from '../utils/config.js'; import { lowerCallables } from '../utils/lower-callables.js'; +import { validateStackExpressions } from '../utils/validate-expressions.js'; import { buildRuntimeBundle, cleanupOldRuntimeBundles } from '../utils/build-runtime.js'; import { printHeader, @@ -123,6 +124,27 @@ export default class Compile extends Command { this.exit(1); } + // 3b. Validate expressions against the resolved schema (ADR-0032 §1a/1b). + // The whole normalized stack is in hand here, so flow/validation + // predicates are checked for CEL syntax AND that `record.` + // references exist on the target object — failing the build with a + // located, corrective message instead of a silent runtime `false`. + if (!flags.json) printStep('Validating expressions (ADR-0032)...'); + const exprIssues = validateStackExpressions(result.data as Record); + if (exprIssues.length > 0) { + if (flags.json) { + console.log(JSON.stringify({ success: false, error: 'expression validation failed', issues: exprIssues })); + this.exit(1); + } + console.log(''); + printError(`Expression validation failed (${exprIssues.length} issue${exprIssues.length > 1 ? 's' : ''})`); + for (const i of exprIssues.slice(0, 50)) { + console.log(` • ${i.where}: ${i.message}`); + console.log(` source: \`${i.source}\``); + } + this.exit(1); + } + // 4. Generate Artifact if (!flags.json) printStep('Writing artifact...'); const output = flags.output!; diff --git a/packages/cli/src/utils/validate-expressions.test.ts b/packages/cli/src/utils/validate-expressions.test.ts new file mode 100644 index 000000000..27bab9b03 --- /dev/null +++ b/packages/cli/src/utils/validate-expressions.test.ts @@ -0,0 +1,69 @@ +import { describe, it, expect } from 'vitest'; +import { validateStackExpressions } from './validate-expressions.js'; + +describe('validateStackExpressions (ADR-0032 build-time)', () => { + const objects = [ + { name: 'crm_lead', fields: { rating: { type: 'number' }, status: { type: 'text' } } }, + ]; + + it('passes a clean stack', () => { + const issues = validateStackExpressions({ + objects, + flows: [{ + name: 'lead_flow', + nodes: [ + { id: 'start', type: 'start', config: { objectName: 'crm_lead' } }, + { id: 'check', type: 'decision', config: { condition: 'record.rating >= 4' } }, + ], + edges: [{ id: 'e1', source: 'check', target: 'end', condition: 'record.rating < 4' }], + }], + }); + expect(issues).toHaveLength(0); + }); + + it('flags a brace-in-CEL condition with location + corrective message', () => { + const issues = validateStackExpressions({ + objects, + flows: [{ + name: 'lead_flow', + nodes: [ + { id: 'start', type: 'start', config: { objectName: 'crm_lead' } }, + { id: 'check', type: 'decision', config: { condition: '{record.rating} >= 4' } }, + ], + edges: [], + }], + }); + expect(issues).toHaveLength(1); + expect(issues[0].where).toContain("flow 'lead_flow'"); + expect(issues[0].where).toContain("node 'check'"); + expect(issues[0].message).toMatch(/map literal|bare reference/); + expect(issues[0].source).toBe('{record.rating} >= 4'); + }); + + it('flags an unknown record field against the resolved schema (did-you-mean)', () => { + const issues = validateStackExpressions({ + objects, + flows: [{ + name: 'lead_flow', + nodes: [ + { id: 'start', type: 'start', config: { objectName: 'crm_lead' } }, + { id: 'check', type: 'decision', config: { condition: 'record.raitng >= 4' } }, + ], + edges: [], + }], + }); + expect(issues).toHaveLength(1); + expect(issues[0].message).toMatch(/unknown field `raitng`/); + expect(issues[0].message).toMatch(/did you mean `rating`/); + }); + + it('validates object validation-rule predicates too', () => { + const issues = validateStackExpressions({ + objects: [ + { name: 'crm_lead', fields: { rating: {} }, validations: [{ name: 'r1', expression: '{record.rating} > 0' }] }, + ], + }); + expect(issues).toHaveLength(1); + expect(issues[0].where).toContain("validation 'r1'"); + }); +}); diff --git a/packages/cli/src/utils/validate-expressions.ts b/packages/cli/src/utils/validate-expressions.ts new file mode 100644 index 000000000..8260782fd --- /dev/null +++ b/packages/cli/src/utils/validate-expressions.ts @@ -0,0 +1,114 @@ +// Copyright (c) 2025 ObjectStack. Licensed under the Apache-2.0 license. + +/** + * Build-time expression validation (ADR-0032 §Decision 1a + 1b). + * + * Runs at `objectstack compile`, where the whole normalized stack is in hand — + * so flow conditions can be checked against the *resolved* object schema + * (field existence) in addition to CEL syntax. Uses the one shared validator + * from `@objectstack/formula`, so the verdict matches `registerFlow` and the + * agent `validate_expression` tool exactly. + * + * Scope (v1): flow predicates (start/decision `config.condition` + edge + * `condition`) and object validation-rule / formula predicates. Each error is + * located (flow/object + node/edge/field) with a corrective message. + */ + +import { validateExpression } from '@objectstack/formula'; + +export interface ExprIssue { + where: string; + message: string; + source: string; +} + +type AnyRec = Record; + +/** Coerce an `objects` collection (array or name-keyed map) to an array. */ +function asArray(v: unknown): AnyRec[] { + if (Array.isArray(v)) return v as AnyRec[]; + if (v && typeof v === 'object') { + return Object.entries(v as AnyRec).map(([name, def]) => ({ name, ...(def as AnyRec) })); + } + return []; +} + +/** object name → set of its field names, for schema-aware field checks. */ +function buildFieldIndex(objects: AnyRec[]): Map { + const idx = new Map(); + for (const obj of objects) { + const name = typeof obj.name === 'string' ? obj.name : undefined; + if (!name) continue; + const fields = obj.fields; + let names: string[] = []; + if (Array.isArray(fields)) names = fields.map(f => (f as AnyRec).name).filter((n): n is string => typeof n === 'string'); + else if (fields && typeof fields === 'object') names = Object.keys(fields as AnyRec); + idx.set(name, names); + } + return idx; +} + +/** + * Validate every predicate in the stack. Returns the list of issues (empty = + * clean). Caller decides how to surface / whether to fail the build. + */ +export function validateStackExpressions(stack: AnyRec): ExprIssue[] { + const issues: ExprIssue[] = []; + const objects = asArray(stack.objects); + const fieldIndex = buildFieldIndex(objects); + + const check = (where: string, raw: unknown, objectName?: string): void => { + if (raw == null) return; + const fields = objectName ? fieldIndex.get(objectName) : undefined; + const res = validateExpression('predicate', raw as string | { dialect?: string; source?: string }, + objectName ? { objectName, fields } : undefined); + for (const e of res.errors) issues.push({ where, message: e.message, source: e.source }); + }; + + // ── Flows ────────────────────────────────────────────────────────── + for (const flow of asArray(stack.flows)) { + const flowName = typeof flow.name === 'string' ? flow.name : '(unnamed flow)'; + const nodes = Array.isArray(flow.nodes) ? (flow.nodes as AnyRec[]) : []; + const edges = Array.isArray(flow.edges) ? (flow.edges as AnyRec[]) : []; + // The record-change target object — `record.*` refs resolve against it. + const startNode = nodes.find(n => n.type === 'start'); + const startCfg = (startNode?.config ?? {}) as AnyRec; + const objectName = typeof startCfg.objectName === 'string' ? startCfg.objectName : undefined; + + for (const node of nodes) { + const cfg = (node.config ?? {}) as AnyRec; + check(`flow '${flowName}' · node '${node.id}' (${node.type}) condition`, cfg.condition, objectName); + } + for (const edge of edges) { + check(`flow '${flowName}' · edge '${edge.id}' (${edge.source}→${edge.target}) condition`, edge.condition, objectName); + } + } + + // ── Object validation-rule + formula predicates ──────────────────── + for (const obj of objects) { + const objectName = typeof obj.name === 'string' ? obj.name : undefined; + const validations = obj.validations ?? obj.validationRules; + for (const rule of asArray(validations)) { + const where = `object '${objectName}' · validation '${(rule.name as string) ?? '?'}'`; + // Common predicate keys across rule shapes. + check(where, rule.expression ?? rule.predicate ?? rule.condition ?? rule.formula, objectName); + } + // Field-level formulas (computed fields) reference the same object. + const fields = obj.fields; + const fieldList = Array.isArray(fields) + ? (fields as AnyRec[]) + : (fields && typeof fields === 'object' ? Object.values(fields as AnyRec) as AnyRec[] : []); + for (const f of fieldList) { + if (f && typeof f === 'object' && f.formula) { + // formulas are `value` role (any return type), still CEL. + const res = validateExpression('value', f.formula as string | { dialect?: string; source?: string }, + objectName ? { objectName, fields: fieldIndex.get(objectName) } : undefined); + for (const e of res.errors) { + issues.push({ where: `object '${objectName}' · field '${(f.name as string) ?? '?'}' formula`, message: e.message, source: e.source }); + } + } + } + } + + return issues; +} diff --git a/packages/formula/src/index.ts b/packages/formula/src/index.ts index 5458d8ab0..4ddfa9bdc 100644 --- a/packages/formula/src/index.ts +++ b/packages/formula/src/index.ts @@ -12,9 +12,13 @@ export { ExpressionEngine, getEngine, hasDialect, register } from './registry'; export { celEngine, DEFAULT_LIMITS } from './cel-engine'; export { cronEngine } from './cron-engine'; -export { templateEngine } from './template-engine'; +export { templateEngine, TEMPLATE_FORMATTERS } from './template-engine'; export { registerStdLib, buildScope } from './stdlib'; export { resolveSeed, resolveSeedRecord } from './seed-eval'; export { normalizeExpression, normalizeExpressionTree } from './normalize'; +// ADR-0032 — shared validator + introspection (one validator for build, +// registration, and the agent-callable validate_expression tool). +export { validateExpression, introspectScope, expectedDialect, CEL_STDLIB_FUNCTIONS } from './validate'; +export type { FieldRole, ExprSchemaHint, ExprValidationError, ExprValidationResult } from './validate'; export type { SeedValue, SeedPrimitive } from './seed-eval'; export type { DialectEngine, EvalContext, EvalResult, EvalError } from './types'; diff --git a/packages/formula/src/template-engine.ts b/packages/formula/src/template-engine.ts index 0c722161a..8dc4e04e8 100644 --- a/packages/formula/src/template-engine.ts +++ b/packages/formula/src/template-engine.ts @@ -1,17 +1,19 @@ /** - * Template dialect engine — strict Mustache subset. + * Template dialect engine — strict Mustache subset with a formatter whitelist. * - * Supports `{{path.to.value}}` interpolation only. No conditionals, no loops, - * no helpers. The variable scope is the same as CEL (`record`, `previous`, - * `input`, `os.user`, `os.org`, `os.env`, plus `extra`), so authors can move - * fluidly between a CEL formula and a template body without re-learning a - * second variable namespace. + * Holes are `{{ path }}` or `{{ path | formatter[:'arg'] }}` (ADR-0032 §3). + * Holes are restricted to a **field/variable path** plus a **whitelisted + * formatter** — never arbitrary CEL logic — so the grammar stays small (low + * author/agent error surface), GUI-pickable (path + formatter dropdown), and + * display strings stay declarative. Real logic belongs in `Predicate`/`Expr` + * (CEL) fields, where it is validated and visible. * - * Why a separate dialect from CEL: templates produce strings (notification - * subjects, prompt bodies, titleFormat). CEL is a value-typed expression - * language. Routing them through the same envelope (`{ dialect: 'template' }`) - * keeps the AI author rule simple — "anything templated or computed is an - * Expression" — without conflating the two semantics. + * The variable scope is the same as CEL (`record`, `previous`, `input`, + * `os.user/org/env`, plus `extra`), so authors move fluidly between a CEL + * formula and a template body without re-learning a namespace. + * + * Value→string semantics are explicit and defined per formatter (numbers, + * dates, money, percent, null), instead of implicit coercion. */ import type { Expression } from '@objectstack/spec'; @@ -19,21 +21,104 @@ import type { Expression } from '@objectstack/spec'; import { buildScope } from './stdlib'; import type { DialectEngine, EvalContext, EvalResult } from './types'; -const PATH_RE = /\{\{\s*([\w.[\]]+?)\s*\}\}/g; +/** A hole: capture the full inner content (no `}` allowed inside). */ +const HOLE_RE = /\{\{\s*([^}]*?)\s*\}\}/g; -function resolvePath(scope: Record, path: string): unknown { - // Support `a.b.c` and `a[0].b` style. Bracket notation collapses to dotted. - const normalized = path.replace(/\[(\w+)\]/g, '.$1'); - const segments = normalized.split('.').filter(Boolean); - let cursor: unknown = scope; - for (const seg of segments) { - if (cursor == null || typeof cursor !== 'object') return undefined; - cursor = (cursor as Record)[seg]; +// ───────────────────────── formatter whitelist (ADR-0032 §3) ────────────── + +type Formatter = (value: unknown, arg: string | undefined, locale: string) => string; + +function asNumber(v: unknown): number | undefined { + if (typeof v === 'number') return v; + if (typeof v === 'bigint') return Number(v); + if (typeof v === 'string' && v.trim() !== '' && !Number.isNaN(Number(v))) return Number(v); + return undefined; +} + +function asDate(v: unknown): Date | undefined { + if (v instanceof Date) return v; + if (typeof v === 'number') return new Date(v); + if (typeof v === 'string') { + const d = new Date(v); + if (!Number.isNaN(d.getTime())) return d; } - return cursor; + return undefined; } -function stringify(value: unknown): string { +const FORMATTERS: Record = { + upper: (v) => baseString(v).toUpperCase(), + lower: (v) => baseString(v).toLowerCase(), + trim: (v) => baseString(v).trim(), + // number | number:2 → grouped, optional fixed decimals + number: (v, arg, locale) => { + const n = asNumber(v); + if (n === undefined) return baseString(v); + const digits = arg !== undefined ? Number(arg) : undefined; + return new Intl.NumberFormat(locale, digits !== undefined && !Number.isNaN(digits) + ? { minimumFractionDigits: digits, maximumFractionDigits: digits } : {}).format(n); + }, + // currency | currency:EUR → defaults to USD + currency: (v, arg, locale) => { + const n = asNumber(v); + if (n === undefined) return baseString(v); + const code = (arg && arg.trim()) || 'USD'; + try { + return new Intl.NumberFormat(locale, { style: 'currency', currency: code }).format(n); + } catch { + return new Intl.NumberFormat(locale, { style: 'currency', currency: 'USD' }).format(n); + } + }, + // percent | percent:1 → 0.42 → "42%" (value is a 0..1 ratio) + percent: (v, arg, locale) => { + const n = asNumber(v); + if (n === undefined) return baseString(v); + const digits = arg !== undefined ? Number(arg) : 0; + return new Intl.NumberFormat(locale, { + style: 'percent', + minimumFractionDigits: Number.isNaN(digits) ? 0 : digits, + maximumFractionDigits: Number.isNaN(digits) ? 0 : digits, + }).format(n); + }, + // date | date:long | date:iso → date-only + date: (v, arg, locale) => { + const d = asDate(v); + if (!d) return baseString(v); + if (arg === 'iso') return d.toISOString().slice(0, 10); + const style = arg === 'long' ? 'long' : arg === 'medium' ? 'medium' : 'short'; + return new Intl.DateTimeFormat(locale, { dateStyle: style as 'short' | 'medium' | 'long' }).format(d); + }, + // datetime | datetime:long | datetime:iso + datetime: (v, arg, locale) => { + const d = asDate(v); + if (!d) return baseString(v); + if (arg === 'iso') return d.toISOString(); + const style = arg === 'long' ? 'long' : arg === 'medium' ? 'medium' : 'short'; + return new Intl.DateTimeFormat(locale, { + dateStyle: style as 'short' | 'medium' | 'long', + timeStyle: style as 'short' | 'medium' | 'long', + }).format(d); + }, + // truncate:80 → cut with an ellipsis + truncate: (v, arg) => { + const s = baseString(v); + const len = arg !== undefined ? Number(arg) : 80; + if (Number.isNaN(len) || s.length <= len) return s; + return s.slice(0, Math.max(0, len - 1)) + '…'; + }, + // default:'N/A' → fallback when the value is null/undefined/empty + default: (v, arg) => { + const s = baseString(v); + return s === '' ? (arg ?? '') : s; + }, + json: (v) => { + try { return JSON.stringify(v); } catch { return String(v); } + }, +}; + +/** Public list of whitelisted template formatters (for introspection/docs). */ +export const TEMPLATE_FORMATTERS: string[] = Object.keys(FORMATTERS); + +function baseString(value: unknown): string { if (value === null || value === undefined) return ''; if (value instanceof Date) return value.toISOString(); if (typeof value === 'string') return value; @@ -46,23 +131,76 @@ function stringify(value: unknown): string { } } -function compileTemplate(source: string): EvalResult { - // Compile is only a structural validity check — no helpers, no balanced - // open/close beyond what the regex enforces. - const matches = source.match(/\{\{|\}\}/g) ?? []; - if (matches.length % 2 !== 0) { - return { - ok: false, - error: { kind: 'parse', message: 'template has unbalanced {{ }} delimiters' }, - }; +function resolvePath(scope: Record, path: string): unknown { + const normalized = path.replace(/\[(\w+)\]/g, '.$1'); + const segments = normalized.split('.').filter(Boolean); + let cursor: unknown = scope; + for (const seg of segments) { + if (cursor == null || typeof cursor !== 'object') return undefined; + cursor = (cursor as Record)[seg]; + } + return cursor; +} + +interface ParsedHole { + path: string; + filter?: { name: string; arg?: string }; +} + +const PATH_ONLY_RE = /^[\w.[\]]+$/; + +/** + * Parse a hole's inner content into a path + optional single formatter. + * Returns null when the inner content is not a valid path[+formatter] form + * (e.g. arbitrary CEL was written into a hole — rejected, ADR-0032 §3). + */ +function parseHole(inner: string): ParsedHole | null { + const pipe = inner.indexOf('|'); + if (pipe === -1) { + const path = inner.trim(); + return PATH_ONLY_RE.test(path) ? { path } : null; + } + const path = inner.slice(0, pipe).trim(); + if (!PATH_ONLY_RE.test(path)) return null; + const filterPart = inner.slice(pipe + 1).trim(); + // `name` or `name:arg` or `name:'arg'` + const colon = filterPart.indexOf(':'); + let name = filterPart; + let arg: string | undefined; + if (colon !== -1) { + name = filterPart.slice(0, colon).trim(); + arg = filterPart.slice(colon + 1).trim().replace(/^['"]|['"]$/g, ''); } - const refs: string[] = []; + if (!FORMATTERS[name]) return null; + return { path, filter: { name, arg } }; +} + +function compileTemplate(source: string): EvalResult { + const open = (source.match(/\{\{/g) ?? []).length; + const close = (source.match(/\}\}/g) ?? []).length; + if (open !== close) { + return { ok: false, error: { kind: 'parse', message: 'template has unbalanced {{ }} delimiters' } }; + } + const holes: ParsedHole[] = []; let m: RegExpExecArray | null; - PATH_RE.lastIndex = 0; - while ((m = PATH_RE.exec(source)) !== null) { - refs.push(m[1]); + HOLE_RE.lastIndex = 0; + while ((m = HOLE_RE.exec(source)) !== null) { + const parsed = parseHole(m[1]); + if (!parsed) { + return { + ok: false, + error: { + kind: 'parse', + message: + `invalid template hole \`{{ ${m[1]} }}\` — holes are a field path with an optional ` + + `formatter (\`{{ record.amount | currency }}\`), not arbitrary logic. ` + + `Move logic into a CEL field. Known formatters: ${TEMPLATE_FORMATTERS.join(', ')}.`, + }, + }; + } + holes.push(parsed); } - return { ok: true, value: refs }; + return { ok: true, value: holes }; } export const templateEngine: DialectEngine = { @@ -80,17 +218,25 @@ export const templateEngine: DialectEngine = { }; } if (typeof expr.source !== 'string') { - return { - ok: false, - error: { kind: 'parse', message: 'template Expression.source required' }, - }; + return { ok: false, error: { kind: 'parse', message: 'template Expression.source required' } }; } const check = compileTemplate(expr.source); if (!check.ok) return check as EvalResult; const scope = buildScope(ctx); - const out = expr.source.replace(PATH_RE, (_match, path) => { - return stringify(resolvePath(scope, path)); + const locale = + (ctx.extra && typeof ctx.extra.locale === 'string' && ctx.extra.locale) || + (typeof (ctx as { locale?: string }).locale === 'string' && (ctx as { locale?: string }).locale) || + 'en-US'; + + const out = expr.source.replace(HOLE_RE, (_match, inner) => { + const parsed = parseHole(String(inner)); + if (!parsed) return _match; // compile already validated; defensive + const value = resolvePath(scope, parsed.path); + if (parsed.filter) { + return FORMATTERS[parsed.filter.name](value, parsed.filter.arg, locale as string); + } + return baseString(value); }); return { ok: true, value: out as unknown as T }; }, diff --git a/packages/formula/src/template-formatters.test.ts b/packages/formula/src/template-formatters.test.ts new file mode 100644 index 000000000..e7d975a7b --- /dev/null +++ b/packages/formula/src/template-formatters.test.ts @@ -0,0 +1,55 @@ +import { describe, it, expect } from 'vitest'; +import { templateEngine, TEMPLATE_FORMATTERS } from './template-engine'; + +function render(source: string, record: Record): string { + const r = templateEngine.evaluate({ dialect: 'template', source }, { record, extra: { locale: 'en-US' } }); + if (!r.ok) throw new Error(r.error.message); + return r.value; +} + +describe('template formatters (ADR-0032 §3)', () => { + it('keeps plain {{ path }} back-compatible (identity stringify)', () => { + expect(render('Hi {{ record.name }}', { name: 'Jane' })).toBe('Hi Jane'); + }); + + it('currency (default USD + explicit code)', () => { + expect(render('{{ record.amt | currency }}', { amt: 1234.5 })).toBe('$1,234.50'); + expect(render('{{ record.amt | currency:EUR }}', { amt: 1000 })).toContain('1,000'); + }); + + it('number with fixed decimals', () => { + expect(render('{{ record.n | number:2 }}', { n: 1234.5 })).toBe('1,234.50'); + }); + + it('percent (ratio → %)', () => { + expect(render('{{ record.r | percent }}', { r: 0.42 })).toBe('42%'); + expect(render('{{ record.r | percent:1 }}', { r: 0.425 })).toBe('42.5%'); + }); + + it('date:iso', () => { + expect(render('{{ record.d | date:iso }}', { d: '2026-06-02T10:00:00Z' })).toBe('2026-06-02'); + }); + + it('truncate + upper + default', () => { + expect(render('{{ record.s | truncate:5 }}', { s: 'abcdefgh' })).toBe('abcd…'); + expect(render('{{ record.s | upper }}', { s: 'hi' })).toBe('HI'); + expect(render("{{ record.missing | default:'N/A' }}", {})).toBe('N/A'); + }); + + it('rejects arbitrary logic in a hole (not a path+formatter)', () => { + const r = templateEngine.compile('{{ record.a > 5 ? "x" : "y" }}'); + expect(r.ok).toBe(false); + if (!r.ok) expect(r.error.message).toMatch(/field path with an optional formatter|arbitrary logic/); + }); + + it('rejects an unknown formatter', () => { + const r = templateEngine.compile('{{ record.a | bogus }}'); + expect(r.ok).toBe(false); + }); + + it('exposes the formatter catalog', () => { + expect(TEMPLATE_FORMATTERS).toEqual( + expect.arrayContaining(['currency', 'number', 'percent', 'date', 'datetime', 'truncate', 'upper', 'lower', 'default']), + ); + }); +}); diff --git a/packages/formula/src/validate.test.ts b/packages/formula/src/validate.test.ts new file mode 100644 index 000000000..36473dc5d --- /dev/null +++ b/packages/formula/src/validate.test.ts @@ -0,0 +1,73 @@ +import { describe, it, expect } from 'vitest'; +import { validateExpression, introspectScope, expectedDialect } from './validate'; + +describe('validateExpression (ADR-0032)', () => { + describe('predicates (CEL)', () => { + it('accepts a valid bare-CEL predicate', () => { + const r = validateExpression('predicate', 'record.rating >= 4'); + expect(r.ok).toBe(true); + expect(r.errors).toHaveLength(0); + }); + + it('rejects the #1491 brace-in-CEL form with a corrective message', () => { + const r = validateExpression('predicate', '{record.rating} >= 4'); + expect(r.ok).toBe(false); + expect(r.errors[0].message).toMatch(/map literal|bare reference|template brace/i); + expect(r.errors[0].message).toContain('record.rating'); + expect(r.errors[0].source).toBe('{record.rating} >= 4'); + }); + + it('rejects a CEL envelope placed in a template-only role', () => { + const r = validateExpression('template', { dialect: 'cel', source: 'record.x' }); + expect(r.ok).toBe(false); + }); + + it('accepts an empty/absent expression (no-op)', () => { + expect(validateExpression('predicate', '').ok).toBe(true); + expect(validateExpression('predicate', null).ok).toBe(true); + }); + }); + + describe('templates', () => { + it('accepts a valid {{ path }} template', () => { + const r = validateExpression('template', 'Hot lead: {{ record.full_name }}'); + expect(r.ok).toBe(true); + }); + + it('flags single-brace {x} in a template and suggests {{ }}', () => { + const r = validateExpression('template', 'Hi {record.name}'); + expect(r.ok).toBe(false); + expect(r.errors[0].message).toMatch(/\{\{ record\.name \}\}|double braces/); + }); + }); + + describe('schema-aware field existence (v1)', () => { + it('flags an unknown record field with a did-you-mean', () => { + const r = validateExpression('predicate', 'record.raitng >= 4', { objectName: 'crm_lead', fields: ['rating', 'status'] }); + expect(r.ok).toBe(false); + expect(r.errors[0].message).toMatch(/unknown field `raitng`/); + expect(r.errors[0].message).toMatch(/did you mean `rating`/); + }); + + it('passes when fields exist', () => { + const r = validateExpression('predicate', 'record.rating >= 4 && record.status == "new"', { fields: ['rating', 'status'] }); + expect(r.ok).toBe(true); + }); + + it('skips field checks when no schema is provided', () => { + expect(validateExpression('predicate', 'record.anything > 1').ok).toBe(true); + }); + }); + + describe('introspection', () => { + it('reports the dialect + scope for a field role', () => { + expect(expectedDialect('predicate')).toBe('cel'); + expect(expectedDialect('template')).toBe('template'); + const scope = introspectScope('predicate', { fields: ['rating'] }); + expect(scope.dialect).toBe('cel'); + expect(scope.fields).toContain('rating'); + expect(scope.roots).toContain('record'); + expect(scope.functions).toContain('daysFromNow'); + }); + }); +}); diff --git a/packages/formula/src/validate.ts b/packages/formula/src/validate.ts new file mode 100644 index 000000000..c37078462 --- /dev/null +++ b/packages/formula/src/validate.ts @@ -0,0 +1,207 @@ +/** + * Shared expression validator (ADR-0032 §Decision 1/5). + * + * One validator, used by every author surface — `objectstack build`, + * `registerFlow`/metadata registration, and the agent-callable + * `validate_expression` tool — so a malformed expression is caught the same + * way everywhere, with a message written for **self-correction** (Decision 1d): + * it states what is wrong AND the correct form. + * + * Field roles map to dialects (Decision 2): + * - `predicate` → bare CEL returning bool (`record.rating >= 4`) + * - `value` → bare CEL of any type (`daysFromNow(3)`) + * - `template` → text with `{{ path }}` holes (`Hot lead: {{ record.name }}`) + * + * The #1 author error (human or LLM) is wrapping a field reference in single + * `{…}` braces inside a CEL field — `{x}` parses as a CEL map literal and fails. + * This validator detects that specific mistake and returns the exact fix. + */ + +import { celEngine } from './cel-engine'; +import { templateEngine } from './template-engine'; + +export type FieldRole = 'predicate' | 'value' | 'template'; + +/** + * Loose input accepted by the validator: a bare string, or any object exposing + * `dialect`/`source` (the Expression envelope, or a not-yet-narrowed value from + * a `config.condition` / `edge.condition` field). Kept structural so call sites + * need not pre-narrow to the strict {@link Expression} dialect union. + */ +export type ExprInput = string | { dialect?: string; source?: string } | null | undefined; + +/** Optional schema context for field-existence checks (Decision 1b, v1). */ +export interface ExprSchemaHint { + /** Object the expression is authored against (for error text). */ + objectName?: string; + /** Known top-level field names, so `record.` can be checked. */ + fields?: readonly string[]; +} + +export interface ExprValidationError { + /** Self-correcting message: what is wrong + the correct form. */ + message: string; + /** The offending source, echoed for location. */ + source: string; +} + +export interface ExprValidationResult { + ok: boolean; + errors: ExprValidationError[]; +} + +/** A bare `{x}` that is NOT part of a `{{x}}` mustache hole. */ +const SINGLE_BRACE_RE = /(?:^|[^{])\{\s*([A-Za-z_$][\w.$]*)\s*\}(?!\})/; +/** `record.` / `previous.` head references for field-existence. */ +const RECORD_REF_RE = /\b(?:record|previous)\.([A-Za-z_$][\w$]*)/g; + +/** The dialect a field role expects (Decision 2). */ +export function expectedDialect(role: FieldRole): 'cel' | 'template' { + return role === 'template' ? 'template' : 'cel'; +} + +function toSource(input: ExprInput): { dialect?: string; source: string } { + if (input == null) return { source: '' }; + if (typeof input === 'string') return { source: input }; + return { dialect: input.dialect, source: input.source ?? '' }; +} + +function bracesHint(source: string): string | null { + const m = SINGLE_BRACE_RE.exec(source); + if (!m) return null; + const ref = m[1]; + return ( + `it looks like a \`{${ref}}\` template brace was used inside a CEL expression — ` + + `\`{…}\` parses as a CEL map literal and fails. Write the bare reference instead, e.g. \`${ref}\`.` + ); +} + +function checkFieldExistence(source: string, schema: ExprSchemaHint | undefined, errors: ExprValidationError[]): void { + if (!schema?.fields || schema.fields.length === 0) return; + const known = new Set(schema.fields); + const seen = new Set(); + let m: RegExpExecArray | null; + RECORD_REF_RE.lastIndex = 0; + while ((m = RECORD_REF_RE.exec(source)) !== null) { + const field = m[1]; + if (seen.has(field) || known.has(field)) continue; + seen.add(field); + const suggestion = nearest(field, schema.fields); + errors.push({ + source, + message: + `unknown field \`${field}\`${schema.objectName ? ` on \`${schema.objectName}\`` : ''}` + + (suggestion ? ` — did you mean \`${suggestion}\`?` : ''), + }); + } +} + +/** Cheap edit-distance suggestion for typo'd field names. */ +function nearest(name: string, candidates: readonly string[]): string | undefined { + let best: string | undefined; + let bestD = Infinity; + for (const c of candidates) { + const d = levenshtein(name, c); + if (d < bestD) { bestD = d; best = c; } + } + return bestD <= Math.max(2, Math.floor(name.length / 3)) ? best : undefined; +} + +function levenshtein(a: string, b: string): number { + const m = a.length, n = b.length; + const dp = Array.from({ length: m + 1 }, (_, i) => i); + for (let j = 1; j <= n; j++) { + let prev = dp[0]; + dp[0] = j; + for (let i = 1; i <= m; i++) { + const tmp = dp[i]; + dp[i] = Math.min(dp[i] + 1, dp[i - 1] + 1, prev + (a[i - 1] === b[j - 1] ? 0 : 1)); + prev = tmp; + } + } + return dp[m]; +} + +/** + * Validate one expression for a given field role. Never throws — returns a + * structured result. Call sites decide whether to throw (build/registration) + * or report (agent tool). + */ +export function validateExpression( + role: FieldRole, + input: ExprInput, + schema?: ExprSchemaHint, +): ExprValidationResult { + const { dialect, source } = toSource(input); + const errors: ExprValidationError[] = []; + if (!source.trim()) return { ok: true, errors }; + + if (role === 'template') { + // Templates must be the `template` dialect (or untyped string). Reject a + // CEL envelope mistakenly placed in a text field. + if (dialect && dialect !== 'template') { + errors.push({ source, message: `expected a text template but got a \`${dialect}\` expression.` }); + return { ok: false, errors }; + } + const compiled = templateEngine.compile(source); + if (!compiled.ok) { + errors.push({ source, message: `invalid template: ${compiled.error.message} (holes use \`{{ path }}\`).` }); + } + // A single `{x}` in a template is the legacy/deprecated form (ADR-0032 §3). + const hint = SINGLE_BRACE_RE.test(source) ? bracesHintForTemplate(source) : null; + if (hint) errors.push({ source, message: hint }); + return { ok: errors.length === 0, errors }; + } + + // predicate | value → CEL + if (dialect && dialect !== 'cel') { + errors.push({ source, message: `expected a CEL expression but got a \`${dialect}\` dialect.` }); + return { ok: false, errors }; + } + const compiled = celEngine.compile(source); + if (!compiled.ok) { + const hint = bracesHint(source); + errors.push({ + source, + message: + `invalid CEL ${role}: ${compiled.error.message}` + + (hint ? ` — ${hint}` : ` — ${role}s are bare CEL (e.g. \`record.rating >= 4\`).`), + }); + } else { + checkFieldExistence(source, schema, errors); + } + return { ok: errors.length === 0, errors }; +} + +function bracesHintForTemplate(source: string): string { + const m = SINGLE_BRACE_RE.exec(source); + const ref = m?.[1] ?? 'field'; + return `single-brace \`{${ref}}\` is not a valid template hole — use double braces: \`{{ ${ref} }}\`.`; +} + +/** + * Introspect what an author (esp. an agent) may use in a field (Decision 1e): + * the expected dialect, the in-scope field references, and the callable + * functions. Feeds the authoring context so the model does not guess. + */ +export function introspectScope(role: FieldRole, schema?: ExprSchemaHint): { + dialect: 'cel' | 'template'; + fields: string[]; + roots: string[]; + functions: string[]; +} { + return { + dialect: expectedDialect(role), + fields: [...(schema?.fields ?? [])], + roots: ['record', 'previous', 'input', 'os', 'vars'], + functions: CEL_STDLIB_FUNCTIONS, + }; +} + +/** Public catalog of CEL stdlib functions available in expressions. */ +export const CEL_STDLIB_FUNCTIONS: string[] = [ + 'now', 'today', 'daysFromNow', 'daysBetween', 'date', 'datetime', 'timestamp', + 'isBlank', 'isEmpty', 'coalesce', 'len', 'size', 'int', 'float', 'string', 'bool', + 'upper', 'lower', 'trim', 'contains', 'startsWith', 'endsWith', 'matches', + 'has', 'min', 'max', 'abs', 'round', +]; diff --git a/packages/services/service-ai/package.json b/packages/services/service-ai/package.json index 9fe991ff0..fb6c18a56 100644 --- a/packages/services/service-ai/package.json +++ b/packages/services/service-ai/package.json @@ -20,6 +20,7 @@ "dependencies": { "@ai-sdk/provider": "^3.0.10", "@objectstack/core": "workspace:*", + "@objectstack/formula": "workspace:*", "@objectstack/spec": "workspace:*", "@objectstack/types": "workspace:*", "ai": "^6.0.193", diff --git a/packages/services/service-ai/src/__tests__/metadata-tools.test.ts b/packages/services/service-ai/src/__tests__/metadata-tools.test.ts index f91362f5e..441f2a8e3 100644 --- a/packages/services/service-ai/src/__tests__/metadata-tools.test.ts +++ b/packages/services/service-ai/src/__tests__/metadata-tools.test.ts @@ -52,8 +52,8 @@ function createMockMetadataService( // ═══════════════════════════════════════════════════════════════════ describe('Metadata Tool Definitions', () => { - it('should define exactly 6 tools', () => { - expect(METADATA_TOOL_DEFINITIONS).toHaveLength(6); + it('should define exactly 7 tools', () => { + expect(METADATA_TOOL_DEFINITIONS).toHaveLength(7); }); it('should include all expected tool names', () => { @@ -65,6 +65,7 @@ describe('Metadata Tool Definitions', () => { 'delete_field', 'list_objects', 'describe_object', + 'validate_expression', ]); }); @@ -156,14 +157,15 @@ describe('registerMetadataTools', () => { registerMetadataTools(registry, { metadataService }); }); - it('should register all 6 tools', () => { - expect(registry.size).toBe(6); + it('should register all 7 tools', () => { + expect(registry.size).toBe(7); expect(registry.has('create_object')).toBe(true); expect(registry.has('add_field')).toBe(true); expect(registry.has('modify_field')).toBe(true); expect(registry.has('delete_field')).toBe(true); expect(registry.has('list_objects')).toBe(true); expect(registry.has('describe_object')).toBe(true); + expect(registry.has('validate_expression')).toBe(true); }); }); @@ -188,10 +190,10 @@ describe('registerDataTools + registerMetadataTools — unified list/describe', const sizeAfterBoth = registry.size; // Data tools define: query_records, get_record, aggregate_data (3) - // Metadata tools define: create_object, add_field, modify_field, delete_field, list_objects, describe_object (6) - // Total should be 3 + 6 = 9 + // Metadata tools define: create_object, add_field, modify_field, delete_field, list_objects, describe_object, validate_expression (7) + // Total should be 3 + 7 = 10 expect(sizeAfterData).toBe(3); - expect(sizeAfterBoth).toBe(sizeAfterData + 6); + expect(sizeAfterBoth).toBe(sizeAfterData + 7); // Unified list/describe should be present (from metadata tools) expect(registry.has('list_objects')).toBe(true); diff --git a/packages/services/service-ai/src/tools/index.ts b/packages/services/service-ai/src/tools/index.ts index 645d75d4a..00e6d2318 100644 --- a/packages/services/service-ai/src/tools/index.ts +++ b/packages/services/service-ai/src/tools/index.ts @@ -19,3 +19,4 @@ export { modifyFieldTool } from './modify-field.tool.js'; export { deleteFieldTool } from './delete-field.tool.js'; export { listObjectsTool } from './list-objects.tool.js'; export { describeObjectTool } from './describe-object.tool.js'; +export { validateExpressionTool } from './validate-expression.tool.js'; diff --git a/packages/services/service-ai/src/tools/metadata-tools.ts b/packages/services/service-ai/src/tools/metadata-tools.ts index 4c25dce15..1739f5b7b 100644 --- a/packages/services/service-ai/src/tools/metadata-tools.ts +++ b/packages/services/service-ai/src/tools/metadata-tools.ts @@ -15,6 +15,7 @@ export { modifyFieldTool } from './modify-field.tool.js'; export { deleteFieldTool } from './delete-field.tool.js'; export { listObjectsTool } from './list-objects.tool.js'; export { describeObjectTool } from './describe-object.tool.js'; +export { validateExpressionTool } from './validate-expression.tool.js'; import { createObjectTool } from './create-object.tool.js'; import { addFieldTool } from './add-field.tool.js'; @@ -22,6 +23,8 @@ import { modifyFieldTool } from './modify-field.tool.js'; import { deleteFieldTool } from './delete-field.tool.js'; import { listObjectsTool } from './list-objects.tool.js'; import { describeObjectTool } from './describe-object.tool.js'; +import { validateExpressionTool } from './validate-expression.tool.js'; +import { validateExpression, introspectScope, type FieldRole } from '@objectstack/formula'; /** All built-in metadata management tool definitions (Tool metadata). */ export const METADATA_TOOL_DEFINITIONS: Tool[] = [ @@ -31,6 +34,7 @@ export const METADATA_TOOL_DEFINITIONS: Tool[] = [ deleteFieldTool, listObjectsTool, describeObjectTool, + validateExpressionTool, ]; // --------------------------------------------------------------------------- @@ -190,6 +194,42 @@ export interface MetadataToolContext { // Handler Factories // --------------------------------------------------------------------------- +/** + * validate_expression (ADR-0032 §1e) — run the shared validator on an + * expression before it is saved, so the agent self-corrects at authoring time. + * Resolves the object's field names (when `objectName` is given) for + * schema-aware field-existence checks. + */ +function createValidateExpressionHandler(ctx: MetadataToolContext): ToolHandler { + return async (args) => { + const { role, source, objectName } = args as { role?: string; source?: string; objectName?: string }; + if (!source || typeof source !== 'string') { + return JSON.stringify({ ok: false, errors: [{ message: '"source" is required' }] }); + } + const fieldRole: FieldRole = role === 'template' || role === 'value' ? role : 'predicate'; + + let fields: string[] | undefined; + if (objectName) { + try { + const objectDef = (await ctx.metadataService.getObject(objectName)) as ObjectDef | undefined; + if (objectDef?.fields) fields = Object.keys(objectDef.fields); + } catch { + // schema lookup is best-effort — fall back to syntax-only validation + } + } + + const result = validateExpression(fieldRole, source, objectName ? { objectName, fields } : undefined); + const scope = introspectScope(fieldRole, objectName ? { objectName, fields } : undefined); + return JSON.stringify({ + ok: result.ok, + errors: result.errors, + dialect: scope.dialect, + // On failure, surface what IS in scope so the agent can fix the reference. + ...(result.ok ? {} : { availableFields: scope.fields, roots: scope.roots, functions: scope.functions }), + }); + }; +} + function createCreateObjectHandler(ctx: MetadataToolContext): ToolHandler { return async (args) => { const { name, label, packageId: explicitPackageId, fields, enableFeatures } = args as { @@ -605,4 +645,5 @@ export function registerMetadataTools( registry.register(deleteFieldTool, createDeleteFieldHandler(context)); registry.register(listObjectsTool, createListObjectsHandler(context)); registry.register(describeObjectTool, createDescribeObjectHandler(context)); + registry.register(validateExpressionTool, createValidateExpressionHandler(context)); } diff --git a/packages/services/service-ai/src/tools/validate-expression.tool.ts b/packages/services/service-ai/src/tools/validate-expression.tool.ts new file mode 100644 index 000000000..202e051c8 --- /dev/null +++ b/packages/services/service-ai/src/tools/validate-expression.tool.ts @@ -0,0 +1,50 @@ +// Copyright (c) 2025 ObjectStack. Licensed under the Apache-2.0 license. + +import { defineTool } from '@objectstack/spec/ai'; + +/** + * validate_expression — AI Tool Metadata (ADR-0032 §Decision 1e) + * + * Lets an authoring agent check an expression *before* committing it — closing + * the self-correction loop at authoring time instead of at build/run time. It + * runs the same shared validator (`@objectstack/formula`) that `objectstack + * build` and `registerFlow` use, so the verdict and the corrective message are + * identical across every surface. + * + * Use it whenever writing a `condition` / `guard` / formula / computed value + * (role `predicate` or `value`, bare CEL) or a notification/title text body + * (role `template`, `{{ path }}` holes). Pass `objectName` to additionally + * check that referenced `record.` names exist on that object. + */ +export const validateExpressionTool = defineTool({ + name: 'validate_expression', + label: 'Validate Expression', + description: + 'Validate an ObjectStack expression (flow/validation condition, formula, computed value, or text template) ' + + 'BEFORE saving it. Returns { ok, errors[] } with self-correcting messages. ' + + 'Predicates and computed values are bare CEL (e.g. `record.rating >= 4`) — never wrap field references in `{…}` braces. ' + + 'Templates use `{{ path }}` holes. Pass objectName to also check that record. references exist.', + category: 'utility', + builtIn: true, + parameters: { + type: 'object', + properties: { + role: { + type: 'string', + enum: ['predicate', 'value', 'template'], + description: + "Field role: 'predicate' (boolean condition/guard, bare CEL), 'value' (computed value, bare CEL), or 'template' (text with {{ path }} holes).", + }, + source: { + type: 'string', + description: 'The expression source to validate.', + }, + objectName: { + type: 'string', + description: 'Optional object machine name (snake_case) for schema-aware field-existence checks.', + }, + }, + required: ['role', 'source'], + additionalProperties: false, + }, +}); diff --git a/packages/services/service-automation/src/engine.test.ts b/packages/services/service-automation/src/engine.test.ts index ab75e42e0..0d784a4f6 100644 --- a/packages/services/service-automation/src/engine.test.ts +++ b/packages/services/service-automation/src/engine.test.ts @@ -342,6 +342,39 @@ describe('AutomationEngine', () => { expect(executed).toContain('yes_branch'); expect(executed).not.toContain('no_branch'); }); + + // ADR-0032 §Decision 1a — a malformed condition (the #1491 `{record.x}` + // template-brace-in-CEL mistake) is rejected LOUDLY at registration, with + // the offending location + source + a corrective hint — not silently at + // run time. + it('rejects a brace-in-CEL condition at registration — #1491 (Decision 1a)', () => { + const register = () => + engine.registerFlow('bad_condition', { + name: 'bad_condition', + label: 'Bad Condition', + type: 'record_change', + nodes: [ + { id: 'check', type: 'decision', label: 'Check', config: { condition: '{record.rating} >= 4' } }, + { id: 'start', type: 'start', label: 'Start' }, + { id: 'end', type: 'end', label: 'End' }, + ], + edges: [ + { id: 'e1', source: 'start', target: 'check' }, + { id: 'e2', source: 'check', target: 'end', condition: '{record.rating} >= 4' }, + ], + }); + expect(register).toThrow(/\{record\.rating\} >= 4/); + expect(register).toThrow(/template braces|bare CEL/); + }); + + // ADR-0032 §Decision 1c — defense in depth: if a malformed predicate ever + // reaches the runtime evaluator (bypassing registration validation), it + // THROWS an attributed error rather than swallowing to `false`. + it('evaluateCondition throws (never returns false) on a malformed CEL predicate — Decision 1c', () => { + const vars = new Map([['record', { rating: 5 }]]); + expect(() => engine.evaluateCondition({ dialect: 'cel', source: '{record.rating} >= 4' }, vars)) + .toThrow(/source:|template braces|bare CEL/); + }); }); describe('Durable Suspend / Resume (ADR-0019)', () => { diff --git a/packages/services/service-automation/src/engine.ts b/packages/services/service-automation/src/engine.ts index 3146997ff..839b2566f 100644 --- a/packages/services/service-automation/src/engine.ts +++ b/packages/services/service-automation/src/engine.ts @@ -14,7 +14,7 @@ import { ConnectorSchema } from '@objectstack/spec/integration'; // silently returned `false`, so EVERY start-node / edge condition (record-change // `previous.*`, `budget > 100000`, …) skipped its flow. A static import binds the // engine at module load in both ESM and CJS builds. -import { ExpressionEngine } from '@objectstack/formula'; +import { ExpressionEngine, validateExpression } from '@objectstack/formula'; // ─── Node Executor Interface (Plugin Extension Point) ─────────────── @@ -608,6 +608,13 @@ export class AutomationEngine implements IAutomationService { // executeNode() already throws NO_EXECUTOR at run time for unknown types. this.validateNodeTypes(name, parsed); + // ADR-0032 §Decision 1a — parse-validate every predicate at registration, + // so a malformed condition (e.g. the #1491 `{record.x}` template-brace-in- + // CEL mistake) is a LOUD registration error with the offending source, + // not a silent runtime `false`. Hard-fail: a broken predicate is never + // safe to run. + this.validateFlowExpressions(name, parsed); + // Version history management const history = this.flowVersionHistory.get(name) ?? []; history.push({ @@ -1060,6 +1067,48 @@ export class AutomationEngine implements IAutomationService { } } + /** + * ADR-0032 §Decision 1a — parse-validate every predicate in the flow at + * registration. Predicates are bare CEL; this catches the #1491 class + * (`{record.x}` template braces in a condition → CEL parse error) and any + * other malformed predicate LOUDLY, with the offending location + source + + * a corrective hint, instead of letting it fail silently at run time. + * + * Only the *predicate* surfaces are checked here (start/node `config.condition` + * and `edge.condition`) — node string fields are templates (a different + * dialect) and are validated by the template engine, not as CEL. + */ + private validateFlowExpressions(flowName: string, flow: FlowParsed): void { + const failures: string[] = []; + + const check = (where: string, raw: unknown): void => { + if (raw == null) return; + // Conditions are predicates (bare CEL). Delegate to the one shared + // validator (ADR-0032 §5) so the corrective message matches the CLI + // build and the agent `validate_expression` tool exactly. + const result = validateExpression('predicate', raw as string | { dialect?: string; source?: string }); + for (const e of result.errors) { + failures.push(` • ${where}: ${e.message}\n source: \`${e.source}\``); + } + }; + + for (const node of flow.nodes) { + const cfg = (node.config ?? {}) as Record; + // start-node trigger gate + decision/branch predicates live in config.condition + check(`node '${node.id}' (${node.type}) condition`, cfg.condition); + } + for (const edge of flow.edges) { + check(`edge '${edge.id}' (${edge.source}→${edge.target}) condition`, edge.condition as unknown); + } + + if (failures.length > 0) { + throw new Error( + `Flow '${flowName}' has ${failures.length} invalid condition${failures.length > 1 ? 's' : ''} (ADR-0032 §1a). ` + + `Conditions are bare CEL — do not wrap field references in \`{…}\` template braces:\n${failures.join('\n')}`, + ); + } + } + /** * Detect cycles in the flow graph (DAG validation). * Uses DFS with coloring (white/gray/black) to detect back edges. @@ -1453,10 +1502,27 @@ export class AutomationEngine implements IAutomationService { { dialect: 'cel', source: exprStr }, { extra: { ...vars, vars }, record: vars }, ); - if (!result.ok) return false; + // ADR-0032 §Decision 1c — NO silent fallback. A non-`ok` result is a + // real fault (malformed predicate, or — pre build-validation — a + // `{…}` template mistakenly written into a CEL condition). Surfacing + // it as a thrown, attributed error makes execute()'s catch record a + // loud flow failure, instead of the old `return false` that made a + // broken condition indistinguishable from "condition not met" (#1491). + if (!result.ok) { + throw new Error( + `condition failed to evaluate as CEL: ${result.error?.message ?? 'unknown error'} — ` + + `source: \`${exprStr}\`. Conditions are bare CEL (e.g. \`record.rating >= 4\`); ` + + `do not wrap field references in \`{…}\` template braces.`, + ); + } return Boolean(result.value); - } catch { - return false; + } catch (err) { + // Re-throw with the source attached (ADR-0032 §1d — errors written + // for self-correction). Never swallow to `false`. + const msg = (err as Error)?.message ?? String(err); + throw new Error( + msg.includes('source:') ? msg : `condition evaluation error: ${msg} — source: \`${exprStr}\``, + ); } } diff --git a/packages/spec/src/automation/flow.zod.ts b/packages/spec/src/automation/flow.zod.ts index ff15f819a..7ca9e97c4 100644 --- a/packages/spec/src/automation/flow.zod.ts +++ b/packages/spec/src/automation/flow.zod.ts @@ -210,8 +210,11 @@ export const FlowEdgeSchema = lazySchema(() => z.object({ * ], * edges: [ * { id: "e1", source: "start", target: "check_amount" }, - * { id: "e2", source: "check_amount", target: "auto_approve", condition: "{amount} < 500" }, - * { id: "e3", source: "check_amount", target: "submit_for_approval", condition: "{amount} >= 500" } + * // Conditions are bare CEL (ADR-0032). Reference fields directly — + * // `record.amount`, `previous.status`, `.field` — and DO NOT wrap them + * // in `{…}` template braces: `{amount}` parses as a CEL map literal and fails. + * { id: "e2", source: "check_amount", target: "auto_approve", condition: "record.amount < 500" }, + * { id: "e3", source: "check_amount", target: "submit_for_approval", condition: "record.amount >= 500" } * ] * } */ diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 38699077b..b1371c65b 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -470,6 +470,9 @@ importers: '@objectstack/driver-sqlite-wasm': specifier: workspace:^ version: link:../plugins/driver-sqlite-wasm + '@objectstack/formula': + specifier: workspace:* + version: link:../formula '@objectstack/objectql': specifier: workspace:^ version: link:../objectql @@ -1684,6 +1687,9 @@ importers: '@objectstack/core': specifier: workspace:* version: link:../../core + '@objectstack/formula': + specifier: workspace:* + version: link:../../formula '@objectstack/spec': specifier: workspace:* version: link:../../spec From bcec3ab91fb197f5442fa1b5f201045ff716341f Mon Sep 17 00:00:00 2001 From: Jack Zhuang <277994282+os-zhuang@users.noreply.github.com> Date: Tue, 2 Jun 2026 12:48:36 +0800 Subject: [PATCH 2/2] fix(formula): linear template-hole regex (avoid polynomial ReDoS) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CodeQL flagged the template hole matcher `/\{\{\s*([^}]*?)\s*\}\}/` as a polynomial-ReDoS: `\s` ⊆ `[^}]`, so wrapping the lazy group in `\s*` makes the matcher ambiguous on untrusted template source. Switch to a single greedy `/\{\{([^}]*)\}\}/` and strip surrounding whitespace in parseHole — provably linear, identical behavior (74 formula tests green). Co-Authored-By: Claude Opus 4.8 --- packages/formula/src/template-engine.ts | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/packages/formula/src/template-engine.ts b/packages/formula/src/template-engine.ts index 8dc4e04e8..226c086d5 100644 --- a/packages/formula/src/template-engine.ts +++ b/packages/formula/src/template-engine.ts @@ -21,8 +21,14 @@ import type { Expression } from '@objectstack/spec'; import { buildScope } from './stdlib'; import type { DialectEngine, EvalContext, EvalResult } from './types'; -/** A hole: capture the full inner content (no `}` allowed inside). */ -const HOLE_RE = /\{\{\s*([^}]*?)\s*\}\}/g; +/** + * A hole: capture the full inner content (no `}` allowed inside). Uses a single + * greedy `[^}]*` (not `\s*…\s*` around a lazy group) so the pattern is linear — + * `\s` is a subset of `[^}]`, and wrapping a lazy group in `\s*` creates an + * ambiguous (polynomial-ReDoS) matcher. Surrounding whitespace is stripped in + * `parseHole` instead. + */ +const HOLE_RE = /\{\{([^}]*)\}\}/g; // ───────────────────────── formatter whitelist (ADR-0032 §3) ──────────────