diff --git a/packages/cli/src/commands/lint.ts b/packages/cli/src/commands/lint.ts index 79081e5fa..4871e0815 100644 --- a/packages/cli/src/commands/lint.ts +++ b/packages/cli/src/commands/lint.ts @@ -2,9 +2,14 @@ import { Args, Command, Flags } from '@oclif/core'; import chalk from 'chalk'; +import { bundleRequire } from 'bundle-require'; import { normalizeStackInput } from '@objectstack/spec'; -import { loadConfig } from '../utils/config.js'; +import { loadConfig, BUNDLE_REQUIRE_EXTERNALS } from '../utils/config.js'; import { computeI18nCoverage } from '../utils/i18n-coverage.js'; +import { lintDataModel } from '../lint/data-model-rules.js'; +import { scoreMetadata } from '../lint/score.js'; +import { runMetadataEval } from '../lint/metadata-eval.js'; +import { DEFAULT_METADATA_EVAL_CORPUS } from '../lint/corpus.js'; import { printHeader, printSuccess, @@ -209,6 +214,11 @@ export function lintConfig(config: any): LintIssue[] { } } + // ── Data-model best practices (relationships / master-detail / roll-ups) ── + // Cross-object rules that encode the conventions in ADR-0035 and the + // objectstack-data/-ui skills. These double as the eval rubric (see score.ts). + issues.push(...lintDataModel(objects)); + return issues; } @@ -224,6 +234,19 @@ export default class Lint extends Command { static override flags = { json: Flags.boolean({ description: 'Output as JSON' }), fix: Flags.boolean({ description: 'Show what would be fixed (dry-run)' }), + score: Flags.boolean({ + description: 'Print a 0–100 metadata-quality score (the lint rubric) for this project', + }), + eval: Flags.boolean({ + description: 'Run the metadata-generation eval over the bundled golden corpus and report scores', + }), + generator: Flags.string({ + description: 'Path to a module that default-exports (prompt, id) => stack; enables live eval (scores generated output instead of fixtures). Requires --eval.', + }), + 'eval-min': Flags.integer({ + description: 'Minimum passing score per eval case', + default: 75, + }), 'skip-i18n': Flags.boolean({ description: 'Skip translation coverage checks' }), 'i18n-strict': Flags.boolean({ description: 'Treat missing translations in non-default locales as errors', @@ -239,6 +262,14 @@ export default class Lint extends Command { const configPath = args.config; const timer = createTimer(); + // ── Eval mode — score generated metadata against the convention rubric ── + // Short-circuits the project lint: this evaluates a generation corpus, not + // the current config. + if (flags.eval) { + await this.runEval(flags, timer); + return; + } + if (!flags.json) { printHeader('Lint'); printStep('Loading configuration...'); @@ -270,6 +301,9 @@ export default class Lint extends Command { } } + // Metadata-quality score (the lint rubric expressed as 0–100). + const score = flags.score ? scoreMetadata(normalized) : null; + // ── JSON output ── if (flags.json) { const errors = issues.filter((i) => i.severity === 'error'); @@ -281,6 +315,7 @@ export default class Lint extends Command { errors: errors.length, warnings: warnings.length, suggestions: suggestions.length, + ...(score ? { score: score.score, grade: score.grade } : {}), issues, duration: timer.elapsed(), }, null, 2)); @@ -292,6 +327,7 @@ export default class Lint extends Command { if (issues.length === 0) { printSuccess(`All checks passed ${chalk.dim(`(${timer.display()})`)}`); + if (score) this.printScore(score); console.log(''); return; } @@ -343,6 +379,8 @@ export default class Lint extends Command { if (suggestions.length > 0) parts.push(chalk.blue(`${suggestions.length} suggestion(s)`)); console.log(` ${parts.join(', ')} ${chalk.dim(`(${timer.display()})`)}`); + if (score) this.printScore(score); + if (flags.fix) { console.log(''); printInfo('Dry-run mode: no files were modified.'); @@ -362,4 +400,87 @@ export default class Lint extends Command { process.exit(1); } } + + private printScore(score: ReturnType): void { + const gColor = + score.grade === 'A' ? chalk.green : + score.grade === 'B' ? chalk.cyan : + score.grade === 'C' ? chalk.yellow : + chalk.red; + console.log(''); + console.log(` ${chalk.bold('Metadata quality:')} ${gColor(`${score.score}/100 (${score.grade})`)}`); + const c = score.counts; + console.log( + chalk.dim( + ` ${c.schemaErrors} schema · ${c.errors} error(s) · ${c.warnings} warning(s) · ${c.suggestions} suggestion(s)`, + ), + ); + } + + /** + * Eval mode (`--eval`): run the metadata-generation rubric over the bundled + * golden corpus (offline), or — when `--generator ` is supplied — + * over the stacks that module produces for each prompt (live). + */ + private async runEval(flags: any, timer: ReturnType): Promise { + let generate: ((prompt: string, id: string) => unknown | Promise) | undefined; + + if (flags.generator) { + try { + const { mod } = await bundleRequire({ + filepath: flags.generator, + external: BUNDLE_REQUIRE_EXTERNALS, + }); + const fn = (mod as any).default ?? (mod as any).generate; + if (typeof fn !== 'function') { + throw new Error('module must default-export a function (prompt, id) => stack'); + } + generate = fn; + } catch (error: any) { + const msg = `Failed to load generator "${flags.generator}": ${error?.message || error}`; + if (flags.json) console.log(JSON.stringify({ error: msg })); + else printError(msg); + process.exit(1); + } + } + + const report = await runMetadataEval(DEFAULT_METADATA_EVAL_CORPUS, { + ...(generate ? { generate } : {}), + minScore: flags['eval-min'], + }); + + if (flags.json) { + console.log(JSON.stringify({ ...report, duration: timer.elapsed() }, null, 2)); + if (!report.ok) process.exit(1); + return; + } + + printHeader('Metadata Generation Eval'); + printInfo(`Mode: ${chalk.white(report.mode)} · cases: ${report.total} · pass bar: ${flags['eval-min']}`); + console.log(''); + + for (const r of report.results) { + const ok = r.passed; + const color = ok ? chalk.green : chalk.red; + const icon = ok ? '✓' : '✗'; + console.log(` ${color(icon)} ${chalk.bold(r.id)} ${color(`${r.score.score}/100 (${r.score.grade})`)}`); + if (r.generationError) { + console.log(chalk.red(` generation error: ${r.generationError}`)); + } else if (!ok) { + const c = r.score.counts; + console.log(chalk.dim(` ${c.schemaErrors} schema · ${c.errors} error(s) · ${c.warnings} warning(s)`)); + const firstReal = r.score.issues.find((i) => i.severity !== 'suggestion') || r.score.issues[0]; + if (firstReal) console.log(chalk.dim(` e.g. ${firstReal.rule}: ${firstReal.message}`)); + } + } + + console.log(''); + const summaryColor = report.ok ? chalk.green : chalk.red; + console.log( + ` ${summaryColor(`${report.passed}/${report.total} passed`)} · mean ${report.meanScore}/100 ${chalk.dim(`(${timer.display()})`)}`, + ); + console.log(''); + + if (!report.ok) process.exit(1); + } } diff --git a/packages/cli/src/lint/corpus.ts b/packages/cli/src/lint/corpus.ts new file mode 100644 index 000000000..382bb45d1 --- /dev/null +++ b/packages/cli/src/lint/corpus.ts @@ -0,0 +1,274 @@ +// Copyright (c) 2025 ObjectStack. Licensed under the Apache-2.0 license. + +/** + * Golden metadata-generation eval corpus. + * + * Each case pairs a natural-language authoring prompt with a fixture stack that + * represents the *ideal* generated output — schema-valid and following the + * platform's modelling conventions (master-detail ownership, inlineEdit for + * line items, relatedList/no-inlineEdit for associations, roll-up summaries, + * select options, name fields, labels). Offline, the harness asserts these + * golden stacks clear the quality bar; live, the same prompts benchmark a real + * model against them. + * + * Keep these representative of common enterprise shapes — they double as + * worked examples of the conventions the AI generator should target. + */ + +import type { MetadataEvalCase } from './metadata-eval.js'; + +const manifest = (id: string, namespace: string, name: string) => ({ + id, + namespace, + version: '1.0.0', + name, + type: 'app' as const, +}); + +export const DEFAULT_METADATA_EVAL_CORPUS: MetadataEvalCase[] = [ + { + id: 'invoice_with_line_items', + prompt: + 'Model an invoicing app: an invoice with multiple line items (product, quantity, unit price, amount). The invoice total should sum its line amounts, and line items are entered together with the invoice.', + note: 'master_detail + inlineEdit + roll-up summary', + fixture: { + manifest: manifest('invoicing', 'invoicing', 'Invoicing'), + objects: [ + { + name: 'invoice', + label: 'Invoice', + fields: { + name: { type: 'text', label: 'Invoice Number', required: true }, + account: { type: 'lookup', label: 'Account', reference: 'account' }, + status: { + type: 'select', + label: 'Status', + options: [ + { label: 'Draft', value: 'draft' }, + { label: 'Sent', value: 'sent' }, + { label: 'Paid', value: 'paid' }, + ], + }, + total: { + type: 'summary', + label: 'Total', + summaryOperations: { object: 'invoice_line', field: 'amount', function: 'sum' }, + }, + }, + }, + { + name: 'invoice_line', + label: 'Invoice Line', + fields: { + invoice: { + type: 'master_detail', + label: 'Invoice', + reference: 'invoice', + required: true, + deleteBehavior: 'cascade', + inlineEdit: true, + relatedListTitle: 'Line Items', + }, + product: { type: 'text', label: 'Product', required: true }, + quantity: { type: 'number', label: 'Quantity', required: true }, + unit_price: { type: 'currency', label: 'Unit Price', required: true }, + amount: { type: 'currency', label: 'Amount', required: true }, + }, + }, + { + name: 'account', + label: 'Account', + fields: { name: { type: 'text', label: 'Account Name', required: true } }, + }, + ], + }, + }, + + { + id: 'project_with_tasks', + prompt: + 'A project management app: a project owns many tasks (title, status, estimate in hours). Tasks are edited inline within the project, and the project shows a task count and total estimate.', + note: 'master_detail + inlineEdit + count/sum roll-ups', + fixture: { + manifest: manifest('pm', 'pm_app', 'Project Management'), + objects: [ + { + name: 'project', + label: 'Project', + fields: { + name: { type: 'text', label: 'Project Name', required: true }, + status: { + type: 'select', + label: 'Status', + options: [ + { label: 'Planned', value: 'planned' }, + { label: 'Active', value: 'active' }, + { label: 'Done', value: 'done' }, + ], + }, + task_count: { + type: 'summary', + label: 'Tasks', + summaryOperations: { object: 'task', field: 'estimate_hours', function: 'count' }, + }, + total_estimate: { + type: 'summary', + label: 'Total Estimate', + summaryOperations: { object: 'task', field: 'estimate_hours', function: 'sum' }, + }, + }, + }, + { + name: 'task', + label: 'Task', + fields: { + title: { type: 'text', label: 'Title', required: true }, + project: { + type: 'master_detail', + label: 'Project', + reference: 'project', + required: true, + deleteBehavior: 'cascade', + inlineEdit: true, + }, + status: { + type: 'select', + label: 'Status', + options: [ + { label: 'To Do', value: 'todo' }, + { label: 'In Progress', value: 'in_progress' }, + { label: 'Done', value: 'done' }, + ], + }, + estimate_hours: { type: 'number', label: 'Estimate (h)' }, + }, + }, + ], + }, + }, + + { + id: 'blog_post_with_comments', + prompt: + 'A blog: posts have a title and body. Readers leave comments on a post (author, body). Comments belong to the post but are an activity stream, not something you fill in when writing the post.', + note: 'association child: master_detail WITHOUT inlineEdit (related list on detail page)', + fixture: { + manifest: manifest('blog', 'blog_app', 'Blog'), + objects: [ + { + name: 'post', + label: 'Post', + fields: { + title: { type: 'text', label: 'Title', required: true }, + body: { type: 'textarea', label: 'Body' }, + }, + }, + { + name: 'post_comment', + label: 'Comment', + fields: { + // Association: owned by the post (cascade) but NOT inlineEdit — + // surfaced as a related list on the post's detail page. + post: { + type: 'master_detail', + label: 'Post', + reference: 'post', + required: true, + deleteBehavior: 'cascade', + }, + author: { type: 'text', label: 'Author', required: true }, + body: { type: 'textarea', label: 'Comment', required: true }, + }, + }, + ], + }, + }, + + { + id: 'expense_report_with_lines', + prompt: + 'An expense report app: a report has a title and a submitter. It contains expense lines (category, description, amount, date). The report total sums the line amounts and lines are entered inline.', + note: 'master_detail + inlineEdit + sum roll-up + select options', + fixture: { + manifest: manifest('expenses', 'expenses', 'Expenses'), + objects: [ + { + name: 'expense_report', + label: 'Expense Report', + fields: { + name: { type: 'text', label: 'Title', required: true }, + submitter: { type: 'text', label: 'Submitter', required: true }, + total: { + type: 'summary', + label: 'Total', + summaryOperations: { object: 'expense_line', field: 'amount', function: 'sum' }, + }, + }, + }, + { + name: 'expense_line', + label: 'Expense Line', + fields: { + expense_report: { + type: 'master_detail', + label: 'Expense Report', + reference: 'expense_report', + required: true, + deleteBehavior: 'cascade', + inlineEdit: true, + }, + category: { + type: 'select', + label: 'Category', + options: [ + { label: 'Travel', value: 'travel' }, + { label: 'Meals', value: 'meals' }, + { label: 'Lodging', value: 'lodging' }, + ], + }, + description: { type: 'text', label: 'Description' }, + amount: { type: 'currency', label: 'Amount', required: true }, + spent_on: { type: 'date', label: 'Date' }, + }, + }, + ], + }, + }, + + { + id: 'crm_account_with_contacts', + prompt: + 'A simple CRM: accounts and their contacts. A contact belongs to an account but can exist independently and is not entered inline with the account.', + note: 'lookup (independent child) — should NOT be master_detail/inlineEdit', + fixture: { + manifest: manifest('crm', 'crm_app', 'CRM'), + objects: [ + { + name: 'account', + label: 'Account', + fields: { + name: { type: 'text', label: 'Account Name', required: true }, + industry: { + type: 'select', + label: 'Industry', + options: [ + { label: 'Tech', value: 'tech' }, + { label: 'Retail', value: 'retail' }, + ], + }, + }, + }, + { + name: 'contact', + label: 'Contact', + fields: { + full_name: { type: 'text', label: 'Full Name', required: true }, + email: { type: 'email', label: 'Email' }, + // Independent lifecycle → lookup, not master_detail. + account: { type: 'lookup', label: 'Account', reference: 'account' }, + }, + }, + ], + }, + }, +]; diff --git a/packages/cli/src/lint/data-model-rules.ts b/packages/cli/src/lint/data-model-rules.ts new file mode 100644 index 000000000..a1c42a872 --- /dev/null +++ b/packages/cli/src/lint/data-model-rules.ts @@ -0,0 +1,233 @@ +// Copyright (c) 2025 ObjectStack. Licensed under the Apache-2.0 license. + +/** + * Data-model best-practice lint rules. + * + * These rules encode the relationship / master-detail / roll-up conventions the + * platform ships (see the objectstack-data and objectstack-ui skills, ADR-0035). + * They run over the normalized object set and flag anti-patterns that an + * author — human OR an AI generator — commonly produces. They are intentionally + * heuristic: structural problems are `error`, likely-wrong choices are + * `warning`, and "you probably want this" nudges are `suggestion`. None of them + * block on a judgement call. + * + * The same rules double as the automated rubric for the metadata-generation + * eval (see `score.ts`): a generated stack scores well exactly when it is + * schema-valid AND lint-clean here. + */ + +export type Severity = 'error' | 'warning' | 'suggestion'; + +export interface LintIssue { + severity: Severity; + rule: string; + message: string; + path: string; + fix?: string; +} + +// ─── Heuristics ───────────────────────────────────────────────────── + +const RELATIONSHIP_TYPES = new Set(['lookup', 'master_detail']); +const NUMERIC_TYPES = new Set([ + 'number', 'currency', 'integer', 'decimal', 'percent', 'float', 'double', +]); +const OPTION_FIELD_TYPES = new Set(['select', 'multiselect', 'radio', 'enum']); +const NAME_LIKE_FIELDS = ['name', 'title', 'subject', 'label', 'full_name', 'display_name', 'code']; + +/** Child object names that read as line-items / composition (entered with the parent). */ +const LINE_ITEM_RE = /_(line|lines|line_item|line_items|item|items|detail|details|entry|entries)$/; +/** Child object names that read as associations (comments/audit/activity — NOT line items). */ +const ASSOCIATION_TOKENS = [ + 'comment', 'attachment', 'note', 'log', 'audit', 'activity', 'activities', + 'history', 'event', 'reaction', 'like', 'mention', 'notification', 'message', +]; + +function isLineItemName(name: string): boolean { + return LINE_ITEM_RE.test(name); +} + +function isAssociationName(name: string): boolean { + const lc = name.toLowerCase(); + return ASSOCIATION_TOKENS.some((t) => lc === t || lc.endsWith(`_${t}`) || lc.endsWith(`_${t}s`)); +} + +interface FieldEntry { + name: string; + def: any; +} + +function fieldEntries(fields: any): FieldEntry[] { + if (!fields) return []; + if (Array.isArray(fields)) { + return fields.filter((f) => f && f.name != null).map((f) => ({ name: String(f.name), def: f })); + } + return Object.entries(fields).map(([name, def]) => ({ name, def })); +} + +function refOf(def: any): string | undefined { + return def?.reference || def?.reference_to; +} + +// ─── Rule engine ──────────────────────────────────────────────────── + +/** + * Lint the relationship / data-modeling conventions across the full object set. + * Pure and deterministic — safe to call from both the `lint` command and the + * metadata-generation scorer. + */ +export function lintDataModel(objects: any[]): LintIssue[] { + const issues: LintIssue[] = []; + if (!Array.isArray(objects) || objects.length === 0) return issues; + + // Index: parent object name → child relationships pointing at it. + const childrenByParent: Record> = {}; + for (const child of objects) { + if (!child?.name) continue; + for (const { name: fieldName, def } of fieldEntries(child.fields)) { + if (!RELATIONSHIP_TYPES.has(def?.type)) continue; + const parent = refOf(def); + if (!parent) continue; + (childrenByParent[parent] ||= []).push({ child, fieldName, def }); + } + } + + for (let i = 0; i < objects.length; i++) { + const obj = objects[i]; + if (!obj?.name) continue; + const objPath = `objects[${i}]`; + const fields = fieldEntries(obj.fields); + + // R9 — object should have a derivable display/primary field. + const hasNameField = + !!obj.primaryField || + !!obj.titleFormat || + fields.some((f) => NAME_LIKE_FIELDS.includes(f.name)); + if (fields.length > 0 && !hasNameField) { + issues.push({ + severity: 'suggestion', + rule: 'object/missing-name-field', + message: `Object "${obj.name}" has no name/title field or primaryField — records will display as raw IDs`, + path: `${objPath}.fields`, + }); + } + + for (const { name: fieldName, def } of fields) { + if (!def || typeof def !== 'object') continue; + const fieldPath = `${objPath}.fields.${fieldName}`; + const type = def.type; + + // R8 — option fields need options (or an options source). + if (OPTION_FIELD_TYPES.has(type)) { + const hasOptions = + (Array.isArray(def.options) && def.options.length > 0) || + !!def.optionsFrom || !!def.dataSource || !!def.reference; + if (!hasOptions) { + issues.push({ + severity: 'warning', + rule: 'field/select-missing-options', + message: `${type} field "${obj.name}.${fieldName}" has no options`, + path: `${fieldPath}.options`, + }); + } + } + + if (!RELATIONSHIP_TYPES.has(type)) continue; + const parent = refOf(def); + + // R1 — relationship fields must declare a reference target. + if (!parent) { + issues.push({ + severity: 'error', + rule: 'relationship/missing-reference', + message: `${type} field "${obj.name}.${fieldName}" is missing a reference target`, + path: `${fieldPath}.reference`, + }); + continue; + } + + if (type === 'master_detail') { + // R2 — master-detail children should require their parent. + if (def.required !== true) { + issues.push({ + severity: 'warning', + rule: 'relationship/master-detail-required', + message: `master_detail "${obj.name}.${fieldName}" → ${parent} should be required (a detail record cannot exist without its master)`, + path: `${fieldPath}.required`, + fix: 'required: true', + }); + } + // R3 — be explicit about cascade behaviour. + if (def.deleteBehavior === undefined) { + issues.push({ + severity: 'suggestion', + rule: 'relationship/delete-behavior', + message: `master_detail "${obj.name}.${fieldName}" → ${parent} should declare deleteBehavior (cascade/restrict/set_null)`, + path: `${fieldPath}.deleteBehavior`, + fix: "deleteBehavior: 'cascade'", + }); + } + // R5 — line-item children are usually entered inline with the parent. + if (isLineItemName(obj.name) && def.inlineEdit !== true) { + issues.push({ + severity: 'suggestion', + rule: 'relationship/line-items-inline-edit', + message: `"${obj.name}" looks like line items of ${parent}; consider inlineEdit: true on "${fieldName}" so it is entered inline within the ${parent} form`, + path: `${fieldPath}.inlineEdit`, + fix: 'inlineEdit: true', + }); + } + } + + // R4 — a line-item-shaped child should usually be master_detail, not lookup. + if (type === 'lookup' && isLineItemName(obj.name)) { + issues.push({ + severity: 'suggestion', + rule: 'relationship/line-item-should-be-master-detail', + message: `"${obj.name}" looks like line items of ${parent} but uses lookup; master_detail gives ownership + cascade + roll-ups`, + path: `${fieldPath}.type`, + fix: "type: 'master_detail'", + }); + } + + // R6 — associations should NOT be inlined into the parent's entry form. + if (def.inlineEdit === true && isAssociationName(obj.name)) { + issues.push({ + severity: 'warning', + rule: 'relationship/association-inline-edit', + message: `"${obj.name}" is an association (comments/audit/activity), not line items — inlineEdit clutters the ${parent} entry form; surface it as a detail-page related list instead`, + path: `${fieldPath}.inlineEdit`, + fix: 'remove inlineEdit (use relatedList on the detail page)', + }); + } + } + + // R7 — a parent of master_detail children with numeric fields should roll one up. + const children = childrenByParent[obj.name] || []; + const summaryChildObjects = new Set( + fields + .filter((f) => f.def?.type === 'summary') + .map((f) => f.def?.summaryOperations?.object || f.def?.reference) + .filter(Boolean), + ); + const seenSuggestedChild = new Set(); + for (const { child, def } of children) { + if (def?.type !== 'master_detail') continue; + if (!child?.name || seenSuggestedChild.has(child.name)) continue; + if (summaryChildObjects.has(child.name)) continue; + // Only nudge when the child actually has something worth aggregating. + const numericChildField = fieldEntries(child.fields).find((f) => NUMERIC_TYPES.has(f.def?.type)); + if (!numericChildField) continue; + seenSuggestedChild.add(child.name); + issues.push({ + severity: 'suggestion', + rule: 'rollup/missing-summary', + message: `"${obj.name}" owns "${child.name}" (master_detail) with numeric field "${numericChildField.name}" but has no roll-up summary; consider a summary field (count/sum) on ${obj.name}`, + path: `${objPath}.fields`, + fix: `summary field aggregating ${child.name}.${numericChildField.name}`, + }); + } + } + + return issues; +} diff --git a/packages/cli/src/lint/metadata-eval.ts b/packages/cli/src/lint/metadata-eval.ts new file mode 100644 index 000000000..2b05139d5 --- /dev/null +++ b/packages/cli/src/lint/metadata-eval.ts @@ -0,0 +1,128 @@ +// Copyright (c) 2025 ObjectStack. Licensed under the Apache-2.0 license. + +/** + * Metadata-generation eval harness. + * + * Measures how well a stack of metadata follows the platform's modelling + * conventions, using `scoreMetadata` (the linter-as-rubric) as the judge. Two + * modes, same rubric: + * + * - **Offline (default):** each case ships a golden fixture stack — the ideal + * output for its prompt. Scoring the fixtures is a deterministic regression + * guard: it proves the conventions + rubric stay self-consistent, runs in CI, + * and needs no API key. + * + * - **Live (opt-in):** pass `generate(prompt, caseId) => stack`. The harness + * scores whatever the generator produced for each prompt instead of the + * fixture. Wire `generate` to `AIService.generateObject` + * (+ blueprint→metadata expansion) to benchmark a real model against the + * same bar. The seam is injected so this package keeps no LLM dependency. + */ + +import { scoreMetadata, type MetadataScore } from './score.js'; + +export interface MetadataEvalCase { + /** Stable id (snake_case). */ + id: string; + /** The natural-language authoring goal a generator would receive. */ + prompt: string; + /** Golden/representative stack used in offline mode. */ + fixture: unknown; + /** Minimum score to pass this case (defaults to the runner's `minScore`). */ + minScore?: number; + /** Optional human note about what the case exercises. */ + note?: string; +} + +export interface MetadataEvalCaseResult { + id: string; + prompt: string; + /** True when the (generated or fixture) stack failed to materialize. */ + generationError?: string; + score: MetadataScore; + minScore: number; + passed: boolean; + /** 'fixture' offline, 'generated' when a live generator produced the stack. */ + source: 'fixture' | 'generated'; +} + +export interface MetadataEvalReport { + results: MetadataEvalCaseResult[]; + total: number; + passed: number; + failed: number; + /** Mean score across all cases (0–100, rounded). */ + meanScore: number; + /** True when every case passed. */ + ok: boolean; + mode: 'offline' | 'live'; +} + +export interface RunMetadataEvalOptions { + /** + * Live generator. When provided, the harness scores `generate(prompt, id)` + * instead of the case fixture. Returning a rejected promise / throwing marks + * that case as a generation error (failed). + */ + generate?: (prompt: string, caseId: string) => unknown | Promise; + /** Default pass threshold for cases that don't set their own `minScore`. */ + minScore?: number; +} + +const DEFAULT_MIN_SCORE = 75; + +/** + * Run the eval over a set of cases. Offline (fixtures) unless `generate` is + * supplied. Never throws — generation failures become failed cases. + */ +export async function runMetadataEval( + cases: MetadataEvalCase[], + options: RunMetadataEvalOptions = {}, +): Promise { + const defaultMin = options.minScore ?? DEFAULT_MIN_SCORE; + const live = typeof options.generate === 'function'; + const results: MetadataEvalCaseResult[] = []; + + for (const c of cases) { + const minScore = c.minScore ?? defaultMin; + let stack: unknown = c.fixture; + let generationError: string | undefined; + let source: 'fixture' | 'generated' = 'fixture'; + + if (live) { + source = 'generated'; + try { + stack = await options.generate!(c.prompt, c.id); + } catch (err: any) { + generationError = err?.message || String(err); + stack = {}; + } + } + + const score = scoreMetadata(stack); + results.push({ + id: c.id, + prompt: c.prompt, + ...(generationError ? { generationError } : {}), + score, + minScore, + passed: !generationError && score.score >= minScore && score.counts.errors === 0 && score.counts.schemaErrors === 0, + source, + }); + } + + const passed = results.filter((r) => r.passed).length; + const meanScore = results.length + ? Math.round(results.reduce((sum, r) => sum + r.score.score, 0) / results.length) + : 0; + + return { + results, + total: results.length, + passed, + failed: results.length - passed, + meanScore, + ok: passed === results.length, + mode: live ? 'live' : 'offline', + }; +} diff --git a/packages/cli/src/lint/score.ts b/packages/cli/src/lint/score.ts new file mode 100644 index 000000000..2bccecde0 --- /dev/null +++ b/packages/cli/src/lint/score.ts @@ -0,0 +1,108 @@ +// Copyright (c) 2025 ObjectStack. Licensed under the Apache-2.0 license. + +/** + * Metadata quality scorer — the automated rubric for the metadata-generation + * eval (see `metadata-eval.ts`). + * + * The premise (chosen with the user): the LINTER is the rubric. A generated + * stack is "good" exactly when it (a) parses against the canonical spec schema + * and (b) is clean under the data-model lint rules. This is deterministic, + * needs no LLM/API key, and runs in CI — yet it directly measures the + * conventions we care about (master-detail, inlineEdit, roll-ups, selects, + * naming, labels). + * + * `scoreMetadata(stack)` returns a 0–100 score plus a breakdown so callers can + * show *why* a generation scored the way it did. + */ + +import { ObjectStackDefinitionSchema, normalizeStackInput } from '@objectstack/spec'; +import { lintConfig } from '../commands/lint.js'; +import type { LintIssue, Severity } from './data-model-rules.js'; + +/** Penalty weights per issue class. Schema errors are the most severe. */ +export const SCORE_WEIGHTS = { + schemaError: 12, + error: 8, + warning: 3, + suggestion: 1, +} as const; + +export interface MetadataScore { + /** 0–100 quality score (higher is better). */ + score: number; + /** True when the stack is schema-valid AND has zero lint errors. */ + valid: boolean; + /** Letter grade derived from `score` (A ≥ 90, B ≥ 75, C ≥ 60, D ≥ 40, F otherwise). */ + grade: 'A' | 'B' | 'C' | 'D' | 'F'; + counts: { + schemaErrors: number; + errors: number; + warnings: number; + suggestions: number; + }; + /** Schema parse error messages (empty when valid). */ + schemaErrors: string[]; + /** Lint issues (naming, labels, structure, data-model conventions). */ + issues: LintIssue[]; +} + +function gradeFor(score: number): MetadataScore['grade'] { + if (score >= 90) return 'A'; + if (score >= 75) return 'B'; + if (score >= 60) return 'C'; + if (score >= 40) return 'D'; + return 'F'; +} + +function bySeverity(issues: LintIssue[], severity: Severity): LintIssue[] { + return issues.filter((i) => i.severity === severity); +} + +/** + * Score a stack definition (raw or normalized) for metadata quality. + * Pure & deterministic. + */ +export function scoreMetadata(stack: unknown): MetadataScore { + const normalized = normalizeStackInput((stack ?? {}) as Record); + + // 1) Schema validity against the canonical spec. + const parsed = ObjectStackDefinitionSchema.safeParse(normalized); + const schemaErrors: string[] = parsed.success + ? [] + : parsed.error.issues.map((i) => `${i.path.join('.') || '(root)'}: ${i.message}`); + + // 2) Lint (naming/labels/structure + data-model conventions). + let issues: LintIssue[] = []; + try { + issues = lintConfig(normalized) as LintIssue[]; + } catch { + // A linter crash shouldn't mask the schema verdict — treat as no lint data. + issues = []; + } + + const errors = bySeverity(issues, 'error'); + const warnings = bySeverity(issues, 'warning'); + const suggestions = bySeverity(issues, 'suggestion'); + + const penalty = + schemaErrors.length * SCORE_WEIGHTS.schemaError + + errors.length * SCORE_WEIGHTS.error + + warnings.length * SCORE_WEIGHTS.warning + + suggestions.length * SCORE_WEIGHTS.suggestion; + + const score = Math.max(0, Math.min(100, 100 - penalty)); + + return { + score: Math.round(score), + valid: schemaErrors.length === 0 && errors.length === 0, + grade: gradeFor(score), + counts: { + schemaErrors: schemaErrors.length, + errors: errors.length, + warnings: warnings.length, + suggestions: suggestions.length, + }, + schemaErrors, + issues, + }; +} diff --git a/packages/cli/test/data-model-rules.test.ts b/packages/cli/test/data-model-rules.test.ts new file mode 100644 index 000000000..9e7ed6f8a --- /dev/null +++ b/packages/cli/test/data-model-rules.test.ts @@ -0,0 +1,175 @@ +import { describe, expect, it } from 'vitest'; +import { lintDataModel } from '../src/lint/data-model-rules'; +import { lintConfig } from '../src/commands/lint'; + +const rulesOf = (issues: { rule: string }[]) => issues.map((i) => i.rule); +const has = (issues: { rule: string }[], rule: string) => rulesOf(issues).includes(rule); + +describe('lintDataModel — relationships', () => { + it('returns [] for empty input', () => { + expect(lintDataModel([])).toEqual([]); + expect(lintDataModel(undefined as any)).toEqual([]); + }); + + it('flags a relationship field missing a reference (error)', () => { + const issues = lintDataModel([ + { name: 'task', fields: { project: { type: 'master_detail', required: true } } }, + ]); + const issue = issues.find((i) => i.rule === 'relationship/missing-reference'); + expect(issue?.severity).toBe('error'); + }); + + it('warns when a master_detail is not required', () => { + const issues = lintDataModel([ + { name: 'invoice', fields: { number: { type: 'text' } } }, + { name: 'invoice_line', fields: { invoice: { type: 'master_detail', reference: 'invoice', required: true, deleteBehavior: 'cascade' } } }, + ]); + expect(has(issues, 'relationship/master-detail-required')).toBe(false); + + const issues2 = lintDataModel([ + { name: 'invoice_line', fields: { invoice: { type: 'master_detail', reference: 'invoice', deleteBehavior: 'cascade' } } }, + ]); + const req = issues2.find((i) => i.rule === 'relationship/master-detail-required'); + expect(req?.severity).toBe('warning'); + }); + + it('suggests an explicit deleteBehavior on master_detail', () => { + const issues = lintDataModel([ + { name: 'invoice_line', fields: { invoice: { type: 'master_detail', reference: 'invoice', required: true } } }, + ]); + const db = issues.find((i) => i.rule === 'relationship/delete-behavior'); + expect(db?.severity).toBe('suggestion'); + }); + + it('suggests inlineEdit on master_detail line-item children', () => { + const issues = lintDataModel([ + { name: 'order_line', fields: { order: { type: 'master_detail', reference: 'order', required: true, deleteBehavior: 'cascade' } } }, + ]); + expect(has(issues, 'relationship/line-items-inline-edit')).toBe(true); + }); + + it('does NOT suggest inlineEdit when already set', () => { + const issues = lintDataModel([ + { name: 'order_line', fields: { order: { type: 'master_detail', reference: 'order', required: true, deleteBehavior: 'cascade', inlineEdit: true } } }, + ]); + expect(has(issues, 'relationship/line-items-inline-edit')).toBe(false); + }); + + it('suggests master_detail when a line-item child uses lookup', () => { + const issues = lintDataModel([ + { name: 'quote_item', fields: { quote: { type: 'lookup', reference: 'quote' } } }, + ]); + expect(has(issues, 'relationship/line-item-should-be-master-detail')).toBe(true); + }); + + it('warns when an association child is inlineEdit', () => { + const issues = lintDataModel([ + { name: 'ticket_comment', fields: { ticket: { type: 'master_detail', reference: 'ticket', required: true, deleteBehavior: 'cascade', inlineEdit: true } } }, + ]); + const assoc = issues.find((i) => i.rule === 'relationship/association-inline-edit'); + expect(assoc?.severity).toBe('warning'); + }); + + it('does NOT treat a line-item child as an association', () => { + const issues = lintDataModel([ + { name: 'invoice_line', fields: { invoice: { type: 'master_detail', reference: 'invoice', required: true, deleteBehavior: 'cascade', inlineEdit: true } } }, + ]); + expect(has(issues, 'relationship/association-inline-edit')).toBe(false); + }); +}); + +describe('lintDataModel — roll-ups', () => { + it('suggests a roll-up when a parent owns master_detail children with numeric fields', () => { + const issues = lintDataModel([ + { name: 'invoice', label: 'Invoice', fields: { number: { type: 'text' } } }, + { name: 'invoice_line', fields: { invoice: { type: 'master_detail', reference: 'invoice', required: true, deleteBehavior: 'cascade' }, amount: { type: 'currency' } } }, + ]); + expect(has(issues, 'rollup/missing-summary')).toBe(true); + }); + + it('does NOT suggest a roll-up when a summary already aggregates that child', () => { + const issues = lintDataModel([ + { + name: 'invoice', + fields: { + number: { type: 'text' }, + total: { type: 'summary', summaryOperations: { object: 'invoice_line', field: 'amount', function: 'sum' } }, + }, + }, + { name: 'invoice_line', fields: { invoice: { type: 'master_detail', reference: 'invoice', required: true, deleteBehavior: 'cascade' }, amount: { type: 'currency' } } }, + ]); + expect(has(issues, 'rollup/missing-summary')).toBe(false); + }); + + it('does NOT suggest a roll-up for a child with no numeric field', () => { + const issues = lintDataModel([ + { name: 'project', fields: { name: { type: 'text' } } }, + { name: 'project_note', fields: { project: { type: 'master_detail', reference: 'project', required: true, deleteBehavior: 'cascade' }, body: { type: 'textarea' } } }, + ]); + expect(has(issues, 'rollup/missing-summary')).toBe(false); + }); +}); + +describe('lintDataModel — fields & objects', () => { + it('warns on a select field with no options', () => { + const issues = lintDataModel([ + { name: 'task', fields: { status: { type: 'select' } } }, + ]); + const sel = issues.find((i) => i.rule === 'field/select-missing-options'); + expect(sel?.severity).toBe('warning'); + }); + + it('accepts a select with options or an options source', () => { + const withOptions = lintDataModel([ + { name: 'task', fields: { status: { type: 'select', options: [{ label: 'Open', value: 'open' }] } } }, + ]); + expect(has(withOptions, 'field/select-missing-options')).toBe(false); + }); + + it('suggests a name field when an object has none', () => { + const issues = lintDataModel([ + { name: 'widget', fields: { color: { type: 'text' }, size: { type: 'number' } } }, + ]); + expect(has(issues, 'object/missing-name-field')).toBe(true); + }); + + it('accepts an object with a name field or primaryField', () => { + expect(has(lintDataModel([{ name: 'a', fields: { name: { type: 'text' } } }]), 'object/missing-name-field')).toBe(false); + expect(has(lintDataModel([{ name: 'b', primaryField: 'code', fields: { code: { type: 'text' } } }]), 'object/missing-name-field')).toBe(false); + }); + + it('handles array-shaped fields', () => { + const issues = lintDataModel([ + { name: 'invoice_line', fields: [{ name: 'invoice', type: 'master_detail', reference: 'invoice' }] }, + ]); + expect(has(issues, 'relationship/master-detail-required')).toBe(true); + }); +}); + +describe('lintConfig integration', () => { + it('a clean invoice/line model produces no data-model errors or warnings', () => { + const issues = lintConfig({ + objects: [ + { + name: 'invoice', + label: 'Invoice', + fields: { + number: { type: 'text', label: 'Number' }, + total: { type: 'summary', label: 'Total', summaryOperations: { object: 'invoice_line', field: 'amount', function: 'sum' } }, + }, + }, + { + name: 'invoice_line', + label: 'Invoice Line', + fields: { + invoice: { type: 'master_detail', label: 'Invoice', reference: 'invoice', required: true, deleteBehavior: 'cascade', inlineEdit: true }, + product: { type: 'text', label: 'Product' }, + amount: { type: 'currency', label: 'Amount' }, + }, + }, + ], + }); + const dataModel = issues.filter((i) => i.rule.startsWith('relationship/') || i.rule.startsWith('rollup/') || i.rule.startsWith('field/') || i.rule.startsWith('object/')); + expect(dataModel.filter((i) => i.severity !== 'suggestion')).toEqual([]); + }); +}); diff --git a/packages/cli/test/metadata-eval.test.ts b/packages/cli/test/metadata-eval.test.ts new file mode 100644 index 000000000..cf4c331b4 --- /dev/null +++ b/packages/cli/test/metadata-eval.test.ts @@ -0,0 +1,75 @@ +import { describe, expect, it } from 'vitest'; +import { runMetadataEval, type MetadataEvalCase } from '../src/lint/metadata-eval'; +import { DEFAULT_METADATA_EVAL_CORPUS } from '../src/lint/corpus'; +import { scoreMetadata } from '../src/lint/score'; + +describe('runMetadataEval — offline (golden corpus)', () => { + it('every golden fixture clears the quality bar', async () => { + const report = await runMetadataEval(DEFAULT_METADATA_EVAL_CORPUS); + expect(report.mode).toBe('offline'); + expect(report.total).toBe(DEFAULT_METADATA_EVAL_CORPUS.length); + // Surface which case failed (if any) for a useful assertion message. + const failures = report.results.filter((r) => !r.passed).map((r) => `${r.id}=${r.score.score}`); + expect(failures).toEqual([]); + expect(report.ok).toBe(true); + expect(report.meanScore).toBeGreaterThanOrEqual(90); + }); + + it('each golden fixture is schema-valid with no errors', async () => { + for (const c of DEFAULT_METADATA_EVAL_CORPUS) { + const s = scoreMetadata(c.fixture); + expect(s.counts.schemaErrors, `${c.id} schema`).toBe(0); + expect(s.counts.errors, `${c.id} lint errors`).toBe(0); + } + }); + + it('the corpus exercises the key conventions', () => { + const ids = DEFAULT_METADATA_EVAL_CORPUS.map((c) => c.id); + expect(ids).toContain('invoice_with_line_items'); + expect(ids).toContain('blog_post_with_comments'); // association (no inlineEdit) + expect(ids).toContain('crm_account_with_contacts'); // lookup (independent) + }); +}); + +describe('runMetadataEval — live seam', () => { + const oneCase: MetadataEvalCase[] = [ + { id: 'c1', prompt: 'invoice with lines', fixture: { manifest: { id: 'a', namespace: 'aa', version: '1.0.0', name: 'A', type: 'app' } } }, + ]; + + it('scores the generated stack (not the fixture) when a generator is injected', async () => { + // Generator returns a broken stack → the case fails under the rubric. + const badGen = () => ({ + objects: [{ name: 'BadName', fields: { Status: { type: 'select' } } }], + }); + const report = await runMetadataEval(oneCase, { generate: badGen }); + expect(report.mode).toBe('live'); + expect(report.results[0].source).toBe('generated'); + expect(report.results[0].passed).toBe(false); + expect(report.ok).toBe(false); + }); + + it('a generator that produces a clean stack passes', async () => { + const goodGen = () => ({ + objects: [ + { name: 'invoice', label: 'Invoice', fields: { name: { type: 'text', label: 'Name', required: true } } }, + { name: 'invoice_line', label: 'Line', fields: { invoice: { type: 'master_detail', label: 'Invoice', reference: 'invoice', required: true, deleteBehavior: 'cascade', inlineEdit: true }, amount: { type: 'currency', label: 'Amount', required: true } } }, + ], + }); + const report = await runMetadataEval(oneCase, { generate: goodGen }); + expect(report.results[0].passed).toBe(true); + }); + + it('a generation error becomes a failed case (never throws)', async () => { + const throwingGen = () => { throw new Error('model unavailable'); }; + const report = await runMetadataEval(oneCase, { generate: throwingGen }); + expect(report.results[0].passed).toBe(false); + expect(report.results[0].generationError).toContain('model unavailable'); + }); + + it('respects per-case minScore', async () => { + const cases: MetadataEvalCase[] = [{ ...oneCase[0], minScore: 100 }]; + // The empty-ish fixture scores 100, so minScore 100 still passes offline. + const report = await runMetadataEval(cases); + expect(report.results[0].minScore).toBe(100); + }); +}); diff --git a/packages/cli/test/score.test.ts b/packages/cli/test/score.test.ts new file mode 100644 index 000000000..491dee0c1 --- /dev/null +++ b/packages/cli/test/score.test.ts @@ -0,0 +1,102 @@ +import { describe, expect, it } from 'vitest'; +import { scoreMetadata } from '../src/lint/score'; + +/** A clean, convention-following invoice + line-item model. */ +const GOOD_STACK = { + manifest: { id: 'demo', namespace: 'demo_app', version: '1.0.0', name: 'Demo', type: 'app' as const }, + objects: [ + { + name: 'invoice', + label: 'Invoice', + fields: { + name: { type: 'text', label: 'Invoice Number', required: true }, + status: { type: 'select', label: 'Status', options: [{ label: 'Draft', value: 'draft' }, { label: 'Sent', value: 'sent' }] }, + total: { type: 'summary', label: 'Total', summaryOperations: { object: 'invoice_line', field: 'amount', function: 'sum' } }, + }, + }, + { + name: 'invoice_line', + label: 'Invoice Line', + fields: { + invoice: { type: 'master_detail', label: 'Invoice', reference: 'invoice', required: true, deleteBehavior: 'cascade', inlineEdit: true }, + product: { type: 'text', label: 'Product', required: true }, + amount: { type: 'currency', label: 'Amount', required: true }, + }, + }, + ], +}; + +/** Schema-invalid (bad namespace) AND riddled with anti-patterns. */ +const BAD_STACK = { + manifest: { id: 'bad', namespace: 'X', version: '1.0.0', name: 'Bad', type: 'app' as const }, // namespace fails pattern → schema error + objects: [ + { + name: 'BadName', // not snake_case → lint error + // no label → lint error; no name field → suggestion + fields: { + Status: { type: 'select' }, // not snake_case + no options + widget_amount: { type: 'number' }, + }, + }, + { + name: 'cart_item', // line-item shaped... + label: 'Cart Item', + fields: { + cart: { type: 'lookup', reference: 'cart' }, // ...but lookup, not master_detail → suggestion + }, + }, + ], +}; + +describe('scoreMetadata', () => { + it('scores a clean model highly and marks it valid', () => { + const r = scoreMetadata(GOOD_STACK); + expect(r.valid).toBe(true); + expect(r.counts.schemaErrors).toBe(0); + expect(r.counts.errors).toBe(0); + expect(r.score).toBeGreaterThanOrEqual(90); + expect(r.grade).toBe('A'); + }); + + it('scores a broken model low and marks it invalid', () => { + const r = scoreMetadata(BAD_STACK); + expect(r.valid).toBe(false); + expect(r.counts.schemaErrors).toBeGreaterThan(0); + expect(r.score).toBeLessThan(GOOD_STACK ? scoreMetadata(GOOD_STACK).score : 100); + }); + + it('a clean model outscores a broken one (monotonicity)', () => { + expect(scoreMetadata(GOOD_STACK).score).toBeGreaterThan(scoreMetadata(BAD_STACK).score); + }); + + it('an empty stack is schema-valid and scores 100', () => { + const r = scoreMetadata({}); + expect(r.valid).toBe(true); + expect(r.score).toBe(100); + expect(r.grade).toBe('A'); + }); + + it('suggestions cost less than warnings cost less than errors', () => { + // Only suggestions: a master_detail without explicit deleteBehavior (suggestion). + const onlySuggestions = scoreMetadata({ + objects: [ + { name: 'invoice', label: 'Invoice', fields: { name: { type: 'text', label: 'Name', required: true } } }, + { name: 'invoice_line', label: 'Line', fields: { invoice: { type: 'master_detail', label: 'Invoice', reference: 'invoice', required: true, inlineEdit: true } } }, + ], + }); + // A warning: master_detail not required. + const withWarning = scoreMetadata({ + objects: [ + { name: 'invoice', label: 'Invoice', fields: { name: { type: 'text', label: 'Name', required: true } } }, + { name: 'invoice_line', label: 'Line', fields: { invoice: { type: 'master_detail', label: 'Invoice', reference: 'invoice', deleteBehavior: 'cascade' } } }, + ], + }); + expect(onlySuggestions.score).toBeGreaterThan(withWarning.score); + expect(onlySuggestions.counts.errors).toBe(0); + }); + + it('reports the schema error messages', () => { + const r = scoreMetadata(BAD_STACK); + expect(r.schemaErrors.some((m) => m.includes('namespace'))).toBe(true); + }); +}); diff --git a/skills/objectstack-data/SKILL.md b/skills/objectstack-data/SKILL.md index 85e740766..5ef6b0467 100644 --- a/skills/objectstack-data/SKILL.md +++ b/skills/objectstack-data/SKILL.md @@ -700,6 +700,48 @@ changes must produce byte-identical `dist/objectstack.json`. CEL + pinned --- +## Linting & Generation Quality + +`objectstack lint` checks the data model against the conventions in this skill — +not just naming/labels but the relationship/master-detail/roll-up patterns. Run +it after authoring or generating metadata. Severities: `error` (structural, +fails the command), `warning` (likely-wrong choice), `suggestion` (nudge). + +Data-model rules (in addition to naming/label/i18n): + +| Rule | Severity | Catches | +|---|---|---| +| `relationship/missing-reference` | error | lookup/master_detail without a `reference` target | +| `relationship/master-detail-required` | warning | a `master_detail` that isn't `required` (a detail can't exist without its master) | +| `relationship/delete-behavior` | suggestion | `master_detail` without an explicit `deleteBehavior` | +| `relationship/line-items-inline-edit` | suggestion | a `*_line`/`*_item` master_detail child without `inlineEdit` | +| `relationship/line-item-should-be-master-detail` | suggestion | a line-item-shaped child using `lookup` instead of `master_detail` | +| `relationship/association-inline-edit` | warning | an association (comment/audit/activity) marked `inlineEdit` (clutters the parent form — use a detail-page related list) | +| `rollup/missing-summary` | suggestion | a parent of numeric master_detail children with no roll-up `summary` | +| `field/select-missing-options` | warning | a `select`/`multiselect`/`radio` with no `options` (or options source) | +| `object/missing-name-field` | suggestion | an object with no name/title field or `primaryField` | + +These same rules are the **rubric for AI-generated metadata** — a generation is +"good" exactly when it is schema-valid and lint-clean: + +- `objectstack lint --score` — print a 0–100 metadata-quality score (+ letter + grade and severity breakdown) for the current project. Schema errors and lint + errors weigh most; suggestions barely move it. +- `objectstack lint --eval` — run the generation eval over a bundled golden + corpus (invoice+lines, project+tasks, blog+comments, expense+lines, + account+contacts) offline; each case must clear the pass bar (`--eval-min`, + default 75). Deterministic, no API key. +- `objectstack lint --eval --generator ./gen.mjs` — **live** eval: the module + default-exports `(prompt, id) => stack`; wire it to your agent / + `AIService.generateObject` (+ blueprint→metadata expansion) + to benchmark a real model against the same rubric. + +When generating object metadata, target a lint-clean model: master_detail (with +`required` + `deleteBehavior` + `inlineEdit` for line items), roll-up summaries +on parents, `select` options, and a name/title field per object. + +--- + ## References See [references/_index.md](./references/_index.md) for the full list of Zod