diff --git a/.objectui-sha b/.objectui-sha index 411068843..5ba9d5f37 100644 --- a/.objectui-sha +++ b/.objectui-sha @@ -1 +1 @@ -c657e9b180a8c7aa56938d22d7baaee5dc8fc765 +5ab1e0e630a5db14defba535cc69f67cb71746c7 diff --git a/packages/cli/src/commands/serve.ts b/packages/cli/src/commands/serve.ts index 208130f9e..eb1fa3a63 100644 --- a/packages/cli/src/commands/serve.ts +++ b/packages/cli/src/commands/serve.ts @@ -186,6 +186,12 @@ export default class Serve extends Command { */ static readonly ALWAYS_ON_CAPABILITIES: readonly string[] = Object.freeze([ 'queue', 'job', 'cache', 'settings', 'email', 'storage', 'sharing', 'messaging', + // `analytics` is foundational post-ADR-0021: the AnalyticsService backs the + // dataset/cube query endpoints (`/api/v1/analytics/*`). It must exist even + // when an app declares no `analyticsCubes`, because a `dataset` can be + // authored/previewed inline (Studio) and compiled on the fly. Without it the + // dataset preview + dashboard/report analytics widgets silently no-op. + 'analytics', ]); /** diff --git a/packages/objectql/src/engine.ts b/packages/objectql/src/engine.ts index f23a633f5..13afbb187 100644 --- a/packages/objectql/src/engine.ts +++ b/packages/objectql/src/engine.ts @@ -922,7 +922,7 @@ export class ObjectQL implements IDataEngine { // 5. Register all other metadata types generically const metadataArrayKeys = [ // UI Protocol - 'actions', 'views', 'pages', 'dashboards', 'reports', 'themes', + 'actions', 'views', 'pages', 'dashboards', 'reports', 'datasets', 'themes', // Automation Protocol 'flows', 'workflows', 'approvals', 'webhooks', 'jobs', @@ -1076,7 +1076,7 @@ export class ObjectQL implements IDataEngine { // Register metadata arrays (actions, views, triggers, etc.) 
const metadataArrayKeys = [ - 'actions', 'views', 'pages', 'dashboards', 'reports', 'themes', + 'actions', 'views', 'pages', 'dashboards', 'reports', 'datasets', 'themes', 'flows', 'workflows', 'approvals', 'webhooks', 'roles', 'permissions', 'profiles', 'sharingRules', 'policies', 'agents', 'ragPipelines', 'apis', diff --git a/packages/rest/src/analytics-routes.test.ts b/packages/rest/src/analytics-routes.test.ts new file mode 100644 index 000000000..095d0bfe1 --- /dev/null +++ b/packages/rest/src/analytics-routes.test.ts @@ -0,0 +1,100 @@ +// Copyright (c) 2026 ObjectStack. Licensed under the Apache-2.0 license. + +import { describe, it, expect, vi } from 'vitest'; +import { RestServer } from './rest-server'; + +// ── helpers ────────────────────────────────────────────────────────────────── + +function mockServer() { + return { + get: vi.fn(), post: vi.fn(), put: vi.fn(), delete: vi.fn(), patch: vi.fn(), + use: vi.fn(), listen: vi.fn().mockResolvedValue(undefined), close: vi.fn().mockResolvedValue(undefined), + }; +} +function mockProtocol() { + return { getDiscovery: vi.fn().mockResolvedValue({ version: 'v0', endpoints: {} }), getMetaTypes: vi.fn().mockResolvedValue([]), getMetaItems: vi.fn().mockResolvedValue([]) }; +} +function mockRes() { + const res: any = { statusCode: 200, body: undefined }; + res.status = vi.fn((c: number) => { res.statusCode = c; return res; }); + res.json = vi.fn((b: any) => { res.body = b; return res; }); + res.end = vi.fn(() => res); + return res; +} + +const inlineDataset = { + name: 'sales', label: 'Sales', object: 'opportunity', include: ['account'], + dimensions: [{ name: 'region', field: 'account.region', type: 'string' }], + measures: [{ name: 'revenue', aggregate: 'sum', field: 'amount' }], +}; +const selection = { dimensions: ['region'], measures: ['revenue'] }; + +/** Build a RestServer with an optional analytics provider (positional arg #15). 
*/ +function buildServer(analyticsProvider?: any) { + const server = mockServer(); + const rest = new RestServer( + server as any, mockProtocol() as any, {} as any, + undefined, undefined, undefined, undefined, undefined, undefined, undefined, + undefined, undefined, undefined, undefined, + analyticsProvider, + ); + rest.registerRoutes(); + const route = rest.getRoutes().find((r) => r.method === 'POST' && r.path.endsWith('/analytics/dataset/query')); + return { route }; +} + +describe('POST /analytics/dataset/query', () => { + it('registers the route', () => { + const { route } = buildServer(async () => ({ queryDataset: vi.fn() })); + expect(route).toBeTruthy(); + expect(route!.metadata?.tags).toContain('analytics'); + }); + + it('runs an inline dataset through the analytics service and returns rows', async () => { + const queryDataset = vi.fn().mockResolvedValue({ rows: [{ region: 'NA', revenue: 100 }], fields: [] }); + const { route } = buildServer(async () => ({ queryDataset })); + const res = mockRes(); + await route!.handler({ method: 'POST', params: {}, headers: {}, body: { dataset: inlineDataset, selection } } as any, res); + + expect(res.statusCode).toBe(200); + expect(res.body).toEqual({ rows: [{ region: 'NA', revenue: 100 }], fields: [] }); + // dataset was schema-validated before reaching the service (certified default applied) + const passedDataset = queryDataset.mock.calls[0][0]; + expect(passedDataset.measures[0].certified).toBe(false); + expect(queryDataset.mock.calls[0][1]).toEqual(selection); + }); + + it('returns 501 when no analytics service is configured', async () => { + const { route } = buildServer(undefined); + const res = mockRes(); + await route!.handler({ method: 'POST', params: {}, headers: {}, body: { dataset: inlineDataset, selection } } as any, res); + expect(res.statusCode).toBe(501); + expect(res.body.code).toBe('NOT_IMPLEMENTED'); + }); + + it('returns 400 when selection.measures is missing/empty', async () => { + const { route } = 
buildServer(async () => ({ queryDataset: vi.fn() })); + const res = mockRes(); + await route!.handler({ method: 'POST', params: {}, headers: {}, body: { dataset: inlineDataset, selection: { dimensions: ['region'] } } } as any, res); + expect(res.statusCode).toBe(400); + expect(res.body.code).toBe('VALIDATION_FAILED'); + }); + + it('returns 400 for an invalid dataset definition', async () => { + const { route } = buildServer(async () => ({ queryDataset: vi.fn() })); + const res = mockRes(); + const bad = { ...inlineDataset, measures: [{ name: 'x', aggregate: 'not_a_real_agg' }] }; + await route!.handler({ method: 'POST', params: {}, headers: {}, body: { dataset: bad, selection } } as any, res); + expect(res.statusCode).toBe(400); + expect(res.body.code).toBe('VALIDATION_FAILED'); + }); + + it('maps a dataset D-C compile error to 400 (undeclared relationship)', async () => { + const queryDataset = vi.fn().mockRejectedValue(new Error("dimension \"region\" references relationship \"account\" via \"account.region\", but \"account\" is not declared in the dataset's `include`.")); + const { route } = buildServer(async () => ({ queryDataset })); + const res = mockRes(); + await route!.handler({ method: 'POST', params: {}, headers: {}, body: { dataset: inlineDataset, selection } } as any, res); + expect(res.statusCode).toBe(400); + expect(res.body.code).toBe('DATASET_INVALID'); + }); +}); diff --git a/packages/rest/src/rest-api-plugin.ts b/packages/rest/src/rest-api-plugin.ts index 4ab302f39..5de10db84 100644 --- a/packages/rest/src/rest-api-plugin.ts +++ b/packages/rest/src/rest-api-plugin.ts @@ -154,6 +154,15 @@ export function createRestApiPlugin(config: RestApiPluginConfig = {}): Plugin { } catch { return undefined; } }; + // Analytics service resolver — used by /analytics/dataset/query + // (ADR-0021 dataset preview/query). Returns undefined when no + // analytics service is registered so the route fails cleanly (501). 
+ const analyticsServiceProvider = async (_environmentId?: string): Promise => { + try { + return ctx.getService('analytics'); + } catch { return undefined; } + }; + if (!server) { ctx.logger.warn(`RestApiPlugin: HTTP Server service '${serverService}' not found. REST routes skipped.`); return; @@ -167,7 +176,7 @@ export function createRestApiPlugin(config: RestApiPluginConfig = {}): Plugin { ctx.logger.info('Hydrating REST API from Protocol...'); try { - const restServer = new RestServer(server, protocol, config.api as any, kernelManager, envRegistry, defaultEnvironmentIdProvider, authServiceProvider, objectQLProvider, emailServiceProvider, sharingServiceProvider, reportsServiceProvider, approvalsServiceProvider, sharingRulesServiceProvider, i18nServiceProvider); + const restServer = new RestServer(server, protocol, config.api as any, kernelManager, envRegistry, defaultEnvironmentIdProvider, authServiceProvider, objectQLProvider, emailServiceProvider, sharingServiceProvider, reportsServiceProvider, approvalsServiceProvider, sharingRulesServiceProvider, i18nServiceProvider, analyticsServiceProvider); restServer.registerRoutes(); ctx.logger.info('REST API successfully registered'); diff --git a/packages/rest/src/rest-server.ts b/packages/rest/src/rest-server.ts index beb862fa7..6ff9e07c3 100644 --- a/packages/rest/src/rest-server.ts +++ b/packages/rest/src/rest-server.ts @@ -508,6 +508,7 @@ export class RestServer { private approvalsServiceProvider?: (environmentId?: string) => Promise; private sharingRulesServiceProvider?: (environmentId?: string) => Promise; private i18nServiceProvider?: (environmentId?: string) => Promise; + private analyticsServiceProvider?: (environmentId?: string) => Promise; constructor( server: IHttpServer, @@ -524,6 +525,7 @@ export class RestServer { approvalsServiceProvider?: (environmentId?: string) => Promise, sharingRulesServiceProvider?: (environmentId?: string) => Promise, i18nServiceProvider?: (environmentId?: string) => Promise, + 
analyticsServiceProvider?: (environmentId?: string) => Promise, ) { this.protocol = protocol; this.config = this.normalizeConfig(config); @@ -539,6 +541,7 @@ export class RestServer { this.approvalsServiceProvider = approvalsServiceProvider; this.sharingRulesServiceProvider = sharingRulesServiceProvider; this.i18nServiceProvider = i18nServiceProvider; + this.analyticsServiceProvider = analyticsServiceProvider; } /** @@ -1268,6 +1271,7 @@ export class RestServer { this.registerSharingRuleEndpoints(bp); this.registerReportsEndpoints(bp); this.registerApprovalsEndpoints(bp); + this.registerAnalyticsEndpoints(bp); if (this.config.api.enableCrud) { this.registerCrudEndpoints(bp); } @@ -3531,6 +3535,99 @@ export class RestServer { * when no sharing service is configured so a deployment without the * `@objectstack/plugin-sharing` plugin fails cleanly. */ + /** + * ADR-0021 — analytics dataset preview/query endpoint. + * + * POST {basePath}/analytics/dataset/query + * body: { dataset?: <inline dataset definition>, datasetName?: string, selection: DatasetSelection } + * + * Compiles the dataset (an inline draft for Studio preview, or a saved one + * by name) and runs the selection through the analytics service's + * `queryDataset`, threading the request ExecutionContext so tenant/RLS + * scoping (ADR-0021 D-C) applies. Returns 501 when no analytics service + * (or one without `queryDataset`) is configured, so a deployment without + * `@objectstack/service-analytics` fails cleanly. 
+ */ + private registerAnalyticsEndpoints(basePath: string): void { + const isScoped = basePath.includes('/environments/:environmentId'); + const resolveService = async (environmentId?: string) => { + if (!this.analyticsServiceProvider) return undefined; + try { return await this.analyticsServiceProvider(environmentId); } + catch { return undefined; } + }; + + this.routeManager.register({ + method: 'POST', + path: `${basePath}/analytics/dataset/query`, + handler: async (req: any, res: any) => { + try { + const environmentId = isScoped ? req.params?.environmentId : undefined; + const context = await this.resolveExecCtx(environmentId, req); + if (this.enforceAuth(req, res, context)) return; + + const svc = await resolveService(environmentId); + if (!svc || typeof svc.queryDataset !== 'function') { + return res.status(501).json({ + code: 'NOT_IMPLEMENTED', + message: 'Analytics dataset query is not available on this deployment (no analytics service with queryDataset).', + }); + } + + const body = req.body ?? {}; + const selection = body.selection; + if (!selection || !Array.isArray(selection.measures) || selection.measures.length === 0) { + return res.status(400).json({ + code: 'VALIDATION_FAILED', + message: 'body.selection.measures must be a non-empty array of measure names.', + }); + } + + // Resolve the dataset definition: inline draft (Studio + // preview) or a saved dataset by name. + let dataset = body.dataset; + if (!dataset && body.datasetName) { + const p = await this.resolveProtocol(environmentId, req); + const items = await (p as any).getMetaItems?.({ type: 'dataset' }).catch(() => null); + const list = Array.isArray(items?.items) ? items.items : (Array.isArray(items) ? 
items : []); + dataset = list.find((d: any) => d?.name === body.datasetName); + if (!dataset) { + return res.status(404).json({ code: 'NOT_FOUND', message: `Dataset "${body.datasetName}" not found.` }); + } + } + if (!dataset) { + return res.status(400).json({ code: 'VALIDATION_FAILED', message: 'Provide body.dataset (inline) or body.datasetName.' }); + } + + // Validate against the spec schema so a malformed draft + // yields a clean 400 instead of a runtime throw. + try { + const { DatasetSchema } = await import('@objectstack/spec/ui'); + dataset = (DatasetSchema as any).parse(dataset); + } catch (verr: any) { + return res.status(400).json({ + code: 'VALIDATION_FAILED', + message: 'Invalid dataset definition.', + detail: String(verr?.message ?? verr).slice(0, 1000), + }); + } + + const result = await svc.queryDataset(dataset, selection, context ?? undefined); + res.json(result); + } catch (error: any) { + const msg = String(error?.message ?? error ?? ''); + // Dataset-compiler D-C / unsupported-aggregate / read-scope + // errors are client-side mistakes — surface as 400. 
+ if (/not declared in the dataset|not backed by a declared relationship|not supported by the v1 dataset runtime|read-scope-sql/.test(msg)) { + return res.status(400).json({ code: 'DATASET_INVALID', message: msg.slice(0, 1000) }); + } + logError('[REST] Analytics dataset query error:', error); + res.status(500).json({ code: 'ANALYTICS_QUERY_FAILED', error: msg.slice(0, 500) }); + } + }, + metadata: { summary: 'Run a semantic-layer dataset (preview/query)', tags: ['analytics'] }, + }); + } + private registerSharingEndpoints(basePath: string): void { const { crud } = this.config; const dataPath = `${basePath}${crud.dataPrefix}`; diff --git a/packages/services/service-analytics/src/__tests__/dataset-compiler.test.ts b/packages/services/service-analytics/src/__tests__/dataset-compiler.test.ts new file mode 100644 index 000000000..6296fbdee --- /dev/null +++ b/packages/services/service-analytics/src/__tests__/dataset-compiler.test.ts @@ -0,0 +1,111 @@ +// Copyright (c) 2026 ObjectStack. Licensed under the Apache-2.0 license. + +import { describe, it, expect } from 'vitest'; +import { DatasetSchema } from '@objectstack/spec/ui'; +import { compileDataset } from '../dataset-compiler.js'; + +/** A representative dataset: revenue by account.region (the ADR headline case). 
*/ +const salesDataset = DatasetSchema.parse({ + name: 'sales', + label: 'Sales', + object: 'opportunity', + include: ['account'], + filter: { is_deleted: { $ne: true } }, + dimensions: [ + { name: 'region', label: 'Region', field: 'account.region', type: 'string' }, + { name: 'close_month', label: 'Close Month', field: 'close_date', type: 'date', dateGranularity: 'month' }, + ], + measures: [ + { name: 'revenue', label: 'Revenue', aggregate: 'sum', field: 'amount', certified: true, format: '$0,0.00' }, + { name: 'deal_count', label: 'Deals', aggregate: 'count', certified: false }, + { name: 'won_amount', label: 'Won', aggregate: 'sum', field: 'amount', certified: false, filter: { stage: 'won' } }, + { name: 'win_rate', label: 'Win Rate', aggregate: 'sum', certified: false, derived: { op: 'ratio', of: ['won_amount', 'revenue'] } }, + ], +}); + +describe('compileDataset', () => { + it('compiles a dataset to a Cube with dimensions and measures', () => { + const { cube } = compileDataset(salesDataset); + expect(cube.name).toBe('sales'); + expect(cube.sql).toBe('opportunity'); + + // Dimension with a relationship path keeps the dotted sql for the join machinery. + expect(cube.dimensions.region.sql).toBe('account.region'); + expect(cube.dimensions.region.type).toBe('string'); + + // Date dimension maps to a `time` Cube dimension with the declared granularity. + expect(cube.dimensions.close_month.type).toBe('time'); + expect(cube.dimensions.close_month.granularities).toEqual(['month']); + + // sum measure → metric sql is the field; format carried. + expect(cube.measures.revenue.type).toBe('sum'); + expect(cube.measures.revenue.sql).toBe('amount'); + expect(cube.measures.revenue.format).toBe('$0,0.00'); + + // count with no field → sql '*'. 
+ expect(cube.measures.deal_count.type).toBe('count'); + expect(cube.measures.deal_count.sql).toBe('*'); + }); + + it('exposes the declared relationships as the join allowlist (D-C)', () => { + const { allowedRelationships } = compileDataset(salesDataset); + expect(allowedRelationships.has('account')).toBe(true); + expect(allowedRelationships.size).toBe(1); + }); + + it('carries dataset-level and per-measure filters separately', () => { + const { filter, measureFilters } = compileDataset(salesDataset); + expect(filter).toEqual({ is_deleted: { $ne: true } }); + expect(measureFilters.won_amount).toEqual({ stage: 'won' }); + }); + + it('extracts derived measures into the sidecar (not as Cube metrics)', () => { + const { cube, derived } = compileDataset(salesDataset); + expect(cube.measures.win_rate).toBeUndefined(); + expect(derived).toEqual([{ name: 'win_rate', op: 'ratio', of: ['won_amount', 'revenue'] }]); + }); + + it('rejects a dotted field that traverses an undeclared relationship (D-C)', () => { + const bad = DatasetSchema.parse({ + name: 'bad', + label: 'Bad', + object: 'opportunity', + include: [], // owner did NOT declare `account` + dimensions: [{ name: 'region', field: 'account.region' }], + measures: [{ name: 'cnt', aggregate: 'count' }], + }); + expect(() => compileDataset(bad)).toThrowError(/not declared in the dataset's `include`/); + }); + + it('validates declared relationships against the object graph when a resolver is given', () => { + const resolver = (obj: string, rel: string) => (obj === 'opportunity' && rel === 'account' ? 
'account' : undefined); + expect(() => compileDataset(salesDataset, resolver)).not.toThrow(); + + const withBadInclude = DatasetSchema.parse({ ...salesDataset, include: ['nonexistent'] }); + expect(() => compileDataset(withBadInclude, resolver)).toThrowError(/does not exist on object/); + }); + + it('emits cube.joins with the resolved TARGET TABLE (alias ≠ table for namespaced objects)', () => { + // lookup field `account` on `opportunity` references object `crm_account`. + const resolver = (obj: string, rel: string) => + obj === 'opportunity' && rel === 'account' ? 'crm_account' : undefined; + const { cube } = compileDataset(salesDataset, resolver); + expect(cube.joins?.account?.name).toBe('crm_account'); + }); + + it('without a resolver, falls back to the relationship name as the join table', () => { + const { cube } = compileDataset(salesDataset); + expect(cube.joins?.account?.name).toBe('account'); + }); + + it('rejects v1-unsupported aggregates with a clear error', () => { + const ds = DatasetSchema.parse({ + name: 'agg', + label: 'Agg', + object: 'opportunity', + dimensions: [], + measures: [{ name: 'tags', aggregate: 'array_agg', field: 'tag' }], + }); + expect(() => compileDataset(ds)).toThrowError(/not supported by the v1 dataset runtime/); + }); +}); diff --git a/packages/services/service-analytics/src/__tests__/dataset-executor.test.ts b/packages/services/service-analytics/src/__tests__/dataset-executor.test.ts new file mode 100644 index 000000000..20cfb094b --- /dev/null +++ b/packages/services/service-analytics/src/__tests__/dataset-executor.test.ts @@ -0,0 +1,131 @@ +// Copyright (c) 2026 ObjectStack. Licensed under the Apache-2.0 license. 
+ +import { describe, it, expect, vi } from 'vitest'; +import type { IAnalyticsService, AnalyticsQuery, AnalyticsResult } from '@objectstack/spec/contracts'; +import { DatasetSchema } from '@objectstack/spec/ui'; +import { compileDataset } from '../dataset-compiler.js'; +import { + DatasetExecutor, + evaluateDerivedMeasures, + combineFilters, + shiftRange, +} from '../dataset-executor.js'; + +describe('evaluateDerivedMeasures', () => { + const rows = [{ region: 'NA', won_amount: 60, total_amount: 100, a: 3, b: 4 }]; + + it('computes ratio with div-by-zero → null', () => { + expect(evaluateDerivedMeasures(rows, [{ name: 'wr', op: 'ratio', of: ['won_amount', 'total_amount'] }])[0].wr).toBe(0.6); + expect(evaluateDerivedMeasures([{ x: 1, y: 0 }], [{ name: 'r', op: 'ratio', of: ['x', 'y'] }])[0].r).toBeNull(); + }); + + it('computes sum / difference / product', () => { + expect(evaluateDerivedMeasures(rows, [{ name: 's', op: 'sum', of: ['a', 'b'] }])[0].s).toBe(7); + expect(evaluateDerivedMeasures(rows, [{ name: 'd', op: 'difference', of: ['b', 'a'] }])[0].d).toBe(1); + expect(evaluateDerivedMeasures(rows, [{ name: 'p', op: 'product', of: ['a', 'b'] }])[0].p).toBe(12); + }); + + it('yields null when an operand is missing', () => { + expect(evaluateDerivedMeasures([{ a: 1 }], [{ name: 'r', op: 'ratio', of: ['a', 'missing'] }])[0].r).toBeNull(); + }); +}); + +describe('combineFilters', () => { + it('ANDs two filters, passes through one, undefined for none', () => { + expect(combineFilters({ a: 1 }, { b: 2 })).toEqual({ $and: [{ a: 1 }, { b: 2 }] }); + expect(combineFilters({ a: 1 }, undefined)).toEqual({ a: 1 }); + expect(combineFilters(undefined, undefined)).toBeUndefined(); + }); +}); + +describe('shiftRange', () => { + it('previousPeriod = equal-length window ending the day before start', () => { + expect(shiftRange(['2026-01-01', '2026-01-31'], 'previousPeriod')).toEqual(['2025-12-01', '2025-12-31']); + }); + it('previousYear = same window minus one year', () => { + 
expect(shiftRange(['2026-03-01', '2026-03-31'], 'previousYear')).toEqual(['2025-03-01', '2025-03-31']); + }); +}); + +const dataset = DatasetSchema.parse({ + name: 'sales', label: 'Sales', object: 'opportunity', include: ['account'], + filter: { is_deleted: { $ne: true } }, + dimensions: [{ name: 'region', field: 'account.region', type: 'string' }], + measures: [ + { name: 'revenue', aggregate: 'sum', field: 'amount', certified: true }, + { name: 'won_amount', aggregate: 'sum', field: 'amount', filter: { stage: 'won' } }, + { name: 'win_rate', aggregate: 'sum', derived: { op: 'ratio', of: ['won_amount', 'revenue'] } }, + ], +}); + +function fakeService(handler: (q: AnalyticsQuery) => AnalyticsResult): IAnalyticsService { + return { + query: vi.fn(async (q: AnalyticsQuery) => handler(q)), + getMeta: async () => [], + }; +} + +describe('DatasetExecutor', () => { + it('combines dataset.filter with runtimeFilter and returns aggregated rows', async () => { + const svc = fakeService((q) => { + expect(q.cube).toBe('sales'); + expect(q.where).toEqual({ $and: [{ is_deleted: { $ne: true } }, { region: 'NA' }] }); + return { rows: [{ region: 'NA', revenue: 100 }], fields: [{ name: 'revenue', type: 'number' }] }; + }); + const compiled = compileDataset(dataset); + const res = await new DatasetExecutor(svc).execute(compiled, { + dimensions: ['region'], measures: ['revenue'], runtimeFilter: { region: 'NA' }, + }); + expect(res.rows).toEqual([{ region: 'NA', revenue: 100 }]); + }); + + it('runs a supplementary query for a measure-scoped filter and merges by dimension', async () => { + const seen: AnalyticsQuery[] = []; + const svc = fakeService((q) => { + seen.push(q); + if (q.measures.includes('revenue')) return { rows: [{ region: 'NA', revenue: 100 }], fields: [] }; + // won_amount query — scoped with stage=won + return { rows: [{ region: 'NA', won_amount: 60 }], fields: [] }; + }); + const compiled = compileDataset(dataset); + const res = await new 
DatasetExecutor(svc).execute(compiled, { + dimensions: ['region'], measures: ['revenue', 'won_amount'], + }); + // measure filter applied + const wonQuery = seen.find((q) => q.measures.includes('won_amount'))!; + expect(wonQuery.where).toEqual({ $and: [{ is_deleted: { $ne: true } }, { stage: 'won' }] }); + expect(res.rows[0]).toMatchObject({ region: 'NA', revenue: 100, won_amount: 60 }); + }); + + it('evaluates a derived measure from its (filtered + unfiltered) dependencies', async () => { + const svc = fakeService((q) => + q.measures.includes('won_amount') + ? { rows: [{ region: 'NA', won_amount: 60 }], fields: [] } + : { rows: [{ region: 'NA', revenue: 100 }], fields: [] }, + ); + const compiled = compileDataset(dataset); + const res = await new DatasetExecutor(svc).execute(compiled, { + dimensions: ['region'], measures: ['win_rate'], + }); + expect(res.rows[0].win_rate).toBe(0.6); + }); + + it('compareTo runs a shifted query and attaches __compare', async () => { + const seen: AnalyticsQuery[] = []; + const svc = fakeService((q) => { + seen.push(q); + const isShifted = JSON.stringify(q.timeDimensions).includes('2025-12'); + return { rows: [{ region: 'NA', revenue: isShifted ? 
80 : 100 }], fields: [] }; + }); + const compiled = compileDataset(dataset); + const res = await new DatasetExecutor(svc).execute(compiled, { + dimensions: ['region'], measures: ['revenue'], + timeDimensions: [{ dimension: 'close_date', dateRange: ['2026-01-01', '2026-01-31'] }], + compareTo: { kind: 'previousPeriod', dimension: 'close_date' }, + }); + expect(res.rows[0]).toMatchObject({ region: 'NA', revenue: 100, revenue__compare: 80 }); + // the shifted query used the previous period + const shifted = seen.find((q) => JSON.stringify(q.timeDimensions).includes('2025-12'))!; + expect(shifted.timeDimensions![0].dateRange).toEqual(['2025-12-01', '2025-12-31']); + }); +}); diff --git a/packages/services/service-analytics/src/__tests__/dataset-rls-integration.test.ts b/packages/services/service-analytics/src/__tests__/dataset-rls-integration.test.ts new file mode 100644 index 000000000..733a3ec6d --- /dev/null +++ b/packages/services/service-analytics/src/__tests__/dataset-rls-integration.test.ts @@ -0,0 +1,117 @@ +// Copyright (c) 2026 ObjectStack. Licensed under the Apache-2.0 license. + +import { describe, it, expect } from 'vitest'; +import { DatasetSchema } from '@objectstack/spec/ui'; +import type { ExecutionContext } from '@objectstack/spec/kernel'; +import type { FilterCondition } from '@objectstack/spec/data'; +import { AnalyticsService } from '../analytics-service.js'; +import { compileDataset } from '../dataset-compiler.js'; +import { DatasetExecutor } from '../dataset-executor.js'; + +/** + * R1 integration gate (ADR-0021 D-C). 
+ * + * Wires the FULL dataset pipeline — compileDataset → AnalyticsService (with a + * CONTEXT-AWARE read-scope provider, like the runtime's sharing middleware) → + * NativeSQLStrategy → read-scope SQL compilation — and proves the cross-object + * "revenue by account.region" query is tenant-scoped on BOTH the base object + * (`opportunity`) AND the joined object (`account`), driven by the per-request + * ExecutionContext threaded through `execute(..., context)`. + */ + +const dataset = DatasetSchema.parse({ + name: 'sales', + label: 'Sales', + object: 'opportunity', + include: ['account'], + dimensions: [{ name: 'region', field: 'account.region', type: 'string' }], + measures: [{ name: 'revenue', aggregate: 'sum', field: 'amount', certified: true }], +}); + +/** + * A context-aware read-scope provider that mimics the runtime wiring: + * RLSCompiler-style output (a FilterCondition) derived from the request's tenant. + */ +function readScope(_object: string, context?: ExecutionContext): FilterCondition | undefined { + return context?.tenantId ? { organization_id: context.tenantId } : undefined; +} + +function makeExecutor(captured: { sql: string; params: unknown[] }[], withScope = true) { + const compiled = compileDataset(dataset); + const service = new AnalyticsService({ + cubes: [compiled.cube], + queryCapabilities: () => ({ nativeSql: true, objectqlAggregate: false, inMemory: false }), + executeRawSql: async (_obj, sql, params) => { captured.push({ sql, params }); return []; }, + getReadScope: withScope ? 
readScope : undefined, + getAllowedRelationships: () => compiled.allowedRelationships, + }); + return { compiled, executor: new DatasetExecutor(service) }; +} + +const ctx = (tenantId: string): ExecutionContext => ({ tenantId } as ExecutionContext); + +describe('Dataset RLS integration (R1 gate)', () => { + it('threads the request context and scopes BOTH opportunity and account by tenant', async () => { + const captured: { sql: string; params: unknown[] }[] = []; + const { compiled, executor } = makeExecutor(captured); + + await executor.execute(compiled, { dimensions: ['region'], measures: ['revenue'] }, ctx('org_A')); + + expect(captured).toHaveLength(1); + const { sql, params } = captured[0]; + expect(sql).toMatch(/"opportunity"\."organization_id" = \$\d+/); + expect(sql).toMatch(/"account"\."organization_id" = \$\d+/); + expect(params.filter((p) => p === 'org_A')).toHaveLength(2); + }); + + it('the SAME service instance isolates two tenants by context (singleton-safe)', async () => { + const capA: { sql: string; params: unknown[] }[] = []; + const { compiled, executor } = makeExecutor(capA); + + await executor.execute(compiled, { dimensions: ['region'], measures: ['revenue'] }, ctx('org_A')); + await executor.execute(compiled, { dimensions: ['region'], measures: ['revenue'] }, ctx('org_B')); + + expect(capA[0].params).toContain('org_A'); + expect(capA[0].params).not.toContain('org_B'); + expect(capA[1].params).toContain('org_B'); + expect(capA[1].params).not.toContain('org_A'); + }); + + it('DEMONSTRATES the leak the hook closes: no context/provider → no tenant predicate', async () => { + const captured: { sql: string; params: unknown[] }[] = []; + const { compiled, executor } = makeExecutor(captured, /* withScope */ false); + await executor.execute(compiled, { dimensions: ['region'], measures: ['revenue'] }, ctx('org_A')); + expect(captured[0].sql).not.toContain('organization_id'); + }); + + it('a provider that returns the RLS deny sentinel scopes to zero 
rows', async () => { + const captured: { sql: string; params: unknown[] }[] = []; + const compiled = compileDataset(dataset); + const service = new AnalyticsService({ + cubes: [compiled.cube], + queryCapabilities: () => ({ nativeSql: true, objectqlAggregate: false, inMemory: false }), + executeRawSql: async (_o, sql, params) => { captured.push({ sql, params }); return []; }, + getReadScope: () => ({ id: '__rls_deny__:00000000-0000-0000-0000-000000000000' }), + getAllowedRelationships: () => compiled.allowedRelationships, + }); + await new DatasetExecutor(service).execute(compiled, { dimensions: ['region'], measures: ['revenue'] }); + // Deny sentinel applied to base + joined object. + expect(captured[0].sql.match(/__rls_deny__/g) ?? []).toHaveLength(0); // value is parameterized, not inlined + expect(captured[0].params.filter((p) => String(p).startsWith('__rls_deny__'))).toHaveLength(2); + }); + + it('rejects the join when the relationship is not declared (defense in depth)', async () => { + const captured: { sql: string; params: unknown[] }[] = []; + const compiled = compileDataset(dataset); + const service = new AnalyticsService({ + cubes: [compiled.cube], + queryCapabilities: () => ({ nativeSql: true, objectqlAggregate: false, inMemory: false }), + executeRawSql: async (_o, sql, params) => { captured.push({ sql, params }); return []; }, + getReadScope: () => ({ organization_id: 'org_A' }), + getAllowedRelationships: () => new Set(), + }); + await expect( + new DatasetExecutor(service).execute(compiled, { dimensions: ['region'], measures: ['revenue'] }), + ).rejects.toThrow(/not backed by a declared relationship/); + }); +}); diff --git a/packages/services/service-analytics/src/__tests__/native-sql-rls.test.ts b/packages/services/service-analytics/src/__tests__/native-sql-rls.test.ts new file mode 100644 index 000000000..a2aca608d --- /dev/null +++ b/packages/services/service-analytics/src/__tests__/native-sql-rls.test.ts @@ -0,0 +1,110 @@ +// Copyright (c) 2026 
ObjectStack. Licensed under the Apache-2.0 license. + +import { describe, it, expect } from 'vitest'; +import type { Cube } from '@objectstack/spec/data'; +import type { AnalyticsQuery, StrategyContext } from '@objectstack/spec/contracts'; +import { NativeSQLStrategy } from '../strategies/native-sql-strategy.js'; + +/** opportunity cube with a relationship dimension (account.region). */ +const cube: Cube = { + name: 'sales', + title: 'Sales', + sql: 'opportunity', + measures: { revenue: { name: 'revenue', label: 'Revenue', type: 'sum', sql: 'amount' } }, + dimensions: { region: { name: 'region', label: 'Region', type: 'string', sql: 'account.region' } }, + public: false, +}; + +const query: AnalyticsQuery = { + cube: 'sales', + measures: ['revenue'], + dimensions: ['region'], + timezone: 'UTC', +}; + +function ctxWith(overrides: Partial): StrategyContext { + return { + getCube: (name) => (name === 'sales' ? cube : undefined), + queryCapabilities: () => ({ nativeSql: true, objectqlAggregate: false, inMemory: false }), + executeRawSql: async () => [], + ...overrides, + }; +} + +describe('NativeSQLStrategy — D-C RLS hardening', () => { + it('injects the tenant read scope for BOTH the base table and the joined object', async () => { + const strategy = new NativeSQLStrategy(); + const ctx = ctxWith({ + getReadScope: (obj) => ({ organization_id: `org_for_${obj}` }), + getAllowedRelationships: () => new Set(['account']), + }); + + const { sql, params } = await strategy.generateSql(query, ctx); + + // base table opportunity is scoped + expect(sql).toContain('"opportunity"."organization_id" ='); + // joined table account is scoped too — this is the bypass that D-C closes + expect(sql).toContain('"account"."organization_id" ='); + // both tenant params are bound + expect(params).toContain('org_for_opportunity'); + expect(params).toContain('org_for_account'); + }); + + it('rejects a join whose alias is not in the declared relationship allowlist', async () => { + const 
strategy = new NativeSQLStrategy(); + const ctx = ctxWith({ + // account is NOT allowed → the account.region join must be refused + getAllowedRelationships: () => new Set(), + }); + + await expect(strategy.generateSql(query, ctx)).rejects.toThrow( + /join "account" is not backed by a declared relationship/, + ); + }); + + it('allows the join when the relationship is declared', async () => { + const strategy = new NativeSQLStrategy(); + const ctx = ctxWith({ getAllowedRelationships: () => new Set(['account']) }); + const { sql } = await strategy.generateSql(query, ctx); + expect(sql).toContain('LEFT JOIN "account"'); + }); + + it('joins the resolved TARGET TABLE when cube.joins maps alias→table (namespaced)', async () => { + // alias `account` → table `crm_account` (what the dataset compiler emits). + const nsCube: Cube = { ...cube, joins: { account: { name: 'crm_account', relationship: 'many_to_one', sql: '' } } }; + const strategy = new NativeSQLStrategy(); + const ctx = ctxWith({ + getCube: (n) => (n === 'sales' ? 
nsCube : undefined), + getReadScope: (obj) => ({ organization_id: `org:${obj}` }), + getAllowedRelationships: () => new Set(['account']), + }); + const { sql, params } = await strategy.generateSql(query, ctx); + // join targets the real table, aliased to the relationship name + expect(sql).toContain('LEFT JOIN "crm_account" "account" ON "opportunity"."account" = "account"."id"'); + expect(sql).toContain('"account"."region"'); + // RLS scope for the joined object uses the TARGET object name (crm_account) + expect(params).toContain('org:crm_account'); + expect(params).toContain('org:opportunity'); + }); + + it('is backward-compatible: no scope hooks → no scope predicates, no allowlist check', async () => { + const strategy = new NativeSQLStrategy(); + const ctx = ctxWith({}); + const { sql } = await strategy.generateSql(query, ctx); + expect(sql).not.toContain('organization_id'); + expect(sql).toContain('LEFT JOIN "account"'); + }); + + it('renumbers scope params after existing filter params', async () => { + const strategy = new NativeSQLStrategy(); + const ctx = ctxWith({ + getReadScope: (obj) => (obj === 'opportunity' ? { organization_id: 'org1' } : undefined), + getAllowedRelationships: () => new Set(['account']), + }); + const filteredQuery: AnalyticsQuery = { ...query, where: { stage: 'won' } }; + const { sql, params } = await strategy.generateSql(filteredQuery, ctx); + // filter param bound first ($1), scope param second ($2) + expect(params).toEqual(['won', 'org1']); + expect(sql).toContain('$2'); + }); +}); diff --git a/packages/services/service-analytics/src/__tests__/query-dataset.test.ts b/packages/services/service-analytics/src/__tests__/query-dataset.test.ts new file mode 100644 index 000000000..b21fd3fbe --- /dev/null +++ b/packages/services/service-analytics/src/__tests__/query-dataset.test.ts @@ -0,0 +1,64 @@ +// Copyright (c) 2026 ObjectStack. Licensed under the Apache-2.0 license. 
+ +import { describe, it, expect } from 'vitest'; +import { DatasetSchema } from '@objectstack/spec/ui'; +import type { ExecutionContext } from '@objectstack/spec/kernel'; +import { AnalyticsService } from '../analytics-service.js'; + +const dataset = DatasetSchema.parse({ + name: 'sales', + label: 'Sales', + object: 'opportunity', + include: ['account'], + dimensions: [{ name: 'region', field: 'account.region', type: 'string' }], + measures: [{ name: 'revenue', aggregate: 'sum', field: 'amount', certified: true }], +}); + +function service(captured: { sql: string; params: unknown[] }[]) { + return new AnalyticsService({ + queryCapabilities: () => ({ nativeSql: true, objectqlAggregate: false, inMemory: false }), + executeRawSql: async (_o, sql, params) => { captured.push({ sql, params }); return [{ region: 'NA', revenue: 100 }]; }, + getReadScope: (_o, ctx?: ExecutionContext) => (ctx?.tenantId ? { organization_id: ctx.tenantId } : undefined), + }); +} + +describe('AnalyticsService.queryDataset', () => { + it('compiles an inline dataset, runs it, and returns rows', async () => { + const captured: { sql: string; params: unknown[] }[] = []; + const result = await service(captured).queryDataset( + dataset, + { dimensions: ['region'], measures: ['revenue'] }, + { tenantId: 'org_A' } as ExecutionContext, + ); + expect(result.rows).toEqual([{ region: 'NA', revenue: 100 }]); + }); + + it('auto-wires the join allowlist from the compiled dataset (D-C) — declared join allowed', async () => { + const captured: { sql: string; params: unknown[] }[] = []; + await service(captured).queryDataset(dataset, { dimensions: ['region'], measures: ['revenue'] }, { tenantId: 'org_A' } as ExecutionContext); + // account join present + both tables tenant-scoped, with no getAllowedRelationships config passed. 
+ expect(captured[0].sql).toContain('LEFT JOIN "account"'); + expect(captured[0].sql).toMatch(/"opportunity"\."organization_id"/); + expect(captured[0].sql).toMatch(/"account"\."organization_id"/); + }); + + it('rejects an inline dataset whose dimension traverses an undeclared relationship', async () => { + const bad = DatasetSchema.parse({ + name: 'bad', label: 'Bad', object: 'opportunity', include: [], + dimensions: [{ name: 'region', field: 'account.region' }], + measures: [{ name: 'cnt', aggregate: 'count' }], + }); + await expect( + service([]).queryDataset(bad, { dimensions: ['region'], measures: ['cnt'] }), + ).rejects.toThrow(/not declared in the dataset's `include`/); + }); + + it('pre-registered datasets (config.datasets) are compiled at construction', () => { + const svc = new AnalyticsService({ + datasets: [dataset], + queryCapabilities: () => ({ nativeSql: true, objectqlAggregate: false, inMemory: false }), + executeRawSql: async () => [], + }); + expect(svc.cubeRegistry.has('sales')).toBe(true); + }); +}); diff --git a/packages/services/service-analytics/src/__tests__/read-scope-sql.test.ts b/packages/services/service-analytics/src/__tests__/read-scope-sql.test.ts new file mode 100644 index 000000000..a5cae29d0 --- /dev/null +++ b/packages/services/service-analytics/src/__tests__/read-scope-sql.test.ts @@ -0,0 +1,92 @@ +// Copyright (c) 2026 ObjectStack. Licensed under the Apache-2.0 license. 
+ +import { describe, it, expect } from 'vitest'; +import { compileScopedFilterToSql } from '../read-scope-sql.js'; + +describe('compileScopedFilterToSql', () => { + it('implicit equality → qualified column = ?', () => { + expect(compileScopedFilterToSql({ organization_id: 'org_A' }, 'opportunity')).toEqual({ + sql: '"opportunity"."organization_id" = ?', + params: ['org_A'], + }); + }); + + it('the RLS deny sentinel compiles to an id equality (matches nothing in practice)', () => { + const r = compileScopedFilterToSql({ id: '__rls_deny__:00000000-0000-0000-0000-000000000000' }, 'opportunity'); + expect(r.sql).toBe('"opportunity"."id" = ?'); + expect(r.params).toEqual(['__rls_deny__:00000000-0000-0000-0000-000000000000']); + }); + + it('$in → IN (?, ?)', () => { + expect(compileScopedFilterToSql({ owner_id: { $in: ['u1', 'u2'] } }, 'sys_user')).toEqual({ + sql: '"sys_user"."owner_id" IN (?, ?)', + params: ['u1', 'u2'], + }); + }); + + it('empty $in → 1 = 0 (matches no rows, fail-safe)', () => { + expect(compileScopedFilterToSql({ owner_id: { $in: [] } }, 't').sql).toBe('1 = 0'); + }); + + it('$or combines multiple policies', () => { + const r = compileScopedFilterToSql( + { $or: [{ organization_id: 'org_A' }, { is_public: true }] }, + 'doc', + ); + expect(r.sql).toBe('("doc"."organization_id" = ? OR "doc"."is_public" = ?)'); + expect(r.params).toEqual(['org_A', true]); + }); + + it('$and + nested $or', () => { + const r = compileScopedFilterToSql( + { $and: [{ organization_id: 'org_A' }, { $or: [{ owner_id: 'u1' }, { shared: true }] }] }, + 'rec', + ); + expect(r.sql).toBe('("rec"."organization_id" = ? AND ("rec"."owner_id" = ? 
OR "rec"."shared" = ?))'); + expect(r.params).toEqual(['org_A', 'u1', true]); + }); + + it('null → IS NULL; $ne null → IS NOT NULL', () => { + expect(compileScopedFilterToSql({ deleted_at: null }, 't').sql).toBe('"t"."deleted_at" IS NULL'); + expect(compileScopedFilterToSql({ deleted_at: { $ne: null } }, 't').sql).toBe('"t"."deleted_at" IS NOT NULL'); + }); + + it('comparison + string operators', () => { + expect(compileScopedFilterToSql({ amount: { $gte: 100 } }, 't').sql).toBe('"t"."amount" >= ?'); + expect(compileScopedFilterToSql({ name: { $startsWith: 'A' } }, 't')).toEqual({ + sql: '"t"."name" LIKE ?', params: ['A%'], + }); + }); + + it('multiple operators on one field are ANDed', () => { + const r = compileScopedFilterToSql({ amount: { $gte: 10, $lte: 100 } }, 't'); + expect(r.sql).toBe('("t"."amount" >= ? AND "t"."amount" <= ?)'); + expect(r.params).toEqual([10, 100]); + }); + + // ── fail-closed guarantees (security) ────────────────────────────────────── + + it('THROWS on an unsafe field identifier (injection guard)', () => { + expect(() => compileScopedFilterToSql({ 'id; DROP TABLE x': 'v' }, 't')).toThrowError(/unsafe field identifier/); + }); + + it('THROWS on an unsafe alias identifier', () => { + expect(() => compileScopedFilterToSql({ id: 'v' }, 'a"; DROP')).toThrowError(/unsafe alias identifier/); + }); + + it('THROWS on an unknown operator (never silently drops a predicate)', () => { + expect(() => compileScopedFilterToSql({ f: { $regex: '.*' } }, 't')).toThrowError(/unsupported operator/); + }); + + it('THROWS on a nested relation value (cannot join in a flat scope)', () => { + expect(() => compileScopedFilterToSql({ account: { region: 'NA' } }, 't')).toThrowError(/nested\/relation value/); + }); + + it('THROWS on an empty $and (degenerate, fail-closed)', () => { + expect(() => compileScopedFilterToSql({ $and: [] }, 't')).toThrowError(/non-empty array/); + }); + + it('THROWS on a non-object read scope', () => { + expect(() => 
compileScopedFilterToSql('nope' as never, 't')).toThrowError(/must be a filter object/); + }); +}); diff --git a/packages/services/service-analytics/src/analytics-service.ts b/packages/services/service-analytics/src/analytics-service.ts index 042f55233..7beb11adc 100644 --- a/packages/services/service-analytics/src/analytics-service.ts +++ b/packages/services/service-analytics/src/analytics-service.ts @@ -5,14 +5,19 @@ import type { AnalyticsQuery, AnalyticsResult, CubeMeta, + DatasetSelection, } from '@objectstack/spec/contracts'; -import type { Cube } from '@objectstack/spec/data'; +import type { Cube, FilterCondition } from '@objectstack/spec/data'; +import type { ExecutionContext } from '@objectstack/spec/kernel'; +import type { Dataset } from '@objectstack/spec/ui'; import type { Logger } from '@objectstack/spec/contracts'; import { createLogger } from '@objectstack/core'; import { CubeRegistry } from './cube-registry.js'; import type { AnalyticsStrategy, DriverCapabilities, StrategyContext } from './strategies/types.js'; import { NativeSQLStrategy } from './strategies/native-sql-strategy.js'; import { ObjectQLStrategy } from './strategies/objectql-strategy.js'; +import { compileDataset, type CompiledDataset, type RelationshipResolver } from './dataset-compiler.js'; +import { DatasetExecutor } from './dataset-executor.js'; /** * Configuration for AnalyticsService. @@ -51,6 +56,30 @@ export interface AnalyticsServiceConfig { * They are merged with the built-in strategies and sorted by priority. */ strategies?: AnalyticsStrategy[]; + /** + * ADR-0021 D-C — context-aware per-object read scope (tenant + RLS). Supplied + * by the runtime that owns the sharing middleware; receives the current + * request's ExecutionContext and returns the RLS `FilterCondition` for the + * object (exactly what `RLSCompiler` emits). 
The service binds the active + * context per query and the strategy compiles the filter into alias-qualified + * SQL injected into every base and joined table. + */ + getReadScope?: (objectName: string, context?: ExecutionContext) => FilterCondition | null | undefined; + /** + * ADR-0021 D-C — join allowlist per cube (the dataset's declared `include`). + * Joins outside this set are rejected by the strategy. Compiled datasets + * (via `queryDataset`/`registerDataset`) supply this automatically; this + * config hook is a fallback for legacy hand-authored cubes. + */ + getAllowedRelationships?: (cubeName: string) => Set | undefined; + /** + * ADR-0021 — optional object-graph resolver used when compiling datasets: + * `(baseObject, relationshipName) => relatedObjectName | undefined`. When + * provided, `queryDataset` validates that every declared `include` exists. + */ + relationshipResolver?: RelationshipResolver; + /** Pre-defined datasets to compile + register at construction (ADR-0021). */ + datasets?: Dataset[]; } /** @@ -82,7 +111,14 @@ const DEFAULT_CAPABILITIES: DriverCapabilities = { */ export class AnalyticsService implements IAnalyticsService { private readonly strategies: AnalyticsStrategy[]; - private readonly strategyCtx: StrategyContext; + /** Context-independent part of the StrategyContext (no per-request scope). */ + private readonly baseCtx: StrategyContext; + /** Context-aware read-scope provider (bound to the request's context per call). */ + private readonly readScopeProvider?: AnalyticsServiceConfig['getReadScope']; + /** Compiled datasets by name — feeds the join allowlist (D-C) and queryDataset. */ + private readonly datasetRegistry = new Map(); + /** Optional object-graph resolver used when compiling datasets. 
*/ + private readonly relationshipResolver?: RelationshipResolver; readonly cubeRegistry: CubeRegistry; private readonly logger: Logger; @@ -95,13 +131,33 @@ export class AnalyticsService implements IAnalyticsService { this.cubeRegistry.registerAll(config.cubes); } - // Build strategy context - this.strategyCtx = { + this.readScopeProvider = config.getReadScope; + this.relationshipResolver = config.relationshipResolver; + + // Compile + register pre-defined datasets (ADR-0021). + if (config.datasets) { + for (const ds of config.datasets) { + try { + this.registerDataset(ds); + } catch (e) { + this.logger?.warn?.(`[Analytics] Failed to register dataset "${ds?.name}": ${String((e as Error)?.message ?? e)}`); + } + } + } + + // Build the context-independent strategy context. `getReadScope` is bound + // per query in `callCtx(context)` so it can resolve the active tenant. + this.baseCtx = { getCube: (name) => this.cubeRegistry.get(name), queryCapabilities: config.queryCapabilities || (() => DEFAULT_CAPABILITIES), executeRawSql: config.executeRawSql, executeAggregate: config.executeAggregate, fallbackService: config.fallbackService, + // Prefer a compiled dataset's declared relationships (D-C join allowlist); + // fall back to any explicitly-configured provider for legacy cubes. + getAllowedRelationships: (cubeName: string) => + this.datasetRegistry.get(cubeName)?.allowedRelationships + ?? config.getAllowedRelationships?.(cubeName), }; // Build strategy chain (built-in + custom, sorted by priority) @@ -127,19 +183,61 @@ export class AnalyticsService implements IAnalyticsService { ); } + /** + * Build a per-call StrategyContext that binds the read-scope provider to the + * current request's ExecutionContext (ADR-0021 D-C). The strategy then sees a + * `getReadScope(objectName)` that already knows the active tenant. 
+ */ + private callCtx(context?: ExecutionContext): StrategyContext { + if (!this.readScopeProvider) return this.baseCtx; + return { + ...this.baseCtx, + getReadScope: (objectName: string) => this.readScopeProvider!(objectName, context), + }; + } + /** * Execute an analytical query by delegating to the first capable strategy. */ - async query(query: AnalyticsQuery): Promise { + async query(query: AnalyticsQuery, context?: ExecutionContext): Promise { if (!query.cube) { throw new Error('Cube name is required in analytics query'); } this.ensureCube(query); - const strategy = this.resolveStrategy(query); + const ctx = this.callCtx(context); + const strategy = this.resolveStrategy(query, ctx); this.logger.debug(`[Analytics] Query on cube "${query.cube}" → ${strategy.name}`); - return strategy.execute(query, this.strategyCtx); + return strategy.execute(query, ctx); + } + + /** + * Compile a `dataset` (ADR-0021) and register its Cube + join allowlist so it + * can be queried by name. Idempotent (re-registering overwrites). Returns the + * compiled dataset. + */ + registerDataset(dataset: Dataset): CompiledDataset { + const compiled = compileDataset(dataset, this.relationshipResolver); + this.cubeRegistry.register(compiled.cube); + this.datasetRegistry.set(dataset.name, compiled); + return compiled; + } + + /** + * Execute a semantic-layer dataset (ADR-0021). Compiles the dataset (saved or + * inline draft — Studio preview), registers its Cube + join allowlist, then + * runs the selection through the `DatasetExecutor` with the request context so + * tenant/RLS scoping (D-C) is applied. See {@link IAnalyticsService.queryDataset}. + */ + async queryDataset( + dataset: Dataset, + selection: DatasetSelection, + context?: ExecutionContext, + ): Promise { + const compiled = this.registerDataset(dataset); + this.logger.debug(`[Analytics] queryDataset "${dataset.name}" (object=${dataset.object}, include=${(dataset.include ?? 
[]).join(',') || '—'})`); + return new DatasetExecutor(this).execute(compiled, selection, context); } /** @@ -170,16 +268,17 @@ export class AnalyticsService implements IAnalyticsService { /** * Generate SQL for a query without executing it (dry-run). */ - async generateSql(query: AnalyticsQuery): Promise<{ sql: string; params: unknown[] }> { + async generateSql(query: AnalyticsQuery, context?: ExecutionContext): Promise<{ sql: string; params: unknown[] }> { if (!query.cube) { throw new Error('Cube name is required for SQL generation'); } this.ensureCube(query); - const strategy = this.resolveStrategy(query); + const ctx = this.callCtx(context); + const strategy = this.resolveStrategy(query, ctx); this.logger.debug(`[Analytics] generateSql on cube "${query.cube}" → ${strategy.name}`); - return strategy.generateSql(query, this.strategyCtx); + return strategy.generateSql(query, ctx); } // ── Internal ───────────────────────────────────────────────────── @@ -292,9 +391,9 @@ export class AnalyticsService implements IAnalyticsService { /** * Walk the strategy chain and return the first strategy that can handle the query. */ - private resolveStrategy(query: AnalyticsQuery): AnalyticsStrategy { + private resolveStrategy(query: AnalyticsQuery, ctx: StrategyContext): AnalyticsStrategy { for (const strategy of this.strategies) { - if (strategy.canHandle(query, this.strategyCtx)) { + if (strategy.canHandle(query, ctx)) { return strategy; } } diff --git a/packages/services/service-analytics/src/dataset-compiler.ts b/packages/services/service-analytics/src/dataset-compiler.ts new file mode 100644 index 000000000..083573d7e --- /dev/null +++ b/packages/services/service-analytics/src/dataset-compiler.ts @@ -0,0 +1,194 @@ +// Copyright (c) 2026 ObjectStack. Licensed under the Apache-2.0 license. 
+ +import type { Cube, Metric, Dimension as CubeDimension, CubeJoin } from '@objectstack/spec/data'; +import type { Dataset, DatasetMeasure, DatasetDimension } from '@objectstack/spec/ui'; +import type { FilterCondition } from '@objectstack/spec/data'; + +/** + * Dataset → Cube compiler (ADR-0021 D-A=(c), WS2). + * + * Lowers a declarative `dataset` (base object + included relationships + + * declared dimensions/measures + derived measures) into the existing Cube + * analytics runtime model. The author never writes an `ON` clause: joins are + * DERIVED from the `include` relationship names and the dotted `relationship.field` + * references on dimensions/measures, matching the NativeSQLStrategy convention + * `. = .id`. + * + * Safety (D-C): every dotted field reference must point at a relationship that + * the dataset explicitly declared in `include`; otherwise the compile fails. + * The returned `allowedRelationships` set is the join allowlist the strategy + * enforces at SQL-build time. + */ + +/** Operators v1 does NOT compile to the Cube SQL switch — surfaced as a clear error. */ +const UNSUPPORTED_AGGREGATES = new Set(['array_agg', 'string_agg']); + +export interface DerivedMeasureSpec { + name: string; + op: 'ratio' | 'sum' | 'difference' | 'product'; + of: string[]; +} + +export interface CompiledDataset { + /** The Cube the dataset compiles to (consumed by the strategy chain). */ + cube: Cube; + /** + * Relationship names declared in `include`. The join allowlist (D-C): + * the NativeSQLStrategy rejects any join alias not in this set. + */ + allowedRelationships: Set; + /** Derived measures, computed post-aggregation by the executor (Q1). */ + derived: DerivedMeasureSpec[]; + /** Definition-level filter (the dataset's intrinsic scope). */ + filter?: FilterCondition; + /** Per-measure scoped filters, keyed by measure name (applied by executor). 
*/ + measureFilters: Record; +} + +/** + * Resolves a relationship name on a base object to the related object/table + * name, using the runtime's object graph. Optional: when omitted the compiler + * trusts the declared `include` names (the NativeSQLStrategy convention assumes + * the relationship name equals the related table name). + */ +export type RelationshipResolver = ( + baseObject: string, + relationshipName: string, +) => string | undefined; + +/** Map a dataset measure's aggregate to the Cube metric `type`. */ +function aggregateToMetricType(m: DatasetMeasure): Metric['type'] { + if (UNSUPPORTED_AGGREGATES.has(m.aggregate)) { + throw new Error( + `[dataset-compiler] measure "${m.name}" uses aggregate "${m.aggregate}" which is ` + + `not supported by the v1 dataset runtime (supported: count, sum, avg, min, max, count_distinct).`, + ); + } + return m.aggregate as Metric['type']; +} + +/** Map a dataset dimension type to the Cube dimension `type`. */ +function dimensionType(d: DatasetDimension): CubeDimension['type'] { + switch (d.type) { + case 'date': return 'time'; + case 'number': return 'number'; + case 'boolean': return 'boolean'; + case 'lookup': return 'string'; + case 'string': return 'string'; + default: return 'string'; + } +} + +/** The relationship prefix of a dotted `relationship.field` path, or null. */ +function relationshipPrefix(field: string): string | null { + const idx = field.indexOf('.'); + return idx > 0 ? field.slice(0, idx) : null; +} + +export function compileDataset( + dataset: Dataset, + resolver?: RelationshipResolver, +): CompiledDataset { + const include = dataset.include ?? []; + const allowedRelationships = new Set(include); + + // Resolve each declared relationship to its TARGET TABLE and emit a Cube join. + // The relationship name (a lookup/master_detail field on the base object) is + // used as the join ALIAS, but the joined TABLE is the related object — these + // differ when objects are namespaced (e.g. 
lookup field `account` → + // table `crm_account`). Without resolving the table, the strategy would join a + // non-existent `"account"` table. When no resolver is supplied the relationship + // name is assumed to equal the table name (legacy convention / unit tests). + const joins: Record = {}; + for (const rel of include) { + let targetTable: string = rel; + if (resolver) { + const resolved = resolver(dataset.object, rel); + if (!resolved) { + throw new Error( + `[dataset-compiler] dataset "${dataset.name}" includes relationship "${rel}" ` + + `which does not exist on object "${dataset.object}".`, + ); + } + targetTable = resolved; + } + // `name` carries the join TABLE; the strategy derives the ON clause from the + // relationship-name convention (`. = .id`). + joins[rel] = { + name: targetTable, + relationship: 'many_to_one', + sql: `${dataset.object}.${rel} = ${rel}.id`, + }; + } + + // Assert any dotted field only traverses a DECLARED relationship (D-C). + const assertDeclared = (field: string, ownerKind: string, ownerName: string) => { + const prefix = relationshipPrefix(field); + if (prefix && !allowedRelationships.has(prefix)) { + throw new Error( + `[dataset-compiler] ${ownerKind} "${ownerName}" references relationship "${prefix}" ` + + `via "${field}", but "${prefix}" is not declared in the dataset's \`include\`. ` + + `v1 only joins along declared relationships.`, + ); + } + }; + + // Compile dimensions. + const dimensions: Record = {}; + for (const d of dataset.dimensions) { + assertDeclared(d.field, 'dimension', d.name); + const dim: CubeDimension = { + name: d.name, + label: typeof d.label === 'string' ? d.label : d.name, + type: dimensionType(d), + sql: d.field, + }; + if (dim.type === 'time') { + dim.granularities = d.dateGranularity + ? [d.dateGranularity] + : ['day', 'week', 'month', 'quarter', 'year']; + } + dimensions[d.name] = dim; + } + + // Compile measures (non-derived → Cube metrics; derived → sidecar). 
+ const measures: Record = {}; + const derived: DerivedMeasureSpec[] = []; + const measureFilters: Record = {}; + + for (const m of dataset.measures) { + if (m.derived) { + derived.push({ name: m.name, op: m.derived.op, of: m.derived.of }); + continue; + } + if (m.field) assertDeclared(m.field, 'measure', m.name); + const metric: Metric = { + name: m.name, + label: typeof m.label === 'string' ? m.label : m.name, + type: aggregateToMetricType(m), + // `count` with no field aggregates over rows (*). + sql: m.field ?? '*', + }; + if (typeof m.format === 'string') metric.format = m.format; + measures[m.name] = metric; + if (m.filter) measureFilters[m.name] = m.filter; + } + + const cube: Cube = { + name: dataset.name, + title: typeof dataset.label === 'string' ? dataset.label : dataset.name, + sql: dataset.object, + measures, + dimensions, + public: false, + }; + if (Object.keys(joins).length > 0) cube.joins = joins; + + return { + cube, + allowedRelationships, + derived, + filter: dataset.filter, + measureFilters, + }; +} diff --git a/packages/services/service-analytics/src/dataset-executor.ts b/packages/services/service-analytics/src/dataset-executor.ts new file mode 100644 index 000000000..96b09a1c3 --- /dev/null +++ b/packages/services/service-analytics/src/dataset-executor.ts @@ -0,0 +1,299 @@ +// Copyright (c) 2026 ObjectStack. Licensed under the Apache-2.0 license. + +import type { + IAnalyticsService, + AnalyticsQuery, + AnalyticsResult, + DatasetSelection, + DatasetCompareTo, +} from '@objectstack/spec/contracts'; +import type { FilterCondition } from '@objectstack/spec/data'; +import type { ExecutionContext } from '@objectstack/spec/kernel'; +import type { CompiledDataset, DerivedMeasureSpec } from './dataset-compiler.js'; + +// Re-export the shared protocol shapes so existing importers keep working. 
export type { DatasetSelection } from '@objectstack/spec/contracts';
/** @deprecated use DatasetCompareTo from @objectstack/spec/contracts */
export type CompareTo = DatasetCompareTo;

/**
 * Dataset executor (ADR-0021 WS2).
 *
 * Turns a compiled dataset + a presentation's selection (dimensions, measures,
 * runtime filter, compareTo) into one or more `AnalyticsQuery`s against the Cube
 * runtime, then post-processes the results:
 *  - resolves the base measures a selection needs (including derived deps),
 *  - applies measure-scoped filters via supplementary grouped queries,
 *  - evaluates derived measures (ratio/sum/difference/product) row-by-row (Q1),
 *  - shifts the query for `compareTo` (previousPeriod / previousYear) and
 *    attaches `__compare` columns.
 *
 * RLS/tenant scoping is NOT handled here — it is enforced inside the strategy
 * via the StrategyContext read-scope hook (D-C). This layer is pure query
 * shaping + arithmetic.
 */

/**
 * AND two optional FilterConditions into one (MongoDB-style).
 *
 * @returns `{ $and: [a, b] }` when both are present, the single present filter
 *   when only one is, and `undefined` when neither is.
 */
export function combineFilters(
  a?: FilterCondition,
  b?: FilterCondition,
): FilterCondition | undefined {
  if (a && b) return { $and: [a, b] } as FilterCondition;
  return a ?? b;
}

/**
 * Evaluate derived measures on each aggregated row.
 *
 * Each row is shallow-copied before the derived columns are written, so the
 * input rows are never mutated. Derived measures are evaluated in list order
 * against the copy, so a later derived measure can read an earlier one.
 * Division by zero, missing operands and non-finite results yield `null`
 * rather than Infinity/NaN. When `derived` is empty the input array is
 * returned as-is (no copy).
 */
export function evaluateDerivedMeasures(
  rows: Record<string, unknown>[],
  derived: DerivedMeasureSpec[],
): Record<string, unknown>[] {
  if (derived.length === 0) return rows;
  return rows.map((row) => {
    const out: Record<string, unknown> = { ...row };
    for (const d of derived) {
      out[d.name] = computeDerived(d, out);
    }
    return out;
  });
}

/**
 * Coerce an aggregated cell to a finite number, or `null` when it is missing.
 * Numeric strings are accepted; blank strings count as missing rather than
 * as `0` (which is what a bare `Number('')` would produce).
 */
function num(v: unknown): number | null {
  if (v == null) return null;
  if (typeof v === 'string' && v.trim() === '') return null;
  const n = typeof v === 'number' ? v : Number(v);
  return Number.isFinite(n) ? n : null;
}

/**
 * Compute one derived measure for one row.
 *
 * @returns the computed value, or `null` when any operand is missing, the
 *   operand list is empty, a ratio has fewer than two operands or a zero
 *   denominator, the op is unknown, or the result is not finite.
 */
function computeDerived(d: DerivedMeasureSpec, row: Record<string, unknown>): number | null {
  const nums: number[] = [];
  for (const name of d.of) {
    const n = num(row[name]);
    if (n === null) return null;
    nums.push(n);
  }
  // A derived measure without operands has no meaningful value.
  if (nums.length === 0) return null;

  let value: number;
  switch (d.op) {
    case 'ratio':
      if (nums.length < 2 || nums[1] === 0) return null;
      value = nums[0] / nums[1];
      break;
    case 'difference':
      value = nums.slice(1).reduce((acc, v) => acc - v, nums[0]);
      break;
    case 'sum':
      value = nums.reduce((acc, v) => acc + v, 0);
      break;
    case 'product':
      value = nums.reduce((acc, v) => acc * v, 1);
      break;
    default:
      return null;
  }
  // Overflow (e.g. a huge product) must not leak Infinity into chart data.
  return Number.isFinite(value) ? value : null;
}

// ── compareTo date math (deterministic — no Date.now) ────────────────────────

/**
 * Parse a date string to epoch milliseconds.
 * Accepts 'YYYY-MM-DD' (interpreted as UTC midnight) and ISO datetimes.
 *
 * @throws Error when the string cannot be parsed.
 */
function parseUTC(date: string): number {
  const ms = Date.parse(date.length === 10 ? `${date}T00:00:00Z` : date);
  if (Number.isNaN(ms)) throw new Error(`[dataset-executor] invalid date in dateRange: "${date}"`);
  return ms;
}

const DAY_MS = 86_400_000;

/** Format epoch milliseconds as a UTC 'YYYY-MM-DD' date. */
function toISODate(ms: number): string {
  return new Date(ms).toISOString().slice(0, 10);
}

/**
 * Shift a date by whole years, keeping month and day.
 * Feb 29 has no counterpart in a non-leap target year; `setUTCFullYear` would
 * roll it over to Mar 1, so the result is clamped back to Feb 28 instead.
 */
function shiftYear(date: string, years: number): string {
  const d = new Date(parseUTC(date));
  const month = d.getUTCMonth();
  d.setUTCFullYear(d.getUTCFullYear() + years);
  // A changed month means the day overflowed; day 0 is the last day of the
  // previous month, i.e. the month we started in.
  if (d.getUTCMonth() !== month) d.setUTCDate(0);
  return toISODate(d.getTime());
}

/**
 * Compute the comparison window for a [start,end] range.
 *
 * - `previousYear`: both ends shifted back one year (Feb 29 clamps to Feb 28).
 * - `previousPeriod`: the equal-length window ending the day before `start`.
 *
 * @throws Error when either date in `range` cannot be parsed.
 */
export function shiftRange(range: [string, string], kind: CompareTo['kind']): [string, string] {
  const [start, end] = range;
  if (kind === 'previousYear') {
    return [shiftYear(start, -1), shiftYear(end, -1)];
  }
  // previousPeriod — the equal-length window ending the day before `start`.
  const startMs = parseUTC(start);
  const endMs = parseUTC(end);
  const lengthDays = Math.round((endMs - startMs) / DAY_MS) + 1;
  const prevEndMs = startMs - DAY_MS;
  const prevStartMs = prevEndMs - (lengthDays - 1) * DAY_MS;
  return [toISODate(prevStartMs), toISODate(prevEndMs)];
}

export class DatasetExecutor {
  constructor(private readonly service: IAnalyticsService) {}

  /**
   * Execute a dataset selection and return the shaped rows (+ field metadata).
   *
   * The rows and field list handed back by the service are copied before any
   * merging, so a service that reuses its result objects is not corrupted.
   *
   * @param compiled - The compiled dataset (cube, derived specs, measure filters).
   * @param selection - Dimensions/measures to project plus runtime directives.
   * @param context - The request's ExecutionContext, threaded into every
   *   underlying `IAnalyticsService.query` so the tenant/RLS read scope is
   *   applied per request (ADR-0021 D-C).
   * @throws Error when `compareTo` names a time dimension that has no dateRange.
   */
  async execute(
    compiled: CompiledDataset,
    selection: DatasetSelection,
    context?: ExecutionContext,
  ): Promise<AnalyticsResult> {
    const derivedByName = new Map<string, DerivedMeasureSpec>(
      compiled.derived.map((d): [string, DerivedMeasureSpec] => [d.name, d]),
    );
    const selectedDerived = selection.measures
      .map((m) => derivedByName.get(m))
      .filter((d): d is DerivedMeasureSpec => !!d);

    // Base measures = selected non-derived + dependencies of selected derived.
    const baseMeasures = new Set<string>();
    for (const m of selection.measures) {
      if (!derivedByName.has(m)) baseMeasures.add(m);
    }
    for (const d of selectedDerived) {
      for (const dep of d.of) baseMeasures.add(dep);
    }

    // Split measures into those with a scoped filter and those without.
    const unfiltered: string[] = [];
    const filtered: string[] = [];
    for (const m of baseMeasures) {
      (compiled.measureFilters[m] ? filtered : unfiltered).push(m);
    }

    const baseFilter = combineFilters(compiled.filter, selection.runtimeFilter);
    const dimensions = selection.dimensions ?? [];

    // Primary query: all unfiltered base measures in one pass. When every base
    // measure is filter-scoped, the supplementary queries below build the grid.
    let result: AnalyticsResult;
    if (unfiltered.length > 0 || filtered.length === 0) {
      const primary = await this.service.query(this.buildQuery(compiled, {
        measures: unfiltered,
        dimensions,
        where: baseFilter,
        selection,
      }), context);
      // Copy before merging: mergeByDimensions writes into the row objects.
      result = {
        ...primary,
        rows: primary.rows.map((row) => ({ ...row })),
        fields: [...primary.fields],
      };
    } else {
      result = { rows: [], fields: [] };
    }

    // Supplementary queries: one per measure-scoped filter, merged by dimension key.
    for (const m of filtered) {
      const mFilter = combineFilters(baseFilter, compiled.measureFilters[m]);
      const sub = await this.service.query(this.buildQuery(compiled, {
        measures: [m], dimensions, where: mFilter, selection,
      }), context);
      result.rows = mergeByDimensions(result.rows, sub.rows, dimensions, [m]);
      result.fields.push({ name: m, type: 'number' });
    }

    // compareTo — run the shifted queries over the same base measures and attach.
    if (selection.compareTo) {
      const compareRows = await this.runCompare(compiled, selection, [...baseMeasures], dimensions, baseFilter, context);
      result.rows = mergeByDimensions(
        result.rows,
        compareRows,
        dimensions,
        [...baseMeasures].map((m) => `${m}__compare`),
      );
      for (const m of baseMeasures) result.fields.push({ name: `${m}__compare`, type: 'number' });
    }

    // Derived measures (computed from base + compare columns already present).
    result.rows = evaluateDerivedMeasures(result.rows, selectedDerived);
    for (const d of selectedDerived) result.fields.push({ name: d.name, type: 'number' });

    return result;
  }

  /**
   * Build one AnalyticsQuery for the dataset's cube. Time dimensions, order,
   * limit and offset are passed through from the selection unchanged; the
   * timezone defaults to 'UTC'.
   */
  private buildQuery(
    compiled: CompiledDataset,
    opts: {
      measures: string[];
      dimensions: string[];
      where?: FilterCondition;
      selection: DatasetSelection;
    },
  ): AnalyticsQuery {
    const q: AnalyticsQuery = {
      cube: compiled.cube.name,
      measures: opts.measures,
      dimensions: opts.dimensions,
      timezone: opts.selection.timezone ?? 'UTC',
    };
    if (opts.where) q.where = opts.where as Record<string, unknown>;
    if (opts.selection.timeDimensions) q.timeDimensions = opts.selection.timeDimensions;
    if (opts.selection.order) q.order = opts.selection.order;
    if (opts.selection.limit != null) q.limit = opts.selection.limit;
    if (opts.selection.offset != null) q.offset = opts.selection.offset;
    return q;
  }

  /**
   * Run the time-shifted comparison and return rows keyed by the dimensions
   * with each measure renamed to `<measure>__compare`.
   *
   * Measures that carry a scoped filter in `compiled.measureFilters` are
   * queried one by one with that filter ANDed in, exactly as the primary pass
   * does; otherwise the comparison value would be computed over a wider row
   * set than the value it is compared against.
   *
   * @throws Error when the compareTo dimension has no matching timeDimension
   *   with a dateRange.
   */
  private async runCompare(
    compiled: CompiledDataset,
    selection: DatasetSelection,
    measures: string[],
    dimensions: string[],
    baseFilter: FilterCondition | undefined,
    context?: ExecutionContext,
  ): Promise<Record<string, unknown>[]> {
    const cmp = selection.compareTo;
    if (!cmp) return [];
    const timeDimensions = selection.timeDimensions ?? [];
    const td = timeDimensions.find((t) => t.dimension === cmp.dimension);
    if (!td || !td.dateRange) {
      throw new Error(
        `[dataset-executor] compareTo requires a timeDimension "${cmp.dimension}" with a dateRange.`,
      );
    }
    // A single-date range is treated as the one-day window [date, date].
    const range: [string, string] = Array.isArray(td.dateRange)
      ? [td.dateRange[0], td.dateRange[1] ?? td.dateRange[0]]
      : [td.dateRange, td.dateRange];
    const shifted = shiftRange(range, cmp.kind);
    const shiftedTd = timeDimensions.map((t) =>
      t.dimension === cmp.dimension ? { ...t, dateRange: shifted } : t,
    );

    // One shifted query; measure columns renamed so they merge beside the primary ones.
    const runShifted = async (
      names: string[],
      where: FilterCondition | undefined,
    ): Promise<Record<string, unknown>[]> => {
      const q: AnalyticsQuery = {
        cube: compiled.cube.name,
        measures: names,
        dimensions,
        timeDimensions: shiftedTd,
        timezone: selection.timezone ?? 'UTC',
      };
      if (where) q.where = where as Record<string, unknown>;
      const sub = await this.service.query(q, context);
      return sub.rows.map((row) => {
        const out: Record<string, unknown> = {};
        for (const dim of dimensions) out[dim] = row[dim];
        for (const m of names) out[`${m}__compare`] = row[m];
        return out;
      });
    };

    const unscoped = measures.filter((m) => !compiled.measureFilters[m]);
    const scoped = measures.filter((m) => !!compiled.measureFilters[m]);

    let rows: Record<string, unknown>[] = [];
    if (unscoped.length > 0 || scoped.length === 0) {
      rows = await runShifted(unscoped, baseFilter);
    }
    for (const m of scoped) {
      const extra = await runShifted([m], combineFilters(baseFilter, compiled.measureFilters[m]));
      rows = mergeByDimensions(rows, extra, dimensions, [`${m}__compare`]);
    }
    return rows;
  }
}

/**
 * Left-merge `extra` rows onto `base` rows by their dimension-key tuple,
 * copying the listed value columns. Rows in `extra` with no base match are
 * appended (outer-ish merge so comparison-only buckets still surface).
 *
 * `base` and its row objects are updated in place and `base` is returned.
 * Dimension values are compared by their string form, with null/undefined
 * treated as ''. The key is the JSON-encoded tuple of those strings, so
 * adjacent values cannot run together: ('x','yz') and ('xy','z') stay distinct.
 */
export function mergeByDimensions(
  base: Record<string, unknown>[],
  extra: Record<string, unknown>[],
  dimensions: string[],
  valueColumns: string[],
): Record<string, unknown>[] {
  const keyOf = (row: Record<string, unknown>): string =>
    JSON.stringify(dimensions.map((d) => String(row[d] ?? '')));
  const index = new Map<string, Record<string, unknown>>();
  for (const row of base) index.set(keyOf(row), row);

  for (const row of extra) {
    const key = keyOf(row);
    const target = index.get(key);
    if (target) {
      for (const c of valueColumns) target[c] = row[c];
    } else {
      const fresh: Record<string, unknown> = {};
      for (const d of dimensions) fresh[d] = row[d];
      for (const c of valueColumns) fresh[c] = row[c];
      index.set(key, fresh);
      base.push(fresh);
    }
  }
  return base;
}
+1,8 @@ // Copyright (c) 2025 ObjectStack. Licensed under the Apache-2.0 license. import type { Plugin, PluginContext } from '@objectstack/core'; -import type { Cube } from '@objectstack/spec/data'; +import type { Cube, FilterCondition } from '@objectstack/spec/data'; +import type { ExecutionContext } from '@objectstack/spec/kernel'; import type { IAnalyticsService } from '@objectstack/spec/contracts'; import { AnalyticsService } from './analytics-service.js'; import type { AnalyticsServiceConfig } from './analytics-service.js'; @@ -21,6 +22,8 @@ interface DataEngineLike { aggregations?: Array<{ function: string; field: string; alias: string }>; }): Promise; execute?(command: unknown, options?: Record): Promise; + /** Return the registered object schema (for relationship → target resolution). */ + getObject?(name: string): { fields?: Record } | undefined; } /** @@ -46,6 +49,20 @@ export interface AnalyticsServicePluginOptions { aggregations?: Array<{ field: string; method: string; alias: string }>; filter?: Record; }) => Promise[]>; + /** + * ADR-0021 D-C — context-aware per-object read scope (tenant + RLS). The + * runtime supplies this from its sharing middleware so the analytics raw-SQL + * path cannot bypass tenant isolation. Receives the request's ExecutionContext + * and returns the RLS `FilterCondition` for the object (what `RLSCompiler` + * emits). When omitted, the plugin auto-bridges to a registered `'security'` + * service exposing `getReadFilter(object, context)` if one is present. + */ + getReadScope?: (objectName: string, context?: ExecutionContext) => FilterCondition | null | undefined; + /** + * ADR-0021 D-C — join allowlist per cube (the dataset's declared `include`). + * Typically wired from the dataset registry's compiled `allowedRelationships`. + */ + getAllowedRelationships?: (cubeName: string) => Set | undefined; /** Enable debug logging. 
*/ debug?: boolean; } @@ -199,6 +216,55 @@ export class AnalyticsServicePlugin implements Plugin { inMemory: false, })); + // ADR-0021 D-C — wire the read-scope provider. Prefer an explicit option; + // otherwise auto-bridge to a registered `'security'` service that exposes + // `getReadFilter(object, context)` (resolved at call time so plugin-init + // order does not matter). This keeps analytics decoupled from security. + interface SecurityReadFilter { + getReadFilter(object: string, context?: ExecutionContext): FilterCondition | null | undefined; + } + let getReadScope = this.options.getReadScope; + let autoBridgedReadScope = false; + if (!getReadScope) { + const trySecurity = (): SecurityReadFilter | undefined => { + try { + const svc = ctx.getService('security'); + return svc && typeof svc.getReadFilter === 'function' ? svc : undefined; + } catch { + return undefined; + } + }; + if (trySecurity()) { + getReadScope = (object, context) => trySecurity()?.getReadFilter(object, context); + autoBridgedReadScope = true; + } + } + + // ADR-0021 — relationship → target-object resolver. A dataset's `include` + // names lookup/master_detail FIELDS on the base object; the joined TABLE is + // each field's `reference` target (which can differ from the field name, + // e.g. lookup `account` → object `crm_account`). Resolve from the 'data' + // engine's object schema at compile time so cross-object joins target the + // right table. Resolved lazily so plugin-init order doesn't matter. + const relationshipResolver = (baseObject: string, relationshipName: string): string | undefined => { + const engine = (() => { + try { + const svc = ctx.getService('data'); + return svc && typeof svc.getObject === 'function' ? 
svc : undefined; + } catch { return undefined; } + })(); + const obj = engine?.getObject?.(baseObject); + const field = obj?.fields?.[relationshipName]; + if (field && (field.type === 'lookup' || field.type === 'master_detail') && field.reference) { + return field.reference; + } + // Unknown to the schema — fall back to the relationship name as the table + // (legacy same-name convention). Returning undefined would make the + // compiler reject the dataset; the name-as-table fallback is safer for + // engines that don't expose getObject. + return engine ? undefined : relationshipName; + }; + const config: AnalyticsServiceConfig = { cubes: this.options.cubes, logger: ctx.logger, @@ -206,8 +272,21 @@ export class AnalyticsServicePlugin implements Plugin { executeRawSql, executeAggregate, fallbackService, + getReadScope, + getAllowedRelationships: this.options.getAllowedRelationships, + relationshipResolver, }; + if (autoBridgedReadScope) { + ctx.logger.info('[Analytics] Auto-bridged getReadScope → "security" service (getReadFilter)'); + } else if (!getReadScope) { + ctx.logger.warn( + '[Analytics] No getReadScope configured and no "security" service with getReadFilter found — ' + + 'the raw-SQL analytics path will NOT enforce tenant/RLS scoping on joined objects (ADR-0021 D-C). ' + + 'Supply getReadScope or register a security service in multi-tenant deployments.', + ); + } + if (autoBridged) { ctx.logger.info('[Analytics] Auto-bridged executeAggregate → "data" service (IDataEngine)'); } diff --git a/packages/services/service-analytics/src/read-scope-sql.ts b/packages/services/service-analytics/src/read-scope-sql.ts new file mode 100644 index 000000000..0c0b4988d --- /dev/null +++ b/packages/services/service-analytics/src/read-scope-sql.ts @@ -0,0 +1,140 @@ +// Copyright (c) 2026 ObjectStack. Licensed under the Apache-2.0 license. 
import type { FilterCondition } from '@objectstack/spec/data';

/**
 * Compile an RLS / tenant read-scope `FilterCondition` into a parameterized,
 * alias-qualified SQL predicate (ADR-0021 D-C).
 *
 * This is the single, security-critical translation point between the
 * canonical Mongo-style filter the `RLSCompiler` emits and the raw SQL the
 * analytics `NativeSQLStrategy` runs. It is deliberately:
 *
 *  - **Fail-closed.** Any operator, value shape, or identifier it cannot
 *    translate THROWS. A read-scope predicate must never be silently dropped —
 *    dropping it would run the query unscoped and leak cross-tenant data.
 *    This includes `undefined` values (e.g. a missing tenant id) and a `$not`
 *    whose operand is empty.
 *  - **Injection-safe.** Field/alias identifiers are validated against a strict
 *    identifier pattern and every value is bound as a `?` placeholder (the
 *    strategy renumbers `?` → `$N`). No value is ever interpolated into SQL.
 *    The emitted SQL never contains a literal `?` other than placeholders.
 *  - **Alias-qualified.** Bare fields become `"alias"."field"` so the same
 *    predicate applies to the base table or any joined table.
 *
 * Supports the operators the RLS layer and common policies emit: implicit
 * equality, `$eq/$ne/$gt/$gte/$lt/$lte/$in/$nin/$between/$contains/$notContains/
 * $startsWith/$endsWith/$null/$exists`, and `$and/$or/$not` combinators.
 *
 * Text operators match literally: `%`, `_` and `!` inside the value are escaped
 * (with `ESCAPE '!'`) so they cannot act as LIKE wildcards.
 */

const IDENT = /^[a-z_][a-z0-9_]*$/i;

/** Characters with special meaning in a LIKE pattern, plus the escape char itself. */
const LIKE_SPECIAL = /[!%_]/g;

/**
 * Validate and double-quote an SQL identifier.
 * @throws Error when `name` is not a plain identifier (fail-closed).
 */
function quoteIdent(name: string, kind: string): string {
  if (typeof name !== 'string' || !IDENT.test(name)) {
    throw new Error(`[read-scope-sql] unsafe ${kind} identifier "${String(name)}" — refusing to build read scope (fail-closed).`);
  }
  return `"${name}"`;
}

/**
 * Compile a read-scope filter for the table referenced as `alias`.
 *
 * @param filter - Mongo-style read-scope filter object.
 * @param alias - Table alias the fields are qualified with.
 * @returns The SQL predicate using `?` placeholders (empty string when the
 *   filter imposes no constraint) and the parameters in placeholder order.
 * @throws Error on any identifier, operator or value that cannot be translated.
 */
export function compileScopedFilterToSql(
  filter: FilterCondition,
  alias: string,
): { sql: string; params: unknown[] } {
  const quotedAlias = quoteIdent(alias, 'alias');
  const params: unknown[] = [];
  const sql = compileNode(filter, quotedAlias, params);
  return { sql, params };
}

/** Compile a filter node into a boolean SQL expression ('' = empty/no constraint). */
function compileNode(node: unknown, qAlias: string, params: unknown[]): string {
  if (node === null || typeof node !== 'object' || Array.isArray(node)) {
    throw new Error('[read-scope-sql] read scope must be a filter object (fail-closed).');
  }
  const clauses: string[] = [];
  for (const [key, value] of Object.entries(node as Record<string, unknown>)) {
    if (key === '$and' || key === '$or') {
      if (!Array.isArray(value) || value.length === 0) {
        throw new Error(`[read-scope-sql] "${key}" requires a non-empty array (fail-closed).`);
      }
      const parts = (value as unknown[])
        .map((child) => compileNode(child, qAlias, params))
        .filter((s) => s.length > 0);
      if (parts.length === 0) continue;
      const joiner = key === '$and' ? ' AND ' : ' OR ';
      clauses.push(`(${parts.join(joiner)})`);
    } else if (key === '$not') {
      const inner = compileNode(value, qAlias, params);
      // An empty operand means "NOT (everything)". Dropping it would turn a
      // match-nothing scope into no scope at all, so refuse instead.
      if (!inner) {
        throw new Error('[read-scope-sql] "$not" requires a non-empty filter (fail-closed).');
      }
      clauses.push(`NOT (${inner})`);
    } else if (key.startsWith('$')) {
      throw new Error(`[read-scope-sql] unsupported top-level operator "${key}" (fail-closed).`);
    } else {
      clauses.push(compileField(key, value, qAlias, params));
    }
  }
  return clauses.join(' AND ');
}

/** Compile a single `field: value | { $op: ... }` entry. */
function compileField(field: string, value: unknown, qAlias: string, params: unknown[]): string {
  const col = `${qAlias}.${quoteIdent(field, 'field')}`;

  // Scalar / null → implicit equality.
  if (value === null) return `${col} IS NULL`;
  if (typeof value !== 'object' || value instanceof Date) {
    return `${col} = ${bind(params, value, field)}`;
  }
  if (Array.isArray(value)) {
    throw new Error(`[read-scope-sql] bare array value for "${field}" — use { $in: [...] } (fail-closed).`);
  }

  const ops = value as Record<string, unknown>;
  const keys = Object.keys(ops);
  // A value object must be ALL operators; a non-$ key means a nested relation,
  // which a flat read scope cannot join — fail closed.
  if (keys.length === 0 || keys.some((k) => !k.startsWith('$'))) {
    throw new Error(`[read-scope-sql] "${field}" has a nested/relation value which is not supported in a read scope (fail-closed).`);
  }

  const parts = keys.map((op) => compileOperator(col, op, ops[op], field, params));
  return parts.length === 1 ? parts[0] : `(${parts.join(' AND ')})`;
}

/**
 * Append a value to the parameter list and return its `?` placeholder.
 * @throws Error for values that cannot be bound (`undefined`, functions,
 *   symbols) — typically a scope built from missing request context.
 */
function bind(params: unknown[], v: unknown, field = ''): string {
  if (v === undefined || typeof v === 'function' || typeof v === 'symbol') {
    throw new Error(`[read-scope-sql] cannot bind ${typeof v} value for "${field}" (fail-closed).`);
  }
  params.push(v);
  return '?';
}

/**
 * Build a literal-match LIKE clause. `wrap` adds the `%` anchors around the
 * escaped text. The `ESCAPE` clause is only emitted when something was
 * actually escaped, so plain values produce the same SQL as a bare `LIKE ?`.
 */
function compileLike(
  col: string,
  keyword: 'LIKE' | 'NOT LIKE',
  val: unknown,
  wrap: (text: string) => string,
  op: string,
  field: string,
  params: unknown[],
): string {
  const kind = typeof val;
  if (kind !== 'string' && kind !== 'number' && kind !== 'boolean' && kind !== 'bigint') {
    throw new Error(`[read-scope-sql] ${op} for "${field}" needs a string value (fail-closed).`);
  }
  const text = String(val);
  const escaped = text.replace(LIKE_SPECIAL, '!$&');
  const clause = `${col} ${keyword} ${bind(params, wrap(escaped), field)}`;
  return escaped === text ? clause : `${clause} ESCAPE '!'`;
}

/** Compile one `$operator` applied to an already-quoted column. */
function compileOperator(col: string, op: string, val: unknown, field: string, params: unknown[]): string {
  switch (op) {
    case '$eq': return val === null ? `${col} IS NULL` : `${col} = ${bind(params, val, field)}`;
    case '$ne': return val === null ? `${col} IS NOT NULL` : `${col} <> ${bind(params, val, field)}`;
    case '$gt': return `${col} > ${bind(params, val, field)}`;
    case '$gte': return `${col} >= ${bind(params, val, field)}`;
    case '$lt': return `${col} < ${bind(params, val, field)}`;
    case '$lte': return `${col} <= ${bind(params, val, field)}`;
    case '$in': {
      if (!Array.isArray(val)) throw new Error(`[read-scope-sql] $in for "${field}" needs an array (fail-closed).`);
      if (val.length === 0) return '1 = 0'; // IN () matches nothing — safe
      return `${col} IN (${val.map((v) => bind(params, v, field)).join(', ')})`;
    }
    case '$nin': {
      if (!Array.isArray(val)) throw new Error(`[read-scope-sql] $nin for "${field}" needs an array (fail-closed).`);
      if (val.length === 0) return '1 = 1'; // NOT IN () excludes nothing
      return `${col} NOT IN (${val.map((v) => bind(params, v, field)).join(', ')})`;
    }
    case '$between': {
      if (!Array.isArray(val) || val.length !== 2) throw new Error(`[read-scope-sql] $between for "${field}" needs [min,max] (fail-closed).`);
      return `${col} BETWEEN ${bind(params, val[0], field)} AND ${bind(params, val[1], field)}`;
    }
    case '$contains': return compileLike(col, 'LIKE', val, (t) => `%${t}%`, op, field, params);
    case '$notContains': return compileLike(col, 'NOT LIKE', val, (t) => `%${t}%`, op, field, params);
    case '$startsWith': return compileLike(col, 'LIKE', val, (t) => `${t}%`, op, field, params);
    case '$endsWith': return compileLike(col, 'LIKE', val, (t) => `%${t}`, op, field, params);
    case '$null': return val ? `${col} IS NULL` : `${col} IS NOT NULL`;
    case '$exists': return val ? `${col} IS NOT NULL` : `${col} IS NULL`;
    default:
      throw new Error(`[read-scope-sql] unsupported operator "${op}" on "${field}" (fail-closed).`);
  }
}
+ this.applyReadScope(this.extractObjectName(cube), tableName, ctx, whereClauses, params); + for (const alias of joins.keys()) { + // The joined OBJECT (for the RLS lookup) is the target table from the + // cube's join map; the ALIAS is how it's referenced in SQL. These differ + // for namespaced objects (alias `account` → object `crm_account`). + const joinedObject = cube.joins?.[alias]?.name ?? alias; + this.applyReadScope(joinedObject, alias, ctx, whereClauses, params); + } + let sql = `SELECT ${selectClauses.join(', ')} FROM "${tableName}"`; if (joins.size > 0) { sql += ' ' + Array.from(joins.values()).join(' '); @@ -117,6 +142,35 @@ export class NativeSQLStrategy implements AnalyticsStrategy { // ── Helpers ────────────────────────────────────────────────────── + /** + * ADR-0021 D-C — inject an object's read scope (tenant + RLS predicate) into + * the WHERE clause. The scope is a canonical `FilterCondition` (what the + * RLSCompiler emits); `compileScopedFilterToSql` turns it into alias-qualified, + * parameterized SQL (fail-closed — it throws rather than drop a predicate). + * The `?` placeholders are then renumbered into the strategy's `$N` scheme. + * No-op when the runtime provides no scope hook (the caller is then + * responsible for isolation — see contract note). + */ + private applyReadScope( + objectName: string, + alias: string, + ctx: StrategyContext, + whereClauses: string[], + params: unknown[], + ): void { + if (typeof ctx.getReadScope !== 'function') return; + const filter = ctx.getReadScope(objectName); + if (filter === undefined || filter === null) return; + const { sql, params: scopeParams } = compileScopedFilterToSql(filter, alias); + if (!sql) return; + let i = 0; + const rendered = sql.replace(/\?/g, () => { + params.push(scopeParams[i++]); + return `$${params.length}`; + }); + whereClauses.push(`(${rendered})`); + } + /** * Resolve a dimension/measure/filter SQL expression that may reference a * related table via dot notation (e.g. 
`account.industry`). @@ -139,6 +193,7 @@ export class NativeSQLStrategy implements AnalyticsStrategy { rawSql: string, parentTable: string, joins: Map, + cube?: Cube, ): string { if (!rawSql.includes('.')) return rawSql; // Only the first dotted hop is supported (single-level relation). @@ -146,9 +201,18 @@ export class NativeSQLStrategy implements AnalyticsStrategy { if (!alias || rest.length === 0) return rawSql; const column = rest.join('.'); if (!joins.has(alias)) { + // The relationship name is the join ALIAS; the joined TABLE is the + // related object. For datasets these differ when objects are namespaced + // (lookup `account` → table `crm_account`), so resolve the table from the + // Cube's `joins` map (emitted by the dataset compiler). Fall back to the + // alias as the table for legacy/same-name cubes. + const joinTable = cube?.joins?.[alias]?.name ?? alias; + // Only emit an explicit alias when the table differs from it; when they + // match, `LEFT JOIN "account" ON …` is cleaner (and back-compat). + const tableRef = joinTable === alias ? `"${alias}"` : `"${joinTable}" "${alias}"`; joins.set( alias, - `LEFT JOIN "${alias}" ON "${parentTable}"."${alias}" = "${alias}"."id"`, + `LEFT JOIN ${tableRef} ON "${parentTable}"."${alias}" = "${alias}"."id"`, ); } return `"${alias}"."${column}"`; @@ -203,7 +267,7 @@ export class NativeSQLStrategy implements AnalyticsStrategy { ): string { const dim = this.lookupMember(cube, member, 'dimension'); const raw = dim ? dim.sql : (member.includes('.') ? member.split('.')[1] : member); - return this.qualifyAndRegisterJoin(raw, parentTable, joins); + return this.qualifyAndRegisterJoin(raw, parentTable, joins, cube); } private resolveMeasureSql( @@ -219,7 +283,7 @@ export class NativeSQLStrategy implements AnalyticsStrategy { const col = measure.sql === '*' ? 
'*' - : this.qualifyAndRegisterJoin(measure.sql, parentTable, joins); + : this.qualifyAndRegisterJoin(measure.sql, parentTable, joins, cube); switch (measure.type) { case 'count': return 'COUNT(*)'; case 'sum': return `SUM(${col})`; @@ -238,9 +302,9 @@ export class NativeSQLStrategy implements AnalyticsStrategy { joins: Map, ): string { const dim = this.lookupMember(cube, member, 'dimension'); - if (dim) return this.qualifyAndRegisterJoin(dim.sql, parentTable, joins); + if (dim) return this.qualifyAndRegisterJoin(dim.sql, parentTable, joins, cube); const measure = this.lookupMember(cube, member, 'measure'); - if (measure) return this.qualifyAndRegisterJoin(measure.sql, parentTable, joins); + if (measure) return this.qualifyAndRegisterJoin(measure.sql, parentTable, joins, cube); const fieldName = member.includes('.') ? member.split('.')[1] : member; return fieldName; } diff --git a/packages/spec/src/contracts/analytics-service.ts b/packages/spec/src/contracts/analytics-service.ts index 848096ada..8d7cefa86 100644 --- a/packages/spec/src/contracts/analytics-service.ts +++ b/packages/spec/src/contracts/analytics-service.ts @@ -1,6 +1,9 @@ // Copyright (c) 2025 ObjectStack. Licensed under the Apache-2.0 license. import type { Cube } from '../data/analytics.zod.js'; +import type { FilterCondition } from '../data/filter.zod.js'; +import type { ExecutionContext } from '../kernel/execution-context.zod.js'; +import type { Dataset } from '../ui/dataset.zod.js'; /** * IAnalyticsService - Analytics / BI Service Contract @@ -84,13 +87,50 @@ export interface CubeMeta { dimensions: Array<{ name: string; type: string; title?: string }>; } +/** + * Compare-to directive (ADR-0021): runs a time-shifted second query and + * attaches `__compare` columns to each row. + */ +export interface DatasetCompareTo { + /** previousPeriod = equal-length window immediately before; previousYear = same window −1y. 
*/ + kind: 'previousPeriod' | 'previousYear'; + /** The time dimension (by name) whose dateRange is shifted. */ + dimension: string; +} + +/** + * A presentation's selection against a dataset (ADR-0021). Report/dashboard + * widgets bind to a dataset and pick dimensions/measures BY NAME; this is the + * wire shape a preview/query endpoint posts. + */ +export interface DatasetSelection { + /** Dimension names from the dataset. */ + dimensions?: string[]; + /** Measure names from the dataset (may include derived measures). */ + measures: string[]; + /** Presentation-scope filter, ANDed with the dataset's intrinsic filter at render. */ + runtimeFilter?: FilterCondition; + /** Optional time-dimension windows passed through to the runtime. */ + timeDimensions?: AnalyticsQuery['timeDimensions']; + order?: Record; + limit?: number; + offset?: number; + /** Compare-to directive — runs a shifted query and attaches `__compare`. */ + compareTo?: DatasetCompareTo; + timezone?: string; +} + export interface IAnalyticsService { /** * Execute an analytical query * @param query - The analytics query definition + * @param context - The caller's ExecutionContext (tenant, user, roles). Used + * to compute the per-request tenant/RLS read scope for the raw-SQL path + * (ADR-0021 D-C). Optional for backward-compat and in-memory/dev use, but + * REQUIRED for multi-tenant isolation on cross-object queries. * @returns Query results with rows and field metadata */ - query(query: AnalyticsQuery): Promise; + query(query: AnalyticsQuery, context?: ExecutionContext): Promise; /** * Get available cube metadata for discovery @@ -102,9 +142,29 @@ export interface IAnalyticsService { /** * Generate SQL for a query without executing it (dry-run) * @param query - The analytics query definition + * @param context - The caller's ExecutionContext (see {@link query}). 
* @returns Generated SQL string and parameters */ - generateSql?(query: AnalyticsQuery): Promise<{ sql: string; params: unknown[] }>; + generateSql?(query: AnalyticsQuery, context?: ExecutionContext): Promise<{ sql: string; params: unknown[] }>; + + /** + * Execute a semantic-layer `dataset` (ADR-0021): compile it to the Cube + * runtime, then run the presentation's `selection` (dimensions/measures by + * name, runtime filter, compareTo) — returning chart-ready rows. The + * `dataset` may be a saved definition or an inline draft (Studio preview). + * + * Optional: implementations that only support raw cube queries may omit it; + * callers should feature-detect (`typeof svc.queryDataset === 'function'`). + * + * @param dataset - The dataset definition (saved or inline draft). + * @param selection - Dimensions/measures to project + runtime directives. + * @param context - The request's ExecutionContext (tenant/RLS, see {@link query}). + */ + queryDataset?( + dataset: Dataset, + selection: DatasetSelection, + context?: ExecutionContext, + ): Promise; } // ========================================== @@ -156,6 +216,40 @@ export interface StrategyContext { getMeta(cubeName?: string): Promise; generateSql?(query: AnalyticsQuery): Promise<{ sql: string; params: unknown[] }>; }; + + /** + * ADR-0021 D-C — per-object read scope (RLS + tenant isolation). + * + * Returns the security predicate that MUST be ANDed into the query for the + * given object, as a canonical Mongo-style `FilterCondition` (exactly what + * the `RLSCompiler` emits). The strategy compiles it to alias-qualified, + * parameterized SQL and injects it for the base table AND every joined + * object, closing the raw-SQL bypass at `engine.ts` (`execute()` does not + * thread tenant scope on its own). 
+ * + * This hook is bound to the current request's `ExecutionContext` by the + * `IAnalyticsService` implementation (see `query(query, context)`), so the + * provider already knows the active tenant when it is called. + * + * @example + * ```ts + * getReadScope: (obj) => ({ organization_id: tenantId }) + * ``` + * + * Returning `undefined`/`null` means "no scope for this object" (e.g. a + * global control-plane table). When this hook is absent entirely the + * strategy runs unscoped — callers that require isolation MUST provide it. + */ + getReadScope?(objectName: string): FilterCondition | null | undefined; + + /** + * ADR-0021 D-C — join allowlist. Returns the set of relationship aliases the + * dataset behind `cubeName` explicitly declared via `include`. The strategy + * REJECTS any join whose alias is not in this set (v1 only joins along + * declared relationships). Returning `undefined` disables the check (legacy + * Cube definitions that pre-date datasets). + */ + getAllowedRelationships?(cubeName: string): Set | undefined; } /** diff --git a/packages/spec/src/kernel/metadata-plugin.zod.ts b/packages/spec/src/kernel/metadata-plugin.zod.ts index 2e1a14428..bec83066a 100644 --- a/packages/spec/src/kernel/metadata-plugin.zod.ts +++ b/packages/spec/src/kernel/metadata-plugin.zod.ts @@ -85,6 +85,7 @@ export const MetadataTypeSchema = lazySchema(() => z.enum([ 'app', // Application shell (AppSchema) 'action', // UI/Server actions (ActionSchema) 'report', // Report definitions (ReportSchema) + 'dataset', // Analytics semantic layer — dimensions/measures (DatasetSchema, ADR-0021) // Automation Protocol 'flow', // Visual logic flows (FlowSchema) @@ -615,6 +616,9 @@ export const DEFAULT_METADATA_TYPE_REGISTRY: MetadataTypeRegistryEntry[] = [ { type: 'app', label: 'Application', filePatterns: ['**/*.app.ts', '**/*.app.yml', '**/*.app.json'], supportsOverlay: true, allowOrgOverride: true, allowRuntimeCreate: true, supportsVersioning: true, executionPinned: false, 
loadOrder: 70, domain: 'ui' }, { type: 'action', label: 'Action', filePatterns: ['**/*.action.ts', '**/*.action.yml'], supportsOverlay: false, allowOrgOverride: true, allowRuntimeCreate: true, supportsVersioning: true, executionPinned: false, loadOrder: 50, domain: 'ui' }, { type: 'report', label: 'Report', filePatterns: ['**/*.report.ts', '**/*.report.yml'], supportsOverlay: true, allowOrgOverride: true, allowRuntimeCreate: true, supportsVersioning: true, executionPinned: false, loadOrder: 60, domain: 'ui' }, + // ADR-0021: dataset is the analytics semantic layer that report/dashboard bind to. + // loadOrder 55 < report/dashboard (60) so datasets register before their consumers. + { type: 'dataset', label: 'Dataset', description: 'Analytics semantic layer — dimensions & measures', filePatterns: ['**/*.dataset.ts', '**/*.dataset.yml', '**/*.dataset.json'], supportsOverlay: true, allowOrgOverride: true, allowRuntimeCreate: true, supportsVersioning: true, executionPinned: false, loadOrder: 55, domain: 'ui' }, // Automation Protocol — flow is executionPinned (ADR-0009). 
// ADR-0019: there is no `approval` metadata type — approvals are Approval diff --git a/packages/spec/src/kernel/metadata-type-schemas.ts b/packages/spec/src/kernel/metadata-type-schemas.ts index 82cb6c83a..c635356f3 100644 --- a/packages/spec/src/kernel/metadata-type-schemas.ts +++ b/packages/spec/src/kernel/metadata-type-schemas.ts @@ -41,6 +41,7 @@ import { AppSchema } from '../ui/app.zod'; import { ActionSchema } from '../ui/action.zod'; import type { Action } from '../ui/action.zod'; import { ReportSchema } from '../ui/report.zod'; +import { DatasetSchema } from '../ui/dataset.zod'; import { FlowSchema } from '../automation/flow.zod'; @@ -80,6 +81,7 @@ const BUILTIN_METADATA_TYPE_SCHEMAS: Partial> = app: AppSchema, action: ActionSchema, report: ReportSchema, + dataset: DatasetSchema, // ADR-0021: analytics semantic layer // Automation Protocol flow: FlowSchema, diff --git a/packages/spec/src/shared/metadata-collection.zod.ts b/packages/spec/src/shared/metadata-collection.zod.ts index 2cdfc41d0..4758e8a8b 100644 --- a/packages/spec/src/shared/metadata-collection.zod.ts +++ b/packages/spec/src/shared/metadata-collection.zod.ts @@ -74,6 +74,7 @@ export const MAP_SUPPORTED_FIELDS = [ 'pages', 'dashboards', 'reports', + 'datasets', 'actions', 'themes', 'flows', @@ -111,6 +112,7 @@ export const PLURAL_TO_SINGULAR: Record = { pages: 'page', dashboards: 'dashboard', reports: 'report', + datasets: 'dataset', actions: 'action', themes: 'theme', flows: 'flow', diff --git a/packages/spec/src/stack.zod.ts b/packages/spec/src/stack.zod.ts index 0984b9d8a..de1584d75 100644 --- a/packages/spec/src/stack.zod.ts +++ b/packages/spec/src/stack.zod.ts @@ -20,6 +20,7 @@ import { ViewSchema } from './ui/view.zod'; import { PageSchema } from './ui/page.zod'; import { DashboardSchema } from './ui/dashboard.zod'; import { ReportSchema } from './ui/report.zod'; +import { DatasetSchema } from './ui/dataset.zod'; import { ActionSchema } from './ui/action.zod'; import { ThemeSchema } from 
'./ui/theme.zod'; @@ -207,6 +208,7 @@ export const ObjectStackDefinitionSchema = lazySchema(() => z.object({ pages: z.array(PageSchema).optional().describe('Custom Pages'), dashboards: z.array(DashboardSchema).optional().describe('Dashboards'), reports: z.array(ReportSchema).optional().describe('Analytics Reports'), + datasets: z.array(DatasetSchema).optional().describe('Analytics semantic-layer datasets (ADR-0021)'), actions: z.array(ActionSchema).optional().describe('Global and Object Actions'), themes: z.array(ThemeSchema).optional().describe('UI Themes'), diff --git a/packages/spec/src/ui/dataset.test.ts b/packages/spec/src/ui/dataset.test.ts new file mode 100644 index 000000000..88cf52d16 --- /dev/null +++ b/packages/spec/src/ui/dataset.test.ts @@ -0,0 +1,90 @@ +// Copyright (c) 2026 ObjectStack. Licensed under the Apache-2.0 license. + +import { describe, it, expect } from 'vitest'; +import { DatasetSchema, defineDataset } from './dataset.zod'; + +const base = { + name: 'sales', + label: 'Sales', + object: 'opportunity', + include: ['account'], + dimensions: [{ name: 'region', field: 'account.region', type: 'string' as const }], + measures: [ + { name: 'revenue', aggregate: 'sum' as const, field: 'amount' }, + { name: 'deal_count', aggregate: 'count' as const }, + ], +}; + +describe('DatasetSchema', () => { + it('accepts a well-formed dataset and applies defaults (certified=false)', () => { + const ds = DatasetSchema.parse(base); + expect(ds.measures[0].certified).toBe(false); + expect(ds.object).toBe('opportunity'); + }); + + it('rejects a non-count measure with no field', () => { + expect(() => + DatasetSchema.parse({ ...base, measures: [{ name: 'revenue', aggregate: 'sum' }] }), + ).toThrowError(/requires `field`/); + }); + + it('allows count with no field', () => { + expect(() => + DatasetSchema.parse({ ...base, measures: [{ name: 'total', aggregate: 'count' }] }), + ).not.toThrow(); + }); + + it('rejects duplicate measure names', () => { + expect(() => + 
DatasetSchema.parse({ + ...base, + measures: [ + { name: 'revenue', aggregate: 'sum', field: 'amount' }, + { name: 'revenue', aggregate: 'avg', field: 'amount' }, + ], + }), + ).toThrowError(/duplicate measure name/); + }); + + it('rejects a derived measure referencing an unknown measure', () => { + expect(() => + DatasetSchema.parse({ + ...base, + measures: [ + { name: 'revenue', aggregate: 'sum', field: 'amount' }, + { name: 'win_rate', aggregate: 'sum', derived: { op: 'ratio', of: ['won_amount', 'revenue'] } }, + ], + }), + ).toThrowError(/references unknown measure/); + }); + + it('rejects a derived measure referencing itself', () => { + expect(() => + DatasetSchema.parse({ + ...base, + measures: [ + { name: 'revenue', aggregate: 'sum', field: 'amount' }, + { name: 'loop', aggregate: 'sum', derived: { op: 'sum', of: ['loop'] } }, + ], + }), + ).toThrowError(/cannot reference itself/); + }); + + it('accepts a valid derived measure', () => { + expect(() => + DatasetSchema.parse({ + ...base, + measures: [ + { name: 'revenue', aggregate: 'sum', field: 'amount' }, + { name: 'won_amount', aggregate: 'sum', field: 'amount', filter: { stage: 'won' } }, + { name: 'win_rate', aggregate: 'sum', derived: { op: 'ratio', of: ['won_amount', 'revenue'] } }, + ], + }), + ).not.toThrow(); + }); + + it('defineDataset is an identity helper', () => { + const d = defineDataset(base); + expect(d).toBe(base); + }); +}); diff --git a/packages/spec/src/ui/dataset.zod.ts b/packages/spec/src/ui/dataset.zod.ts new file mode 100644 index 000000000..8de31ed22 --- /dev/null +++ b/packages/spec/src/ui/dataset.zod.ts @@ -0,0 +1,200 @@ +// Copyright (c) 2026 ObjectStack. Licensed under the Apache-2.0 license. 
+ +import { z } from 'zod'; +import { lazySchema } from '../shared/lazy-schema'; +import { ProtectionSchema } from '../shared/protection.zod'; +import { MetadataProtectionFields } from '../kernel/metadata-protection.zod'; +import { FilterConditionSchema } from '../data/filter.zod'; +import { SnakeCaseIdentifierSchema } from '../shared/identifiers.zod'; +import { I18nLabelSchema } from './i18n.zod'; +import { AggregationFunction, DateGranularity } from '../data/query.zod'; + +/** + * Analytics Dataset — the one semantic layer (ADR-0021). + * + * A `dataset` is a named, reusable analytical definition: a base object, the + * relationships to include (joins are *derived* from the object graph — the + * author never writes an `ON` clause), and the declared **dimensions** + * (groupable axes) and **measures** (aggregatable values). It is deliberately + * SMALLER than `QuerySchema`: no raw SQL, no hand-authored join predicates, + * no window/having grammar in the author surface. + * + * Presentations (`report` / `dashboard`) bind to a dataset by reference and + * pick dimensions/measures *by name*. The dataset compiles to the existing + * Cube analytics runtime (ADR-0021 D-A=(c)); RLS / tenant scoping is enforced + * by the runtime per joined object (D-C), never declared here. + * + * Naming: this module owns the high-prior `dataset` / `dimension` / `measure` + * vocabulary (LookML / dbt / Cube / PowerBI). The Zod export identifiers are + * `Dataset`-prefixed (`DatasetDimensionSchema`, `DatasetMeasureSchema`) so they + * do not clash with the Cube layer's `DimensionSchema` / `MetricSchema` in + * `data/analytics.zod.ts` while the two layers coexist (Phase 1). The Cube + * layer is absorbed/retired in a later phase (D-A). + */ + +/** + * Dimension — a groupable axis (e.g. "region", "close_date by quarter"). + */ +export const DatasetDimensionSchema = lazySchema(() => z.object({ + /** Referenced by presentations (report rows/columns, widget dimensions). 
*/ + name: SnakeCaseIdentifierSchema.describe('Dimension name — referenced by presentations'), + label: I18nLabelSchema.optional(), + /** + * A field on the base object, OR a `relationship.field` path (e.g. + * `account.region`). The join is DERIVED from the declared relationship in + * `Dataset.include` — the author never writes a predicate. + */ + field: z.string().describe('Base field or `relationship.field` path'), + type: z.enum(['string', 'number', 'date', 'boolean', 'lookup']).optional(), + /** Default bucketing for date dimensions (day/week/month/quarter/year). */ + dateGranularity: DateGranularity.optional(), +})); + +/** + * Derived-measure operator (ADR-0021 Q1). + * A derived measure references OTHER measures BY NAME only — no raw fields, + * no raw SQL — keeping it enumerable and reviewable. + */ +export const DerivedMeasureOp = z.enum(['ratio', 'sum', 'difference', 'product']); + +/** + * Measure — an aggregatable value (e.g. "revenue = sum(amount)"). Defined ONCE + * here; every presentation references it by name. + */ +export const DatasetMeasureSchema = lazySchema(() => z.object({ + name: SnakeCaseIdentifierSchema.describe('Measure name — e.g. "revenue"; defined once'), + label: I18nLabelSchema.optional(), + /** Aggregation function — reuses the canonical query.zod enum. */ + aggregate: AggregationFunction.describe('Aggregation (sum/avg/count/...)'), + /** Base or `relationship.field`. Optional for `count` (count(*)). */ + field: z.string().optional().describe('Aggregated field; optional for count(*)'), + /** Measure-scoped filter (e.g. only won deals for "won_amount"). */ + filter: FilterConditionSchema.optional(), + /** Display format, e.g. "$0,0.00", "0.0%". */ + format: z.string().optional(), + /** Governance: a human-blessed metric — the review checkpoint. */ + certified: z.boolean().default(false).describe('Blessed metric (governance checkpoint)'), + /** + * Derived measure — computed from OTHER measures in this dataset by name + * only. e.g. 
`{ op: 'ratio', of: ['won_amount', 'total_amount'] }`. + * Mutually exclusive with `field`/`aggregate` semantics: when `derived` is + * set, `aggregate` is ignored at compile time. + */ + derived: z.object({ + op: DerivedMeasureOp, + /** Names of other measures in this dataset (2+ for ratio/difference). */ + of: z.array(SnakeCaseIdentifierSchema).min(1), + }).optional(), +})); + +/** + * Dataset — the single analytical source of truth (ADR-0021 D1). + */ +export const DatasetSchema = lazySchema(() => z.object({ + /** Identity. */ + name: SnakeCaseIdentifierSchema.describe('Dataset unique name'), + label: I18nLabelSchema.describe('Dataset label'), + description: I18nLabelSchema.optional(), + + /** Base object — the FROM. */ + object: z.string().describe('Base object name'), + + /** + * Relationships to include, BY NAME (lookup / master_detail field names on + * the object graph). Joins are COMPILED from these — the author writes no ON + * clause. v1 (D-C): only declared relationships are joinable; no arbitrary + * predicates. + */ + include: z.array(z.string()).optional().describe('Relationship names to join (derived from object graph)'), + + /** Definition-level filter (the dataset's intrinsic scope, e.g. non-deleted). */ + filter: FilterConditionSchema.optional().describe('Intrinsic dataset scope filter'), + + /** The semantic contract presentations bind to. */ + dimensions: z.array(DatasetDimensionSchema).describe('Groupable axes'), + measures: z.array(DatasetMeasureSchema).describe('Aggregatable values'), + + /** + * ADR-0010 — package-author protection envelope; the loader translates this + * into the private `_lock` envelope at registration and strips it before + * persistence. + */ + protection: ProtectionSchema.optional().describe( + 'Package author protection block — lock policy for this dataset.', + ), + + // ADR-0010 — runtime protection envelope (internal — set by loader). 
+ ...MetadataProtectionFields, +}).superRefine((ds, ctx) => { + // Measure names must be unique (presentations reference them by name). + const measureNames = new Set<string>(); + for (const m of ds.measures) { + if (measureNames.has(m.name)) { + ctx.addIssue({ code: 'custom', message: `duplicate measure name "${m.name}"`, path: ['measures'] }); + } + measureNames.add(m.name); + } + // Dimension names must be unique. + const dimNames = new Set<string>(); + for (const d of ds.dimensions) { + if (dimNames.has(d.name)) { + ctx.addIssue({ code: 'custom', message: `duplicate dimension name "${d.name}"`, path: ['dimensions'] }); + } + dimNames.add(d.name); + } + // Per-measure checks: a non-derived measure needs a `field` (unless it is a count); a derived measure may only reference OTHER measures declared in this dataset. + for (const m of ds.measures) { + if (!m.derived) { + // A non-derived measure needs a field unless it is a plain count. + if (!m.field && m.aggregate !== 'count') { + ctx.addIssue({ + code: 'custom', + message: `measure "${m.name}" requires \`field\` (only \`count\` may omit it)`, + path: ['measures'], + }); + } + continue; + } + for (const ref of m.derived.of) { + if (ref === m.name) { + ctx.addIssue({ code: 'custom', message: `derived measure "${m.name}" cannot reference itself`, path: ['measures'] }); + } else if (!measureNames.has(ref)) { + ctx.addIssue({ + code: 'custom', + message: `derived measure "${m.name}" references unknown measure "${ref}"`, + path: ['measures'], + }); + } + } + } +})); + +/** + * Authoring helper — identity function that gives editors full type-checking + * and inference when defining a dataset in a `*.dataset.ts` file. 
+ * + * @example + * ```ts + * export default defineDataset({ + * name: 'sales', + * label: 'Sales', + * object: 'opportunity', + * include: ['account'], + * dimensions: [{ name: 'region', field: 'account.region' }], + * measures: [{ name: 'revenue', aggregate: 'sum', field: 'amount', certified: true }], + * }); + * ``` + */ +export function defineDataset(dataset: DatasetInput): DatasetInput { + return dataset; +} + +export type DatasetDimension = z.infer<typeof DatasetDimensionSchema>; +export type DatasetMeasure = z.infer<typeof DatasetMeasureSchema>; +export type DerivedMeasureOpValue = z.infer<typeof DerivedMeasureOp>; +export type Dataset = z.infer<typeof DatasetSchema>; + +/** Input types for authoring (optional fields with defaults may be omitted). */ +export type DatasetDimensionInput = z.input<typeof DatasetDimensionSchema>; +export type DatasetMeasureInput = z.input<typeof DatasetMeasureSchema>; +export type DatasetInput = z.input<typeof DatasetSchema>; diff --git a/packages/spec/src/ui/index.ts b/packages/spec/src/ui/index.ts index 58681900c..c1ea9636c 100644 --- a/packages/spec/src/ui/index.ts +++ b/packages/spec/src/ui/index.ts @@ -17,6 +17,7 @@ export * from './app.zod'; export * from './view.zod'; export * from './dashboard.zod'; export * from './report.zod'; +export * from './dataset.zod'; export { reportForm } from './report.form'; export { viewForm } from './view.form'; export { appForm } from './app.form';