From 5ede9116b798dcf4a23831289fea9b5a2ce9bee6 Mon Sep 17 00:00:00 2001 From: David de Boer Date: Wed, 1 Apr 2026 11:21:38 +0200 Subject: [PATCH] feat(pipeline-void): allow passing additional vocabularies to detectVocabularies and voidStages --- packages/pipeline-void/README.md | 5 ++-- packages/pipeline-void/src/stage.ts | 27 ++++++++++++++++--- .../pipeline-void/src/vocabularyAnalyzer.ts | 2 +- packages/pipeline-void/vite.config.ts | 2 +- 4 files changed, 28 insertions(+), 8 deletions(-) diff --git a/packages/pipeline-void/README.md b/packages/pipeline-void/README.md index a64b440c..ec8a1791 100644 --- a/packages/pipeline-void/README.md +++ b/packages/pipeline-void/README.md @@ -17,6 +17,7 @@ Accepts an optional `VoidStagesOptions` object: | `maxConcurrency` | 10 | Maximum concurrent in-flight executor batches (per-class stages only) | | `perClass` | — | Override per-class iteration for all five per-class stages | | `uriSpaces` | — | When provided, includes the object URI space stage | +| `vocabularies` | — | Additional vocabulary namespace URIs to detect beyond the built-in defaults | ```typescript import { voidStages } from '@lde/pipeline-void'; @@ -66,10 +67,10 @@ Global and domain-specific factories accept `VoidStageOptions` (`timeout`) and r | Factory | Description | | ------------------------ | --------------------------------------------------------------------------------------------------------------------------------- | -| `detectVocabularies()` | [`entity-properties.rq`](queries/entity-properties.rq) — Entity properties with automatic `void:vocabulary` detection | +| `detectVocabularies()` | [`entity-properties.rq`](queries/entity-properties.rq) — Entity properties with automatic `void:vocabulary` detection. Accepts `DetectVocabulariesOptions` with an optional `vocabularies` array to extend the built-in defaults. | | `uriSpaces(uriSpaceMap)` | [`object-uri-space.rq`](queries/object-uri-space.rq) — Object URI namespace linksets, aggregated against a provided URI space map | ## Executor decorators -- `VocabularyExecutor` — Wraps an executor; detects known vocabulary namespace prefixes in `void:property` quads and appends `void:vocabulary` triples. +- `VocabularyExecutor` — Wraps an executor; detects known vocabulary namespace prefixes in `void:property` quads and appends `void:vocabulary` triples. The built-in defaults are exported as `defaultVocabularies` (sourced from `@zazuko/prefixes`). - `UriSpaceExecutor` — Wraps an executor; consumes `void:Linkset` quads, matches each `void:objectsTarget` against configured URI space prefixes using `startsWith`, and aggregates triple counts per matched space. Emits `void:objectsTarget` pointing to the target dataset IRI (taken from the metadata quad subjects), not the raw prefix. Unmatched linksets are discarded. diff --git a/packages/pipeline-void/src/stage.ts b/packages/pipeline-void/src/stage.ts index 9c35fa71..7fbaf8e5 100644 --- a/packages/pipeline-void/src/stage.ts +++ b/packages/pipeline-void/src/stage.ts @@ -10,7 +10,10 @@ import { assertSafeIri } from '@lde/dataset'; import type { Quad } from '@rdfjs/types'; import { resolve, dirname } from 'node:path'; import { fileURLToPath } from 'node:url'; -import { VocabularyExecutor } from './vocabularyAnalyzer.js'; +import { + VocabularyExecutor, + defaultVocabularies, +} from './vocabularyAnalyzer.js'; import { UriSpaceExecutor } from './uriSpaceExecutor.js'; const queriesDir = resolve( @@ -48,6 +51,8 @@ export interface PerClassVoidStageOptions extends VoidStageOptions { export interface VoidStagesOptions extends PerClassVoidStageOptions { /** When provided, includes the object URI space stage using this map. */ uriSpaces?: ReadonlyMap; + /** Additional vocabulary namespace URIs to detect beyond the built-in defaults. */ + vocabularies?: readonly string[]; } async function createVoidStage( @@ -211,7 +216,14 @@ export function uriSpaces( }); } -export function detectVocabularies(options?: VoidStageOptions): Promise { +export interface DetectVocabulariesOptions extends VoidStageOptions { + /** Additional vocabulary namespace URIs to detect beyond the built-in defaults. */ + vocabularies?: readonly string[]; +} + +export function detectVocabularies( + options?: DetectVocabulariesOptions, +): Promise { return createVoidStage('entity-properties.rq', { ...options, executor: (query) => @@ -220,6 +232,9 @@ export function detectVocabularies(options?: VoidStageOptions): Promise { query, timeout: options?.timeout ?? 60_000, }), + options?.vocabularies + ? [...defaultVocabularies, ...options.vocabularies] + : undefined, ), }); } @@ -235,7 +250,11 @@ export function detectVocabularies(options?: VoidStageOptions): Promise { export async function voidStages( options?: VoidStagesOptions, ): Promise { - const { uriSpaces: uriSpaceMap, ...stageOptions } = options ?? {}; + const { + uriSpaces: uriSpaceMap, + vocabularies, + ...stageOptions + } = options ?? {}; return Promise.all([ // Global counting stages. @@ -258,7 +277,7 @@ export async function voidStages( // Other stages. detectLicenses(stageOptions), - detectVocabularies(stageOptions), + detectVocabularies({ ...stageOptions, vocabularies }), subjectUriSpaces(stageOptions), ...(uriSpaceMap ? [uriSpaces(uriSpaceMap, stageOptions)] : []), ]); diff --git a/packages/pipeline-void/src/vocabularyAnalyzer.ts b/packages/pipeline-void/src/vocabularyAnalyzer.ts index 4ea4bef3..ef0bd9fa 100644 --- a/packages/pipeline-void/src/vocabularyAnalyzer.ts +++ b/packages/pipeline-void/src/vocabularyAnalyzer.ts @@ -14,7 +14,7 @@ const VOID = 'http://rdfs.org/ns/void#'; const voidProperty = namedNode(`${VOID}property`); const voidVocabulary = namedNode(`${VOID}vocabulary`); -const defaultVocabularies: readonly string[] = [ +export const defaultVocabularies: readonly string[] = [ ...new Set(Object.values(prefixes)), ]; diff --git a/packages/pipeline-void/vite.config.ts b/packages/pipeline-void/vite.config.ts index f8433b58..29f6a1eb 100644 --- a/packages/pipeline-void/vite.config.ts +++ b/packages/pipeline-void/vite.config.ts @@ -12,7 +12,7 @@ export default mergeConfig( thresholds: { functions: 50, lines: 78.43, - branches: 65.95, + branches: 63.26, statements: 78.84, }, },