Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions packages/pipeline-void/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ Accepts an optional `VoidStagesOptions` object:
| `maxConcurrency` | 10 | Maximum concurrent in-flight executor batches (per-class stages only) |
| `perClass` | — | Override per-class iteration for all five per-class stages |
| `uriSpaces` | — | When provided, includes the object URI space stage |
| `vocabularies` | — | Additional vocabulary namespace URIs to detect beyond the built-in defaults |

```typescript
import { voidStages } from '@lde/pipeline-void';
Expand Down Expand Up @@ -66,10 +67,10 @@ Global and domain-specific factories accept `VoidStageOptions` (`timeout`) and r

| Factory | Description |
| ------------------------ | --------------------------------------------------------------------------------------------------------------------------------- |
| `detectVocabularies()` | [`entity-properties.rq`](queries/entity-properties.rq) — Entity properties with automatic `void:vocabulary` detection |
| `detectVocabularies()` | [`entity-properties.rq`](queries/entity-properties.rq) — Entity properties with automatic `void:vocabulary` detection. Accepts `DetectVocabulariesOptions` with an optional `vocabularies` array to extend the built-in defaults. |
| `uriSpaces(uriSpaceMap)` | [`object-uri-space.rq`](queries/object-uri-space.rq) — Object URI namespace linksets, aggregated against a provided URI space map |

## Executor decorators

- `VocabularyExecutor` — Wraps an executor; detects known vocabulary namespace prefixes in `void:property` quads and appends `void:vocabulary` triples.
- `VocabularyExecutor` — Wraps an executor; detects known vocabulary namespace prefixes in `void:property` quads and appends `void:vocabulary` triples. The built-in defaults are exported as `defaultVocabularies` (sourced from `@zazuko/prefixes`).
- `UriSpaceExecutor` — Wraps an executor; consumes `void:Linkset` quads, matches each `void:objectsTarget` against configured URI space prefixes using `startsWith`, and aggregates triple counts per matched space. Emits `void:objectsTarget` pointing to the target dataset IRI (taken from the metadata quad subjects), not the raw prefix. Unmatched linksets are discarded.
27 changes: 23 additions & 4 deletions packages/pipeline-void/src/stage.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,10 @@ import { assertSafeIri } from '@lde/dataset';
import type { Quad } from '@rdfjs/types';
import { resolve, dirname } from 'node:path';
import { fileURLToPath } from 'node:url';
import { VocabularyExecutor } from './vocabularyAnalyzer.js';
import {
VocabularyExecutor,
defaultVocabularies,
} from './vocabularyAnalyzer.js';
import { UriSpaceExecutor } from './uriSpaceExecutor.js';

const queriesDir = resolve(
Expand Down Expand Up @@ -48,6 +51,8 @@ export interface PerClassVoidStageOptions extends VoidStageOptions {
export interface VoidStagesOptions extends PerClassVoidStageOptions {
/**
 * When provided, includes the object URI space stage using this map.
 * When omitted, that stage is left out of the returned stage list.
 */
uriSpaces?: ReadonlyMap<string, readonly Quad[]>;
/**
 * Additional vocabulary namespace URIs to detect beyond the built-in defaults.
 * `voidStages` forwards this to the vocabulary detection stage only; it is
 * stripped from the options handed to the other stages.
 */
vocabularies?: readonly string[];
}

async function createVoidStage(
Expand Down Expand Up @@ -211,7 +216,14 @@ export function uriSpaces(
});
}

export function detectVocabularies(options?: VoidStageOptions): Promise<Stage> {
/**
 * Options for `detectVocabularies`: the shared `VoidStageOptions` plus an
 * optional list of extra vocabulary namespaces to recognise.
 */
export interface DetectVocabulariesOptions extends VoidStageOptions {
/**
 * Additional vocabulary namespace URIs to detect beyond the built-in defaults.
 * When set, the entries are appended after `defaultVocabularies` (the merged
 * list is not de-duplicated here). When omitted, no explicit list is passed
 * on — presumably the executor then falls back to its own defaults; confirm
 * in `vocabularyAnalyzer.ts`.
 */
vocabularies?: readonly string[];
}

export function detectVocabularies(
options?: DetectVocabulariesOptions,
): Promise<Stage> {
return createVoidStage('entity-properties.rq', {
...options,
executor: (query) =>
Expand All @@ -220,6 +232,9 @@ export function detectVocabularies(options?: VoidStageOptions): Promise<Stage> {
query,
timeout: options?.timeout ?? 60_000,
}),
options?.vocabularies
? [...defaultVocabularies, ...options.vocabularies]
: undefined,
),
});
}
Expand All @@ -235,7 +250,11 @@ export function detectVocabularies(options?: VoidStageOptions): Promise<Stage> {
export async function voidStages(
options?: VoidStagesOptions,
): Promise<Stage[]> {
const { uriSpaces: uriSpaceMap, ...stageOptions } = options ?? {};
const {
uriSpaces: uriSpaceMap,
vocabularies,
...stageOptions
} = options ?? {};

return Promise.all([
// Global counting stages.
Expand All @@ -258,7 +277,7 @@ export async function voidStages(

// Other stages.
detectLicenses(stageOptions),
detectVocabularies(stageOptions),
detectVocabularies({ ...stageOptions, vocabularies }),
subjectUriSpaces(stageOptions),
...(uriSpaceMap ? [uriSpaces(uriSpaceMap, stageOptions)] : []),
]);
Expand Down
2 changes: 1 addition & 1 deletion packages/pipeline-void/src/vocabularyAnalyzer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ const VOID = 'http://rdfs.org/ns/void#';
const voidProperty = namedNode(`${VOID}property`);
const voidVocabulary = namedNode(`${VOID}vocabulary`);

const defaultVocabularies: readonly string[] = [
/**
 * Built-in vocabulary namespace URIs: the values of the `prefixes` object,
 * de-duplicated by passing them through a `Set`.
 *
 * Exported so that callers can combine these defaults with their own
 * namespaces when configuring vocabulary detection.
 */
export const defaultVocabularies: readonly string[] = [
...new Set(Object.values(prefixes)),
];

Expand Down
2 changes: 1 addition & 1 deletion packages/pipeline-void/vite.config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ export default mergeConfig(
thresholds: {
functions: 50,
lines: 78.43,
branches: 65.95,
branches: 63.26,
statements: 78.84,
},
},
Expand Down