From a5556ce28a029d4543a9594517008356475e5548 Mon Sep 17 00:00:00 2001 From: alex-rawlings-yyc Date: Fri, 15 May 2026 10:56:49 -0600 Subject: [PATCH 1/8] Simplify interlinear model: remove InterlinearAlignment/InterlinearText, add ActiveProject --- src/types/interlinearizer.d.ts | 216 ++++++++++----------------------- 1 file changed, 62 insertions(+), 154 deletions(-) diff --git a/src/types/interlinearizer.d.ts b/src/types/interlinearizer.d.ts index 79954ab3..b9cda9bb 100644 --- a/src/types/interlinearizer.d.ts +++ b/src/types/interlinearizer.d.ts @@ -22,19 +22,19 @@ declare module 'papi-shared-types' { /** * Creates a new interlinearizer project for the given source project. Called from the WebView - * after the user fills in the create-project modal. Returns the persisted project serialized as - * a JSON string, or undefined if project creation fails (failure is logged and surfaced as an - * error notification). + * after the user fills in the create-project modal. Returns the new project's UUID, or + * undefined if project creation fails (failure is logged and surfaced as an error + * notification). * * @param sourceProjectId Platform.Bible project ID of the source text to interlinearize. - * @param analysisWritingSystem BCP 47 tag for the language used in glosses and annotations - * (e.g. `'en'`). + * @param analysisLanguages BCP 47 tags for the languages used in glosses and annotations (e.g. + * `['en']`). * @param name Optional user-facing name for the project. * @param description Optional user-facing description for the project. */ 'interlinearizer.createProject': ( sourceProjectId: string, - analysisWritingSystem: string, + analysisLanguages: string[], name?: string, description?: string, ) => Promise; @@ -56,23 +56,16 @@ declare module 'papi-shared-types' { 'interlinearizer.deleteProject': (interlinearProjectId: string) => Promise; /** - * Opens the project-selector dialog in the Interlinearizer WebView. 
The backend registers this + * Opens the project-selector modal in the Interlinearizer WebView. The backend registers this * command to make it visible to the platform menu system; all logic executes in the WebView. */ 'interlinearizer.openSelectProjectModal': () => Promise; /** - * Opens the create-project dialog in the Interlinearizer WebView. The backend registers this + * Opens the create-project modal in the Interlinearizer WebView. The backend registers this * command to make it visible to the platform menu system; all logic executes in the WebView. */ - 'interlinearizer.openNewProjectModal': () => Promise; - - /** - * Opens the project-info modal for the active project in the Interlinearizer WebView. The - * backend registers this command to make it visible to the platform menu system; all logic - * executes in the WebView. - */ - 'interlinearizer.openProjectInfoModal': () => Promise; + 'interlinearizer.newProject': () => Promise; /** * Updates the metadata of an existing interlinearizer project. Returns the updated project as a @@ -81,14 +74,14 @@ declare module 'papi-shared-types' { * @param interlinearProjectId UUID of the interlinearizer project to update. * @param name New user-facing name; omit or pass `undefined` to clear. * @param description New user-facing description; omit or pass `undefined` to clear. - * @param analysisWritingSystem New BCP 47 analysis language tag; omit or pass empty to leave + * @param analysisLanguages New BCP 47 analysis language tags; omit or pass empty array to leave * unchanged (the field is required and cannot be cleared). 
*/ 'interlinearizer.updateProjectMetadata': ( interlinearProjectId: string, name: string | undefined, description: string | undefined, - analysisWritingSystem?: string, + analysisLanguages?: string[], ) => Promise; } } @@ -101,15 +94,12 @@ declare module 'papi-shared-types' { * * Shape at a glance: * - * InterlinearAlignment - * ├─ source : InterlinearText — the input being analyzed - * ├─ target : InterlinearText — the analysis / output side - * └─ links : AlignmentLink[] + * ActiveProject + * ├─ project : InterlinearProject — persisted envelope (analysis + links) + * └─ source : Book[] — text layer (rebuilt from USJ at runtime) + * └─ Segment[] → Token[] * - * InterlinearText - * ├─ books : Book[] — text layer (baseline) - * │ └─ Segment[] → Token[] - * └─ analysis : TextAnalysis — analysis layer (flat) + * InterlinearProject.analysis : TextAnalysis — analysis layer (flat) * ├─ segmentAnalyses : SegmentAnalysis[] (per-segment translations) * ├─ tokenAnalyses : TokenAnalysis[] (parse + 1:1 gloss) * └─ phrases : Phrase[] (multi-token gloss) @@ -131,10 +121,9 @@ declare module 'papi-shared-types' { * - `IMoForm` (allomorph) is not exported; no allomorph service. * - `IMoMorphSynAnalysis` (MSA) is not exported; no MSA service. * - * Punctuation tokens are first-class citizens of the text layer on both source and target sides — - * they are stored in `Segment.tokens` so the baseline text can be reconstructed faithfully. They - * are simply omitted from the analysis layer's `tokenAnalyses` (rather than stored there with empty - * analyses). + * Punctuation tokens are first-class citizens of the text layer — they are stored in + * `Segment.tokens` so the baseline text can be reconstructed faithfully. They are simply omitted + * from the analysis layer's `tokenAnalyses` (rather than stored there with empty analyses). * * Staleness detection: analysis records and alignment endpoints carry a `tokenSnapshot` of the * token's surface text at analysis time. 
When the baseline changes, consumers compare the snapshot @@ -303,112 +292,7 @@ declare module 'interlinearizer' { } // --------------------------------------------------------------------------- - // §1 InterlinearAlignment - // --------------------------------------------------------------------------- - - /** - * Top-level bilingual container pairing two interlinear texts — `source` (the input being - * analyzed) and `target` (the analysis / output side) — with the alignment links between them. - * - * The sides carry directional meaning: `source` is what the workflow takes in, `target` is what - * the workflow produces or aligns toward. The model types themselves are identical on both sides; - * that directional contract is enforced by the application layer, not by shape. Example - * pairings: - * - * - Glossing a vernacular draft (source) in an analyst language such as English (target); - * - Aligning a Greek / Hebrew resource text (source) against a vernacular translation (target); - * - Aligning one translation (source) against another (target). - * - * Each side carries its own text (books → segments → tokens) and, optionally, a flat analysis - * layer (`segmentAnalyses` / `tokenAnalyses` / `phrases`). `AlignmentLink`s bridge tokens or - * morphemes from source to target. - * - * Source-system mapping: - * - * - LCM: no native bilingual alignment model. Constructed by pairing two `InterlinearText` - * instances produced from LCM / companion data; the workflow decides which is `source` and - * which is `target`. - * - Paratext: not directly represented. Can be constructed from parallel projects that share the - * same versification. - * - BT Extension: one `Translation` scoped to two sides (`Translation.sideNum`: 1 / 2). By BT - * convention side 1 is the source and side 2 is the target; each side becomes an - * `InterlinearText`. `Alignment` records become `AlignmentLink`s. 
- */ - export interface InterlinearAlignment { - /** Unique identifier for this alignment pair. */ - id: string; - - /** - * The input being analyzed — for example a vernacular draft being glossed, a Greek / Hebrew - * resource text being aligned against a translation, or one translation being aligned against - * another. - */ - source: InterlinearText; - - /** - * The analysis / output side — for example an analyst-language gloss, a back translation, or - * the translation being aligned against the source. - */ - target: InterlinearText; - - /** - * Token- or morpheme-level alignment links connecting endpoints in the source interlinear to - * endpoints in the target interlinear. Empty when no alignment has been computed or imported. - */ - links: AlignmentLink[]; - } - - // --------------------------------------------------------------------------- - // §2 InterlinearText - // --------------------------------------------------------------------------- - - /** - * One side of an interlinear alignment — the baseline text plus its parallel analysis layer. - * - * The text layer (`books`) mirrors the underlying document's structure. The analysis layer - * (`analysis`) has the same divisions but carries morpheme / gloss / phrase information and - * references into the Lexicon extension. - * - * Source-system mapping: - * - * - LCM: one `IScripture` instance (singleton per project). Text layer from `IScrBook` / - * `IScrSection` / `IScrTxtPara` content; analysis layer from `IWfiWordform` / `IWfiAnalysis` / - * `IWfiGloss` referenced by `ISegment.AnalysesRS`. `analysisLanguages[]` is the set of - * languages present on `IWfiGloss.Form`. - * - Paratext: merged from per-book, per-language `InterlinearData` files - * (`Interlinear_{language}/Interlinear_{language}_{book}.xml`). Text layer from USFM; analysis - * layer from `ClusterData` + `LexemeCluster` + `WordAnalysis`. Each file's `GlossLanguage` is - * added to `analysisLanguages[]`. 
- * - BT Extension: one side of a `Translation` (a single `sideNum` value). Text layer from `Token` / - * `Instance` records; analysis layer synthesized from per-token `gloss` / `lemmaText` / - * `senseIds`. Analysis is typically in a single language. - */ - export interface InterlinearText { - /** Unique identifier for this interlinear text. */ - id: string; - - /** Writing system of the baseline text. */ - writingSystem: string; - - /** - * Writing systems in which analyses are provided (e.g. `["en", "fr"]`). A single text can hold - * analyses in multiple languages. - */ - analysisLanguages: string[]; - - /** Baseline text: books of scripture (or other texts). */ - books: Book[]; - - /** - * Parallel analysis layer. Absent (not present) when the text has never been analyzed; - * present-but-empty (`segmentAnalyses: [], tokenAnalyses: [], phrases: []`) when analysis has - * been initialized but no records have been added yet. - */ - analysis?: TextAnalysis; - } - - // --------------------------------------------------------------------------- - // §3 Text layer — Book, Segment, Token + // §1 Text layer — Book, Segment, Token // --------------------------------------------------------------------------- /** @@ -461,7 +345,7 @@ declare module 'interlinearizer' { */ export interface Segment { /** - * Unique within the owning `InterlinearText` — used as the cross-reference key by + * Unique within the owning `Book` — used as the cross-reference key by * `SegmentAnalysis.segmentId`. */ id: string; @@ -520,8 +404,8 @@ declare module 'interlinearizer' { */ export interface Token { /** - * Unique within the owning `InterlinearText` — used as the cross-reference key by - * `TokenAnalysis.tokenId`, `Phrase.tokenIds`, and `AlignmentEndpoint.tokenId`. + * Unique within the owning `Book` — used as the cross-reference key by `TokenAnalysis.tokenId`, + * `Phrase.tokenIds`, and `AlignmentEndpoint.tokenId`. 
*/ id: string; @@ -554,11 +438,11 @@ declare module 'interlinearizer' { } // --------------------------------------------------------------------------- - // §4 Analysis layer — TextAnalysis, SegmentAnalysis + // §2 Analysis layer — TextAnalysis, SegmentAnalysis // --------------------------------------------------------------------------- /** - * The analysis layer for an `InterlinearText`. + * The analysis layer for an `InterlinearProject`. * * Flat by design — it does **not** mirror the text layer's book / segment nesting. Every record * carries an id reference back to its text-layer counterpart (`segmentId` / `tokenId`). Consumers @@ -647,7 +531,7 @@ declare module 'interlinearizer' { /** * Reference to the corresponding `Segment.id` in the text layer (unique within the owning - * `InterlinearText`). + * `Book`). */ segmentId: string; @@ -680,7 +564,7 @@ declare module 'interlinearizer' { } // --------------------------------------------------------------------------- - // §5 TokenAnalysis — parse + 1:1 gloss + // §3 TokenAnalysis — parse + 1:1 gloss // --------------------------------------------------------------------------- /** @@ -723,7 +607,7 @@ declare module 'interlinearizer' { */ id: string; - /** Reference to the `Token.id` being analyzed (unique within the owning `InterlinearText`). */ + /** Reference to the `Token.id` being analyzed (unique within the owning `Book`). */ tokenId: string; /** Ordered morpheme breakdown. Omitted for whole-word analyses. 
*/ @@ -855,7 +739,7 @@ declare module 'interlinearizer' { } // --------------------------------------------------------------------------- - // §6 Phrase — multi-token gloss unit + // §4 Phrase — multi-token gloss unit // --------------------------------------------------------------------------- /** @@ -946,7 +830,7 @@ declare module 'interlinearizer' { }; // --------------------------------------------------------------------------- - // §7 AlignmentLink, AlignmentEndpoint + // §5 AlignmentLink, AlignmentEndpoint // --------------------------------------------------------------------------- /** @@ -967,7 +851,7 @@ declare module 'interlinearizer' { * Eflomal-generated alignments leave `originNum` and `statusNum` unset (default 0, CREATED). */ export interface AlignmentLink { - /** Unique within the owning `InterlinearAlignment` — stable reference for this link. */ + /** Unique within the owning `InterlinearProject` — stable reference for this link. */ id: string; /** Source-side endpoints (one or more tokens / morphemes). */ @@ -1055,7 +939,7 @@ declare module 'interlinearizer' { ); // --------------------------------------------------------------------------- - // §8 InterlinearProject — persisted project envelope + // §6 InterlinearProject — persisted project envelope // --------------------------------------------------------------------------- /** @@ -1089,16 +973,16 @@ declare module 'interlinearizer' { sourceProjectId: string; /** - * BCP 47 tag for the language used in glosses and annotations (e.g. `'en'`). Populates + * BCP 47 tags for the languages used in glosses and annotations (e.g. `['en']`). Populates * `MultiString` keys in `TokenAnalysis`, `SegmentAnalysis`, and `Phrase` records. */ - analysisWritingSystem: string; + analysisLanguages: string[]; - /** Source-side analysis layer. Empty at creation; populated as the user annotates tokens. 
*/ - sourceAnalysis: TextAnalysis; + /** Platform.Bible project ID for the target text, if a target-side project is linked. */ + targetProjectId?: string; - /** Target-side analysis layer. Empty at creation; populated as the user annotates tokens. */ - targetAnalysis: TextAnalysis; + /** Analysis layer. Empty at creation; populated as the user annotates tokens. */ + analysis: TextAnalysis; /** * Token- or morpheme-level alignment links. Empty at creation; populated as the user aligns @@ -1106,4 +990,28 @@ declare module 'interlinearizer' { */ links: AlignmentLink[]; } + + // --------------------------------------------------------------------------- + // §7 ActiveProject — runtime pairing of project envelope and source text + // --------------------------------------------------------------------------- + + /** + * The runtime object for an open interlinearizer project. Pairs the persisted + * {@link InterlinearProject} envelope with the reconstructed source-text hierarchy. + * + * `source` is rebuilt from Platform.Bible's USJ on each load and is never serialized. All + * annotation and alignment mutations target `project.analysis` and `project.links`; saving is + * done by writing those fields back to storage via `saveProjectAnalysis`. + */ + export interface ActiveProject { + /** The persisted project envelope. Mutations target `project.analysis` and `project.links`. */ + project: InterlinearProject; + + /** + * The reconstructed source books, built from Platform.Bible USJ on load. Never serialized — + * rebuilt on every activation. Typically one book per scripture book code; multiple books may + * be present when the UI has prefetched adjacent books. 
+ */ + source: Book[]; + } } From fe04619e4f17c63ff9063f726d3ab812cc696a05 Mon Sep 17 00:00:00 2001 From: alex-rawlings-yyc Date: Fri, 15 May 2026 10:56:57 -0600 Subject: [PATCH 2/8] Fix model gaps for lossless LCM / PT9 / BT Extension import --- src/types/interlinearizer.d.ts | 109 +++++++++++++++++++++------------ 1 file changed, 70 insertions(+), 39 deletions(-) diff --git a/src/types/interlinearizer.d.ts b/src/types/interlinearizer.d.ts index b9cda9bb..b9b9a9ae 100644 --- a/src/types/interlinearizer.d.ts +++ b/src/types/interlinearizer.d.ts @@ -27,14 +27,19 @@ declare module 'papi-shared-types' { * notification). * * @param sourceProjectId Platform.Bible project ID of the source text to interlinearize. - * @param analysisLanguages BCP 47 tags for the languages used in glosses and annotations (e.g. - * `['en']`). + * @param analysisLanguages BCP 47 tags for all languages used in glosses and annotations (e.g. + * `['en']`). LCM: one per writing system present on `IWfiGloss.Form`. Paratext: one per + * merged `GlossLanguage` file. BT Extension: typically one language. + * @param targetProjectId Optional Platform.Bible project ID of the target text. Required for BT + * Extension projects so that `AlignmentLink.targetEndpoints` can be resolved at runtime. + * Omitted for analysis-only projects (LCM, PT9 single-sided). * @param name Optional user-facing name for the project. * @param description Optional user-facing description for the project. */ 'interlinearizer.createProject': ( sourceProjectId: string, analysisLanguages: string[], + targetProjectId?: string, name?: string, description?: string, ) => Promise; @@ -74,14 +79,17 @@ declare module 'papi-shared-types' { * @param interlinearProjectId UUID of the interlinearizer project to update. * @param name New user-facing name; omit or pass `undefined` to clear. * @param description New user-facing description; omit or pass `undefined` to clear. 
- * @param analysisLanguages New BCP 47 analysis language tags; omit or pass empty array to leave - * unchanged (the field is required and cannot be cleared). + * @param analysisLanguages New BCP 47 analysis language tags; omit or pass an empty array to + * leave unchanged (the field is required and cannot be cleared). + * @param targetProjectId New target-project ID; omit or pass `undefined` to clear (removes the + * target-side text binding). */ 'interlinearizer.updateProjectMetadata': ( interlinearProjectId: string, name: string | undefined, description: string | undefined, analysisLanguages?: string[], + targetProjectId?: string, ) => Promise; } } @@ -96,7 +104,8 @@ declare module 'papi-shared-types' { * * ActiveProject * ├─ project : InterlinearProject — persisted envelope (analysis + links) - * └─ source : Book[] — text layer (rebuilt from USJ at runtime) + * ├─ source : Book[] — source text layer (rebuilt from USJ at runtime) + * └─ target? : Book[] — target text layer (rebuilt from USJ at runtime; absent for analysis-only projects) * └─ Segment[] → Token[] * * InterlinearProject.analysis : TextAnalysis — analysis layer (flat) @@ -571,10 +580,10 @@ declare module 'interlinearizer' { * Analysis of a single token: a word-level (1:1) gloss plus optional morpheme-level parse. * * `gloss` is a free-form gloss string for the token (keyed by analysis-language tag). - * `glossSenseRef` alternatively resolves the gloss through a specific `ISense` in the Lexicon - * extension — when set, the rendered gloss is the sense's gloss text and may be refreshed - * automatically if the lexicon is edited. Both may be set concurrently: when both are present, - * `gloss` serves as a local override that takes precedence for rendering. + * `glossSenseRef` resolves the gloss through a specific `ISense` in the Lexicon extension — when + * set, the sense's gloss text can be surfaced and refreshed automatically if the lexicon is + * edited. 
Both may be present simultaneously; when they are, `gloss` takes precedence for + * rendering (the local override wins over the lexicon-derived value). * * `morphemes` carries the parse information. Each morpheme links to the Lexicon extension via * `entryRef` / `senseRef`. @@ -592,8 +601,7 @@ declare module 'interlinearizer' { * - Paratext: `LexemeCluster` + `WordAnalysis`. `gloss` resolved from the selected * `LexiconSense.Gloss` (per-language strings). `morphemes` from the `Lexeme[]` within * `WordAnalysis` when `LexemeCluster.Type = WordParse`. Paratext stores POS on the lexeme, not - * per-analysis. `status` / `confidence` inferred from `InterlinearLexeme.IsGuess` and - * `.Score`. + * per-analysis. `status` / `confidence` inferred from `InterlinearLexeme.IsGuess` and `.Score`. * - BT Extension: synthesized per-token from `gloss` / `lemmaText` / `senseIds`. BT Extension * stores gloss per-token rather than as shared analysis objects — each token gets its own * `TokenAnalysis`. `status` from `Instance.termStatusNum` (BiblicalTermStatus). `confidence` @@ -655,16 +663,15 @@ declare module 'interlinearizer' { tokenSnapshot?: string; /** - * Free-form gloss string for this token, keyed by analysis-language BCP 47 tag. May coexist - * with `glossSenseRef` when the user has both a local override and a lexicon-backed sense — the - * local gloss takes precedence for rendering in that case. + * Free-form gloss string keyed by BCP 47 analysis-language tag. Takes precedence over + * `glossSenseRef` when both are present. */ gloss?: MultiString; /** - * Resolves the gloss through a specific `ISense` in the Lexicon extension, enabling automatic - * refresh when the lexicon is edited. May coexist with `gloss` when the user maintains both a - * lexicon-linked sense and a local override. + * Reference to the `ISense` in the Lexicon extension whose gloss text this analysis uses. 
May + * coexist with `gloss`; when both are present, `gloss` is the active rendering value and + * `glossSenseRef` is retained so the lexicon link is not lost. */ glossSenseRef?: SenseRef; }; @@ -754,11 +761,10 @@ declare module 'interlinearizer' { * Each token may still carry its own `TokenAnalysis` alongside the phrase; the phrase contributes * the combined-unit gloss. * - * `gloss` is a free-form phrase gloss. `senseRef` alternatively points at a lexicon sense when - * the phrase is a multi-word lexical entry — the Lexicon extension supports both kinds via - * `IEntry.morphType = Phrase` (contiguous) or `DiscontiguousPhrase` (e.g. "ne … pas"). Both may - * be set concurrently: when both are present, `gloss` serves as a local override that takes - * precedence for rendering. + * `gloss` is a free-form phrase gloss. `senseRef` points at a lexicon sense when the phrase is a + * multi-word lexical entry — the Lexicon extension supports both kinds via `IEntry.morphType = + * Phrase` (contiguous) or `DiscontiguousPhrase` (e.g. "ne … pas"). Both may be present + * simultaneously; when they are, `gloss` takes precedence for rendering. * * Provenance fields (`producer`, `sourceUser`, `confidence`, `status`) let a suggestion engine * record proposed phrases that a user can then approve or reject, enabling automated recognition @@ -814,17 +820,15 @@ declare module 'interlinearizer' { tokenSnapshots?: [string, ...string[]]; /** - * Free-form phrase gloss keyed by analysis-language BCP 47 tag. May coexist with `senseRef` - * when the user has both a local override and a lexicon-backed sense — the local gloss takes - * precedence for rendering in that case. + * Free-form gloss string keyed by BCP 47 analysis-language tag. Takes precedence over + * `senseRef` when both are present. */ gloss?: MultiString; /** - * Points at a multi-word lexical entry in the Lexicon extension (e.g. 
`IEntry.morphType = - * Phrase` or `DiscontiguousPhrase`), enabling automatic gloss refresh when the lexicon is - * edited. May coexist with `gloss` when the user maintains both a lexicon-linked sense and a - * local override. + * Reference to the `ISense` in the Lexicon extension this phrase maps to. May coexist with + * `gloss`; when both are present, `gloss` is the active rendering value and `senseRef` is + * retained so the lexicon link is not lost. */ senseRef?: SenseRef; }; @@ -944,7 +948,7 @@ declare module 'interlinearizer' { /** * The storage envelope for one interlinearizer project. Multiple projects may exist for the same - * pair of Platform.Bible projects (e.g. different analysis languages). + * source project (e.g. different analysis languages, or different target alignments). * * The token hierarchy (`Book` / `Segment` / `Token`) is **not** stored here — it is rebuilt from * Platform.Bible's USJ on each load. Only the analysis data and alignment links are persisted. @@ -973,14 +977,29 @@ declare module 'interlinearizer' { sourceProjectId: string; /** - * BCP 47 tags for the languages used in glosses and annotations (e.g. `['en']`). Populates + * Platform.Bible project ID for the target text. Present only for bilateral alignment projects + * (e.g. BT Extension imports) where `AlignmentLink.targetEndpoints` must resolve to tokens in a + * second text. Omitted for analysis-only projects (LCM, PT9 single-sided glossing). + * + * When present, the `ActiveProject.target` books are rebuilt from this project's USJ on load, + * exactly as `ActiveProject.source` is rebuilt from `sourceProjectId`. + */ + targetProjectId?: string; + + /** + * BCP 47 tags for all languages used in glosses and annotations (e.g. `['en']`). Populates * `MultiString` keys in `TokenAnalysis`, `SegmentAnalysis`, and `Phrase` records. + * + * Source-system mapping: + * + * - LCM: the set of writing systems present on `IWfiGloss.Form` (one tag per analysis language in + * the project). 
+ * - Paratext: one tag per merged `GlossLanguage` file + * (`Interlinear_{language}/Interlinear_{language}_{book}.xml`). + * - BT Extension: typically a single language; set from the per-token `gloss` writing system. */ analysisLanguages: string[]; - /** Platform.Bible project ID for the target text, if a target-side project is linked. */ - targetProjectId?: string; - /** Analysis layer. Empty at creation; populated as the user annotates tokens. */ analysis: TextAnalysis; @@ -992,16 +1011,21 @@ declare module 'interlinearizer' { } // --------------------------------------------------------------------------- - // §7 ActiveProject — runtime pairing of project envelope and source text + // §7 ActiveProject — runtime pairing of project envelope and text layers // --------------------------------------------------------------------------- /** * The runtime object for an open interlinearizer project. Pairs the persisted - * {@link InterlinearProject} envelope with the reconstructed source-text hierarchy. + * {@link InterlinearProject} envelope with the reconstructed text hierarchies. + * + * `source` and `target` are rebuilt from Platform.Bible's USJ on each load and are never + * serialized. All annotation and alignment mutations target `project.analysis` and + * `project.links`; saving is done by writing those fields back to storage via + * `saveProjectAnalysis`. * - * `source` is rebuilt from Platform.Bible's USJ on each load and is never serialized. All - * annotation and alignment mutations target `project.analysis` and `project.links`; saving is - * done by writing those fields back to storage via `saveProjectAnalysis`. + * `target` is present only when `project.targetProjectId` is set (bilateral alignment projects + * such as BT Extension imports). When present, `AlignmentLink.targetEndpoints` token IDs resolve + * against these books; when absent, only `sourceEndpoints` can be resolved. */ export interface ActiveProject { /** The persisted project envelope. 
Mutations target `project.analysis` and `project.links`. */ @@ -1013,5 +1037,12 @@ declare module 'interlinearizer' { * be present when the UI has prefetched adjacent books. */ source: Book[]; + + /** + * The reconstructed target books, built from `project.targetProjectId`'s USJ on load. Present + * only when `project.targetProjectId` is set; absent for analysis-only projects (LCM, PT9). + * Never serialized — rebuilt on every activation alongside `source`. + */ + target?: Book[]; } } From dcaf9fbdef014b6abacd0af361fd8a9866b41019 Mon Sep 17 00:00:00 2001 From: alex-rawlings-yyc Date: Fri, 15 May 2026 10:57:14 -0600 Subject: [PATCH 3/8] Make `analysisLanguages` required --- src/types/interlinearizer.d.ts | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/types/interlinearizer.d.ts b/src/types/interlinearizer.d.ts index b9b9a9ae..b378fdcb 100644 --- a/src/types/interlinearizer.d.ts +++ b/src/types/interlinearizer.d.ts @@ -79,8 +79,8 @@ declare module 'papi-shared-types' { * @param interlinearProjectId UUID of the interlinearizer project to update. * @param name New user-facing name; omit or pass `undefined` to clear. * @param description New user-facing description; omit or pass `undefined` to clear. - * @param analysisLanguages New BCP 47 analysis language tags; omit or pass an empty array to - * leave unchanged (the field is required and cannot be cleared). + * @param analysisLanguages New BCP 47 analysis language tags. Must be a non-empty array; pass + * the current value to leave it unchanged (the field is required and cannot be cleared). * @param targetProjectId New target-project ID; omit or pass `undefined` to clear (removes the * target-side text binding). 
*/ @@ -88,7 +88,7 @@ declare module 'papi-shared-types' { interlinearProjectId: string, name: string | undefined, description: string | undefined, - analysisLanguages?: string[], + analysisLanguages: string[], targetProjectId?: string, ) => Promise; } @@ -601,7 +601,8 @@ declare module 'interlinearizer' { * - Paratext: `LexemeCluster` + `WordAnalysis`. `gloss` resolved from the selected * `LexiconSense.Gloss` (per-language strings). `morphemes` from the `Lexeme[]` within * `WordAnalysis` when `LexemeCluster.Type = WordParse`. Paratext stores POS on the lexeme, not - * per-analysis. `status` / `confidence` inferred from `InterlinearLexeme.IsGuess` and `.Score`. + * per-analysis. `status` / `confidence` inferred from `InterlinearLexeme.IsGuess` and + * `.Score`. * - BT Extension: synthesized per-token from `gloss` / `lemmaText` / `senseIds`. BT Extension * stores gloss per-token rather than as shared analysis objects — each token gets its own * `TokenAnalysis`. `status` from `Instance.termStatusNum` (BiblicalTermStatus). `confidence` From 0d23767815daee4b757a0e35af64ba7c1ddaf23b Mon Sep 17 00:00:00 2001 From: alex-rawlings-yyc Date: Fri, 15 May 2026 10:57:22 -0600 Subject: [PATCH 4/8] Add comments about mapping of BT Extension's `sideNum` --- src/types/interlinearizer.d.ts | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/types/interlinearizer.d.ts b/src/types/interlinearizer.d.ts index b378fdcb..771088b7 100644 --- a/src/types/interlinearizer.d.ts +++ b/src/types/interlinearizer.d.ts @@ -984,6 +984,11 @@ declare module 'interlinearizer' { * * When present, the `ActiveProject.target` books are rebuilt from this project's USJ on load, * exactly as `ActiveProject.source` is rebuilt from `sourceProjectId`. + * + * BT Extension: corresponds to one `Translation` scoped to two sides (`Translation.sideNum`). 
+ * By BT convention `sideNum = 1` is the source and `sideNum = 2` is the target; + * `sourceProjectId` maps to the side-1 project and `targetProjectId` maps to the side-2 + * project. */ targetProjectId?: string; @@ -1027,6 +1032,9 @@ declare module 'interlinearizer' { * `target` is present only when `project.targetProjectId` is set (bilateral alignment projects * such as BT Extension imports). When present, `AlignmentLink.targetEndpoints` token IDs resolve * against these books; when absent, only `sourceEndpoints` can be resolved. + * + * BT Extension: `source` corresponds to `Translation.sideNum = 1` and `target` to `sideNum = 2`, + * following BT's convention that side 1 is the input being analyzed and side 2 is the output. */ export interface ActiveProject { /** The persisted project envelope. Mutations target `project.analysis` and `project.links`. */ From d5b280d3c491cc4b8f37cc594dcb76ff6073df93 Mon Sep 17 00:00:00 2001 From: alex-rawlings-yyc Date: Fri, 15 May 2026 10:57:30 -0600 Subject: [PATCH 5/8] Update docs/schema --- src/types/interlinearizer.d.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/types/interlinearizer.d.ts b/src/types/interlinearizer.d.ts index 771088b7..a5854b95 100644 --- a/src/types/interlinearizer.d.ts +++ b/src/types/interlinearizer.d.ts @@ -54,9 +54,10 @@ declare module 'papi-shared-types' { 'interlinearizer.getProjectsForSource': (sourceProjectId: string) => Promise; /** - * Deletes an interlinearizer project by UUID. No-ops silently if the project does not exist. + * Deletes an interlinearizer project by UUID. * * @param interlinearProjectId UUID of the interlinearizer project to delete. + * @throws {RangeError} If the project ID is not found in the stored index. 
*/ 'interlinearizer.deleteProject': (interlinearProjectId: string) => Promise; From ab90e2c7b34d6a503a2beac79fd1bf0dc1248a7a Mon Sep 17 00:00:00 2001 From: alex-rawlings-yyc Date: Fri, 15 May 2026 10:57:38 -0600 Subject: [PATCH 6/8] Further refinement; please see updated description --- src/types/interlinearizer.d.ts | 382 +++++++++++++++------------------ 1 file changed, 174 insertions(+), 208 deletions(-) diff --git a/src/types/interlinearizer.d.ts b/src/types/interlinearizer.d.ts index a5854b95..949bbbd6 100644 --- a/src/types/interlinearizer.d.ts +++ b/src/types/interlinearizer.d.ts @@ -103,22 +103,23 @@ declare module 'papi-shared-types' { * * Shape at a glance: * - * ActiveProject - * ├─ project : InterlinearProject — persisted envelope (analysis + links) - * ├─ source : Book[] — source text layer (rebuilt from USJ at runtime) - * └─ target? : Book[] — target text layer (rebuilt from USJ at runtime; absent for analysis-only projects) - * └─ Segment[] → Token[] + * InterlinearProject + * ├─ sourceProjectId + * ├─ targetProjectId? — absent for analysis-only projects (LCM, PT9) + * ├─ analysisLanguages : string[] + * ├─ analysis : TextAnalysis + * └─ links? : AlignmentLink[] * - * InterlinearProject.analysis : TextAnalysis — analysis layer (flat) - * ├─ segmentAnalyses : SegmentAnalysis[] (per-segment translations) - * ├─ tokenAnalyses : TokenAnalysis[] (parse + 1:1 gloss) - * └─ phrases : Phrase[] (multi-token gloss) + * ActiveProject + * ├─ project : InterlinearProject + * ├─ source : Book[] + * └─ target? : Book[] — present only when targetProjectId is set * * The analysis layer is **flat** — not a mirror of the text layer's book / segment nesting. Every * analysis record carries an id reference back to its text-layer counterpart (`segmentId` / - * `tokenId`). Consumers index by id at load time (`Map`, etc.) to render - * a segment at a time. 
This keeps the layer's containers honest — none exist just to mirror a - * parent — and makes it trivial to add analyses without touching the text hierarchy. + * `tokenRef`). Consumers index by id at load time (`Map`, etc.) to + * render a segment at a time. This keeps the layer's containers honest — none exist just to mirror + * a parent — and makes it trivial to add analyses without touching the text hierarchy. * * Lexical information (entries, senses, allomorphs, grammar / MSA, …) is **not** stored in this * model. It lives in the Lexicon extension (`lexicon`); this model references it via `EntryRef` / @@ -135,10 +136,10 @@ declare module 'papi-shared-types' { * `Segment.tokens` so the baseline text can be reconstructed faithfully. They are simply omitted * from the analysis layer's `tokenAnalyses` (rather than stored there with empty analyses). * - * Staleness detection: analysis records and alignment endpoints carry a `tokenSnapshot` of the - * token's surface text at analysis time. When the baseline changes, consumers compare the snapshot - * against the current `Token.surfaceText` and flip `status` to `'stale'` on mismatch to prompt - * re-review. + * Staleness detection: `TokenAnalysis` records carry a `tokenSnapshot` of the token's surface text + * at analysis time. `AlignmentEndpoint` records carry an equivalent snapshot via + * `token.surfaceText`. When the baseline changes, consumers compare the snapshot against the + * current `Token.surfaceText` and flip `status` to `'stale'` on mismatch to prompt re-review. */ declare module 'interlinearizer' { // --------------------------------------------------------------------------- @@ -183,10 +184,9 @@ declare module 'interlinearizer' { export type MultiString = Record; /** - * A character-level scripture reference anchored to a specific position within a verse's baseline - * text. 
When `charIndex` is absent the reference is verse-level only — it identifies the verse as - * a whole without pinpointing a specific character. Consumers must treat an absent `charIndex` as - * "beginning of verse" when a character position is required. + * A verse-level scripture reference that may optionally be anchored to a character position + * within the verse's baseline text. When `charIndex` is absent the reference is verse-level + * only. */ export interface ScriptureRef { /** 3-letter SIL book code (e.g. `"GEN"`). */ @@ -355,22 +355,28 @@ declare module 'interlinearizer' { */ export interface Segment { /** - * Unique within the owning `Book` — used as the cross-reference key by - * `SegmentAnalysis.segmentId`. + * Stable identifier for this segment, unique within the owning `InterlinearProject`. In + * practice the id is project-wide unique because it is set to the verse SID (e.g. `"GEN 1:1"`). + * Used as the cross-reference key by `SegmentAnalysis.segmentId`. */ id: string; - /** Inclusive start of the text range, anchored to a character position within its verse. */ + /** + * Inclusive start of the text range. `charIndex` is set when a sub-verse character offset is + * known. + */ startRef: ScriptureRef; - /** Inclusive end of the text range, anchored to a character position within its verse. */ + /** + * Inclusive end of the text range. `charIndex` is set when a sub-verse character offset is + * known. + */ endRef: ScriptureRef; /** - * Raw text of the segment. Required — token character offsets (`Token.charStart` / - * `Token.charEnd`) are expressed relative to this string, so it must be present for the text - * layer to be interpretable, particularly for scriptio continua scripts where token boundaries - * are not derivable from whitespace. 
+ * Token character offsets (`Token.charStart` / `Token.charEnd`) are expressed relative to this + * string, so it must be present for the text layer to be interpretable, particularly for + * scriptio continua scripts where token boundaries are not derivable from whitespace. */ baselineText: string; @@ -414,15 +420,18 @@ declare module 'interlinearizer' { */ export interface Token { /** - * Unique within the owning `Book` — used as the cross-reference key by `TokenAnalysis.tokenId`, - * `Phrase.tokenIds`, and `AlignmentEndpoint.tokenId`. + * Stable identifier for this token, unique within the owning `InterlinearProject`. In practice + * the ref is project-wide unique because it embeds the verse SID and the token's character + * offset (e.g. `"GEN 1:1:0"` for the first token in Genesis 1:1). Used as the cross-reference + * key by `TokenAnalysis.tokenRef`, `PhraseAnalysis.tokenRefs`, and + * `AlignmentEndpoint.token.tokenRef`. */ - id: string; + ref: string; /** The token's text as it appears in the baseline. */ surfaceText: string; - /** Writing system of `surfaceText`. */ + /** BCP 47 writing-system tag for `surfaceText`. */ writingSystem: string; /** @@ -455,9 +464,9 @@ declare module 'interlinearizer' { * The analysis layer for an `InterlinearProject`. * * Flat by design — it does **not** mirror the text layer's book / segment nesting. Every record - * carries an id reference back to its text-layer counterpart (`segmentId` / `tokenId`). Consumers - * that need segment-local views build `Map` / `Map` at - * load time. + * carries an id reference back to its text-layer counterpart (`segmentId` / `tokenRef`). + * Consumers that need segment-local views build `Map` / `Map` at load time. * * Keeping this layer flat avoids ceremonial container types whose only purpose is to mirror a * parent, and makes it trivial to add or remove analyses without touching the text hierarchy. 
@@ -481,23 +490,25 @@ declare module 'interlinearizer' { * * **Invariant:** at most one `SegmentAnalysis` per `segmentId` has `status: 'approved'`. That * entry is the canonical segment-level analysis for rendering; alternates are available to - * review workflows via the other statuses. + * review workflows via the other statuses. This invariant is the caller's responsibility to + * maintain; no runtime enforcement exists. */ segmentAnalyses: SegmentAnalysis[]; /** * Token-level analyses, flat across the whole text. Each entry references its token by - * `tokenId`; the text layer keeps every token (words and punctuation) but this list typically + * `tokenRef`; the text layer keeps every token (words and punctuation) but this list typically * includes only the tokens being analyzed — punctuation is omitted rather than stored with * empty analyses. * - * Competing analyses are permitted: a single `tokenId` may have multiple `TokenAnalysis` + * Competing analyses are permitted: a single `tokenRef` may have multiple `TokenAnalysis` * entries (e.g. a parser's suggestion alongside a human's choice), distinguished by `status` / * `confidence` / `producer`. * - * **Invariant:** at most one `TokenAnalysis` per `tokenId` has `status: 'approved'`. That entry - * is the canonical analysis for rendering; alternates are available to review workflows via the - * other statuses (`'suggested'`, `'candidate'`, `'rejected'`, `'stale'`). + * **Invariant:** at most one `TokenAnalysis` per `tokenRef` has `status: 'approved'`. That + * entry is the canonical analysis for rendering; alternates are available to review workflows + * via the other statuses (`'suggested'`, `'candidate'`, `'rejected'`, `'stale'`). This + * invariant is the caller's responsibility to maintain; no runtime enforcement exists. */ tokenAnalyses: TokenAnalysis[]; @@ -506,17 +517,51 @@ declare module 'interlinearizer' { * disjoint tokens and carries its own gloss. 
A phrase's member tokens may span multiple * segments. * - * Competing phrases are permitted: a given `tokenId` may appear in multiple `Phrase` records - * (e.g. a suggested phrase grouping plus a human-approved one) distinguished by `status`. + * Competing phrases are permitted: a given `tokenRef` may appear in multiple `PhraseAnalysis` + * records (e.g. a suggested phrase grouping plus a human-approved one) distinguished by + * `status`. * * **Invariants:** * - * - At most one `Phrase` containing a given `tokenId` has `status: Approved`. That phrase is - * canonical for rendering. - * - A token may carry both a `TokenAnalysis` _and_ an approved `Phrase`; the per-token parse - * coexists with the phrase-level gloss and is not a competing analysis. + * - At most one `PhraseAnalysis` containing a given `tokenRef` has `status: 'approved'`. That + * phrase is canonical for rendering. + * - A token may carry both a `TokenAnalysis` _and_ an approved `PhraseAnalysis`; the per-token + * parse coexists with the phrase-level gloss and is not a competing analysis. */ - phrases: Phrase[]; + phrases: PhraseAnalysis[]; + } + + /** + * Shared base for all analysis record types (`SegmentAnalysis`, `TokenAnalysis`, + * `PhraseAnalysis`). Carries the fields common to every analysis: stable identity, review status, + * and optional provenance. + */ + export interface Analysis { + /** Unique within the owning `TextAnalysis` — stable reference for this record. */ + id: string; + + /** Required review status. */ + status: AssignmentStatus; + + /** + * How much to trust this analysis. Independent of who produced it — see `producer` / + * `sourceUser`. + */ + confidence?: Confidence; + + /** + * Free-form tag identifying what produced this analysis — e.g. `"human"`, `"parser"`, + * `"eflomal"`, or a specific tool name. + */ + producer?: string; + + /** + * User identifier for human-created or human-edited analyses. Omitted for purely + * machine-generated entries. 
Both `producer` and `sourceUser` may be set simultaneously when a + * human uses a tool-assisted workflow; `producer` names the tool and `sourceUser` identifies + * the human reviewer. + */ + sourceUser?: string; } /** @@ -532,17 +577,8 @@ declare module 'interlinearizer' { * - BT Extension: free / literal translations are not natively stored — typically absent unless * synthesized. */ - export interface SegmentAnalysis { - /** - * Unique within the owning `TextAnalysis` — used as a stable reference for this analysis - * record. - */ - id: string; - - /** - * Reference to the corresponding `Segment.id` in the text layer (unique within the owning - * `Book`). - */ + export interface SegmentAnalysis extends Analysis { + /** Reference to the corresponding `Segment.id` in the text layer. */ segmentId: string; /** Idiomatic translation of the segment. */ @@ -550,27 +586,6 @@ declare module 'interlinearizer' { /** Word-for-word translation. May be generated from token glosses. */ literalTranslation?: MultiString; - - /** - * How much to trust this segment-level analysis. Independent of who produced it — see - * `producer` / `sourceUser` for that. - */ - confidence?: Confidence; - - /** Required review status. */ - status: AssignmentStatus; - - /** - * Free-form tag identifying what produced this analysis — e.g. `"human"`, `"bt-draft"`, or a - * specific tool name. - */ - producer?: string; - - /** - * User identifier for human-created or human-edited analyses. Omitted for purely - * machine-generated entries. - */ - sourceUser?: string; } // --------------------------------------------------------------------------- @@ -610,18 +625,16 @@ declare module 'interlinearizer' { * inferred from status. No morpheme decomposition — `morphemes` is either empty or a single * whole-word morpheme. `pos` available from Macula TSV for source-language tokens only. 
*/ - export type TokenAnalysis = { + export interface TokenAnalysis extends Analysis { + /** Reference to the `Token.ref` being analyzed. */ + tokenRef: string; + /** - * Unique within the owning `TextAnalysis` — used as the cross-reference key by - * `AlignmentEndpoint.tokenAnalysisId` for morpheme-level alignment links. + * Ordered morpheme breakdown. Present when the analysis reaches sub-word granularity (e.g. an + * LCM `IWfiAnalysis` with `MorphBundlesOS`). Absent when the analysis treats the token as a + * single whole-word unit. */ - id: string; - - /** Reference to the `Token.id` being analyzed (unique within the owning `Book`). */ - tokenId: string; - - /** Ordered morpheme breakdown. Omitted for whole-word analyses. */ - morphemes?: Morpheme[]; + morphemes?: MorphemeAnalysis[]; /** Part of speech (free-form tag or lexicon POS id). */ pos?: string; @@ -632,35 +645,10 @@ declare module 'interlinearizer' { */ features?: Record; - /** - * How much to trust this analysis. Independent of who produced it — see `producer` / - * `sourceUser` for that. - */ - confidence?: Confidence; - - /** Required review status. */ - status: AssignmentStatus; - - /** - * Free-form tag identifying what produced this analysis — e.g. `"human"`, `"parser"`, - * `"eflomal"`, or a specific tool name. Distinguishes human edits from each of several possible - * engines. - */ - producer?: string; - - /** - * User identifier for human-created or human-edited analyses. Omitted for purely - * machine-generated entries. - */ - sourceUser?: string; - /** * Surface text of the token at analysis time — used for drift detection. Consumers compare this * against the current `Token.surfaceText`; on mismatch, flip `status` to `'stale'` to prompt * re-review. - * - * Holds the raw surface text for debuggability; can be swapped for a hash if storage cost - * becomes a concern (token text is typically short, so the literal string is usually fine). 
*/ tokenSnapshot?: string; @@ -676,10 +664,13 @@ declare module 'interlinearizer' { * `glossSenseRef` is retained so the lexicon link is not lost. */ glossSenseRef?: SenseRef; - }; + } /** - * An ordered morpheme within a token's parse. + * Analysis of one morpheme within a token's parse. Unlike `TokenAnalysis` and `SegmentAnalysis`, + * which reference their subject by id, `MorphemeAnalysis` owns the morpheme itself: `form` and + * `writingSystem` store the structural data directly, while the optional refs link it into the + * Lexicon extension for lexical resolution. * * `form` is the morpheme's surface text as it appeared in this analysis context — which may * differ from the citation form on the referenced lexicon entry (e.g. under phonological @@ -708,10 +699,10 @@ declare module 'interlinearizer' { * from the lemma. `allomorphRef` / `grammarRef` are left unset — BT Extension does not carry * these. */ - export interface Morpheme { + export interface MorphemeAnalysis { /** * Unique within the owning `TokenAnalysis.morphemes` array — used as the cross-reference key by - * `AlignmentEndpoint.morphemeId`. + * `MorphemeLink.morphemeId`. */ id: string; @@ -748,13 +739,13 @@ declare module 'interlinearizer' { } // --------------------------------------------------------------------------- - // §4 Phrase — multi-token gloss unit + // §4 PhraseAnalysis — multi-token gloss unit // --------------------------------------------------------------------------- /** * A multi-token unit glossed or analyzed as a single phrase. * - * `tokenIds` lists the tokens (in order) that belong to the phrase. The tokens may be: + * `tokenRefs` lists the tokens (in order) that belong to the phrase. 
The tokens may be: * * - Adjacent within one segment ("en el" → "in the") * - Disjoint within one segment (French "ne … pas" → "not") @@ -775,51 +766,16 @@ declare module 'interlinearizer' { * Source-system mapping: * * - LCM: LCM does not natively model multi-word phrases as first-class objects. Multi-word glosses, - * when present, must be synthesized as `Phrase` records during import. + * when present, must be synthesized as `PhraseAnalysis` records during import. * - Paratext: a `LexemeCluster` with `Type = Phrase` spans multiple words — each such cluster - * becomes one `Phrase` whose `tokenIds` enumerate the covered tokens. `senseRef` is the - * selected `LexemeData` reference for the phrase. + * becomes one `PhraseAnalysis` whose `tokenRefs` enumerate the covered tokens. `senseRef` is + * the selected `LexemeData` reference for the phrase. * - BT Extension: not natively tracked. Must be synthesized during migration when adjacent tokens * share the same gloss / sense. */ - export type Phrase = { - /** Unique within the owning `TextAnalysis` — used as a stable reference for this phrase record. */ - id: string; - - /** Ordered `Token.id` values that compose this phrase. */ - tokenIds: [string, ...string[]]; - - /** Required review status. */ - status: AssignmentStatus; - - /** - * How much to trust this phrase. Independent of who produced it — see `producer` / `sourceUser` - * for that. - */ - confidence?: Confidence; - - /** - * Free-form tag identifying what produced this phrase — e.g. `"human"`, `"phrase-detector"`, or - * a specific tool name. - */ - producer?: string; - - /** - * User identifier for human-created or human-edited phrases. Omitted for purely - * machine-generated entries. - */ - sourceUser?: string; - - /** - * Surface text of each token at creation time, parallel to `tokenIds`. Enables drift detection - * for phrases — if any index's snapshot no longer matches the current `Token.surfaceText`, the - * phrase is flagged `Stale`. 
- * - * **Invariant:** when present, `tokenSnapshots` must have the same length as `tokenIds` and - * each index `i` corresponds to the `Token.surfaceText` for `tokenIds[i]`. Consumers must - * maintain this alignment when filtering or transforming tokens. - */ - tokenSnapshots?: [string, ...string[]]; + export interface PhraseAnalysis extends Analysis { + /** Ordered `Token.ref` values of the tokens that compose this phrase. */ + tokenRefs: [string, ...string[]]; /** * Free-form gloss string keyed by BCP 47 analysis-language tag. Takes precedence over @@ -833,7 +789,7 @@ declare module 'interlinearizer' { * retained so the lexicon link is not lost. */ senseRef?: SenseRef; - }; + } // --------------------------------------------------------------------------- // §5 AlignmentLink, AlignmentEndpoint @@ -883,66 +839,75 @@ declare module 'interlinearizer' { */ confidence?: Confidence; - /** Multilingual notes keyed by writing system (e.g. UI locale). */ + /** Multilingual notes keyed by BCP 47 writing-system tag (e.g. `'en'`, `'fr'`). */ notes?: MultiString; } /** * One side of an alignment link. * - * When `morphemeId` is set the link connects at the morpheme level. Because a single token may - * have multiple competing `TokenAnalysis` entries, `tokenAnalysisId` is **required** alongside - * `morphemeId` to identify the specific `TokenAnalysis` that owns the referenced morpheme. When - * `morphemeId` is absent the link connects to the whole token. - * - * Exactly one of two shapes is valid — setting `morphemeId` without `tokenAnalysisId` (or vice - * versa) is a TypeScript type error: + * When `morphemeLink` is set the link connects at the morpheme level. Because a single token may + * have multiple competing `TokenAnalysis` entries, `morphemeLink.tokenAnalysisId` is **required** + * alongside `morphemeLink.morphemeId` to identify the specific `TokenAnalysis` that owns the + * referenced morpheme. When `morphemeLink` is absent the link connects to the whole token. 
* - * - Token-level: neither `morphemeId` nor `tokenAnalysisId` is present. - * - Morpheme-level: both `morphemeId` and `tokenAnalysisId` are present. + * Resolution chain (morpheme-level): AlignmentEndpoint → Token (via `token.tokenRef`) → + * TokenAnalysis (via `morphemeLink.tokenAnalysisId`) → MorphemeAnalysis (via + * `morphemeLink.morphemeId`) → EntryRef → `IEntry` (Lexicon extension) → SenseRef → `ISense` + * (Lexicon extension) * - * Resolution chain (morpheme-level): AlignmentEndpoint → Token (via `tokenId`) → TokenAnalysis - * (via `tokenAnalysisId`) → Morpheme (via `morphemeId`) → EntryRef → `IEntry` (Lexicon extension) - * → SenseRef → `ISense` (Lexicon extension) - * - * Resolution chain (token-level): AlignmentEndpoint → Token (surface text only) + * Resolution chain (token-level): AlignmentEndpoint → Token (via `token.tokenRef`) → + * `Token.surfaceText` (display) / `TokenAnalysis[]` (analysis, looked up by `tokenRef`) * * Source-system mapping: * * - LCM / Paratext: endpoints produced only through external tools or parallel-project inference * (see `AlignmentLink`). * - BT Extension: one endpoint per `Instance` in an `Alignment`'s `sourceInstances` / - * `targetInstances`. `morphemeId` and `tokenAnalysisId` are set when the token has a - * morpheme-level parse; otherwise the endpoint targets the whole token. + * `targetInstances`. `morphemeLink` is set when the token has a morpheme-level parse; otherwise + * the endpoint targets the whole token. */ - export type AlignmentEndpoint = { - /** The `Token.id` this endpoint targets. */ - tokenId: string; + export interface AlignmentEndpoint { + /** + * Token targeted by this endpoint. Identifies the token via `tokenRef` and carries a surface + * text snapshot for drift detection. + */ + token: TokenSnapshot; + + /** + * When set, narrows the endpoint to a specific morpheme within the token's parse. When absent, + * the endpoint targets the whole token. 
+ */ + morphemeLink?: MorphemeLink; + } + + /** + * A snapshot of a token at the time an alignment endpoint was created. Carries the stable token + * reference and a copy of its surface text for drift detection. + */ + export interface TokenSnapshot { + /** `Token.ref` of the targeted token. */ + tokenRef: string; /** * Surface text of the token at link-creation time — used for drift detection. A link whose * endpoint snapshot no longer matches the current `Token.surfaceText` is stale; consumers flip * the link's `status` to `'stale'` to prompt re-review. */ - tokenSnapshot?: string; - } & - /** - * Either both `morphemeId` and `tokenAnalysisId` are set (morpheme-level link), or neither is - * set (token-level link). `tokenAnalysisId` is required alongside `morphemeId` because a single - * token may have multiple competing `TokenAnalysis` entries; without it the target morpheme - * would be ambiguous. - */ - (| { morphemeId?: never; tokenAnalysisId?: never } - | { - /** - * The `TokenAnalysis.id` that owns the referenced morpheme. Required when `morphemeId` is - * set. - */ - tokenAnalysisId: string; - /** Specific `Morpheme.id` within the identified `TokenAnalysis.morphemes`. */ - morphemeId: string; - } - ); + surfaceText: string; + } + + /** + * Identifies a specific morpheme within a token's parse for morpheme-level alignment endpoints. + * Both fields are required together: `tokenAnalysisId` selects the `TokenAnalysis` (since a token + * may have multiple competing analyses) and `morphemeId` selects the morpheme within it. + */ + export interface MorphemeLink { + /** The `TokenAnalysis.id` that owns the referenced morpheme. */ + tokenAnalysisId: string; + /** Specific `MorphemeAnalysis.id` within the identified `TokenAnalysis.morphemes`. 
*/ + morphemeId: string; + } // --------------------------------------------------------------------------- // §6 InterlinearProject — persisted project envelope @@ -954,8 +919,8 @@ declare module 'interlinearizer' { * * The token hierarchy (`Book` / `Segment` / `Token`) is **not** stored here — it is rebuilt from * Platform.Bible's USJ on each load. Only the analysis data and alignment links are persisted. - * Token-level drift is detected via `tokenSnapshot` fields on `TokenAnalysis` and - * `AlignmentEndpoint` records. + * Token-level drift is detected via `tokenSnapshot` on `TokenAnalysis` records and via + * `token.surfaceText` on `AlignmentEndpoint` records. * * Projects are stored via `papi.storage` (extension-host only) under two keys: * @@ -995,7 +960,7 @@ declare module 'interlinearizer' { /** * BCP 47 tags for all languages used in glosses and annotations (e.g. `['en']`). Populates - * `MultiString` keys in `TokenAnalysis`, `SegmentAnalysis`, and `Phrase` records. + * `MultiString` keys in `TokenAnalysis`, `SegmentAnalysis`, and `PhraseAnalysis` records. * * Source-system mapping: * @@ -1011,10 +976,11 @@ declare module 'interlinearizer' { analysis: TextAnalysis; /** - * Token- or morpheme-level alignment links. Empty at creation; populated as the user aligns - * source and target tokens. + * Token- or morpheme-level alignment links. Absent (`undefined`) at creation for analysis-only + * projects; present (possibly empty) for bilateral alignment projects. Populated as the user + * aligns source and target tokens. */ - links: AlignmentLink[]; + links?: AlignmentLink[]; } // --------------------------------------------------------------------------- From 1c228c94fd01d88c2153ebcc43d6b3e7455d6eb1 Mon Sep 17 00:00:00 2001 From: "D. Ror." 
Date: Fri, 15 May 2026 10:57:48 -0600 Subject: [PATCH 7/8] Suggested model tweak --- src/types/interlinearizer.d.ts | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/src/types/interlinearizer.d.ts b/src/types/interlinearizer.d.ts index 949bbbd6..8af89c99 100644 --- a/src/types/interlinearizer.d.ts +++ b/src/types/interlinearizer.d.ts @@ -528,7 +528,7 @@ declare module 'interlinearizer' { * - A token may carry both a `TokenAnalysis` _and_ an approved `PhraseAnalysis`; the per-token * parse coexists with the phrase-level gloss and is not a competing analysis. */ - phrases: PhraseAnalysis[]; + phraseAnalyses: PhraseAnalysis[]; } /** @@ -626,8 +626,8 @@ declare module 'interlinearizer' { * whole-word morpheme. `pos` available from Macula TSV for source-language tokens only. */ export interface TokenAnalysis extends Analysis { - /** Reference to the `Token.ref` being analyzed. */ - tokenRef: string; + /** Snapshot of the token being analyzed. */ + token: TokenSnapshot; /** * Ordered morpheme breakdown. Present when the analysis reaches sub-word granularity (e.g. an @@ -645,13 +645,6 @@ declare module 'interlinearizer' { */ features?: Record; - /** - * Surface text of the token at analysis time — used for drift detection. Consumers compare this - * against the current `Token.surfaceText`; on mismatch, flip `status` to `'stale'` to prompt - * re-review. - */ - tokenSnapshot?: string; - /** * Free-form gloss string keyed by BCP 47 analysis-language tag. Takes precedence over * `glossSenseRef` when both are present. @@ -745,7 +738,7 @@ declare module 'interlinearizer' { /** * A multi-token unit glossed or analyzed as a single phrase. * - * `tokenRefs` lists the tokens (in order) that belong to the phrase. The tokens may be: + * `tokens` lists the tokens (in order) that belong to the phrase. 
The tokens may be: * * - Adjacent within one segment ("en el" → "in the") * - Disjoint within one segment (French "ne … pas" → "not") @@ -774,8 +767,8 @@ declare module 'interlinearizer' { * share the same gloss / sense. */ export interface PhraseAnalysis extends Analysis { - /** Ordered `Token.ref` values of the tokens that compose this phrase. */ - tokenRefs: [string, ...string[]]; + /** Ordered snapshots of tokens that compose this phrase. */ + tokens: [TokenSnapshot, ...TokenSnapshot[]]; /** * Free-form gloss string keyed by BCP 47 analysis-language tag. Takes precedence over @@ -905,6 +898,7 @@ declare module 'interlinearizer' { export interface MorphemeLink { /** The `TokenAnalysis.id` that owns the referenced morpheme. */ tokenAnalysisId: string; + /** Specific `MorphemeAnalysis.id` within the identified `TokenAnalysis.morphemes`. */ morphemeId: string; } From 7c08b8ade4494852ccf9e6e263db6e3d49590d05 Mon Sep 17 00:00:00 2001 From: "D. Ror." Date: Fri, 15 May 2026 10:58:11 -0600 Subject: [PATCH 8/8] Model idea: Split linking out from analyses --- src/types/interlinearizer.d.ts | 178 +++++++++++++++++++++------------ 1 file changed, 112 insertions(+), 66 deletions(-) diff --git a/src/types/interlinearizer.d.ts b/src/types/interlinearizer.d.ts index 8af89c99..79208d78 100644 --- a/src/types/interlinearizer.d.ts +++ b/src/types/interlinearizer.d.ts @@ -4,6 +4,7 @@ */ declare module 'papi-shared-types' { + /** Project-level settings contributed by the Interlinearizer extension. */ export interface ProjectSettingTypes { /** * When true, the Interlinearizer displays a continuous horizontal token scroll strip above the @@ -103,6 +104,11 @@ declare module 'papi-shared-types' { * * Shape at a glance: * + * ActiveProject + * ├─ project : InterlinearProject + * ├─ source : Book[] + * └─ target? : Book[] — present only when targetProjectId is set + * * InterlinearProject * ├─ sourceProjectId * ├─ targetProjectId? 
— absent for analysis-only projects (LCM, PT9) @@ -110,16 +116,19 @@ declare module 'papi-shared-types' { * ├─ analysis : TextAnalysis * └─ links? : AlignmentLink[] * - * ActiveProject - * ├─ project : InterlinearProject - * ├─ source : Book[] - * └─ target? : Book[] — present only when targetProjectId is set + * TextAnalysis + * ├─ segmentAnalyses : SegmentAnalysis[] + * ├─ segmentAnalysisLinks : SegmentAnalysisLink[] + * ├─ tokenAnalyses : TokenAnalysis[] + * ├─ tokenAnalysisLinks : TokenAnalysisLink[] + * ├─ phraseAnalyses : PhraseAnalysis[] + * └─ phraseAnalysisLinks : PhraseAnalysisLink[] * - * The analysis layer is **flat** — not a mirror of the text layer's book / segment nesting. Every - * analysis record carries an id reference back to its text-layer counterpart (`segmentId` / - * `tokenRef`). Consumers index by id at load time (`Map`, etc.) to - * render a segment at a time. This keeps the layer's containers honest — none exist just to mirror - * a parent — and makes it trivial to add analyses without touching the text hierarchy. + * The analysis layer is **flat** — not a mirror of the text layer's book / segment nesting. + * Analysis payloads (`SegmentAnalysis`, `TokenAnalysis`, `PhraseAnalysis`) are stored separately + * from their text-layer attachments. Link records (`segmentAnalysisLinks`, `tokenAnalysisLinks`, + * `phraseAnalysisLinks`) connect each analysis id to one segment or one/many tokens. Consumers + * index links by segment/token ids at load time to render a segment at a time. * * Lexical information (entries, senses, allomorphs, grammar / MSA, …) is **not** stored in this * model. It lives in the Lexicon extension (`lexicon`); this model references it via `EntryRef` / @@ -136,10 +145,9 @@ declare module 'papi-shared-types' { * `Segment.tokens` so the baseline text can be reconstructed faithfully. They are simply omitted * from the analysis layer's `tokenAnalyses` (rather than stored there with empty analyses). 
* - * Staleness detection: `TokenAnalysis` records carry a `tokenSnapshot` of the token's surface text - * at analysis time. `AlignmentEndpoint` records carry an equivalent snapshot via - * `token.surfaceText`. When the baseline changes, consumers compare the snapshot against the - * current `Token.surfaceText` and flip `status` to `'stale'` on mismatch to prompt re-review. + * Staleness detection: `AlignmentEndpoint` records carry a token snapshot via `token.surfaceText`. + * When the baseline changes, consumers compare the snapshot against the current `Token.surfaceText` + * and flip `status` to `'stale'` on mismatch to prompt re-review. */ declare module 'interlinearizer' { // --------------------------------------------------------------------------- @@ -167,8 +175,8 @@ declare module 'interlinearizer' { * - `candidate` — proposed but not yet reviewed * - `rejected` — explicitly rejected by a human * - `stale` — the underlying token text has changed since this record was created; the record needs - * human review. Set by drift-detection logic comparing `tokenSnapshot` against the current - * `Token.surfaceText`. + * human review. Set by drift-detection logic comparing stored `TokenSnapshot.surfaceText` + * values against the current `Token.surfaceText`. */ export type AssignmentStatus = 'approved' | 'suggested' | 'candidate' | 'rejected' | 'stale'; @@ -357,7 +365,7 @@ declare module 'interlinearizer' { /** * Stable identifier for this segment, unique within the owning `InterlinearProject`. In * practice the id is project-wide unique because it is set to the verse SID (e.g. `"GEN 1:1"`). - * Used as the cross-reference key by `SegmentAnalysis.segmentId`. + * Used as the segment-side key by `SegmentAnalysisLink.segmentId`. */ id: string; @@ -422,8 +430,8 @@ declare module 'interlinearizer' { /** * Stable identifier for this token, unique within the owning `InterlinearProject`. 
In practice * the ref is project-wide unique because it embeds the verse SID and the token's character - * offset (e.g. `"GEN 1:1:0"` for the first token in Genesis 1:1). Used as the cross-reference - * key by `TokenAnalysis.tokenRef`, `PhraseAnalysis.tokenRefs`, and + * offset (e.g. `"GEN 1:1:0"` for the first token in Genesis 1:1). Used as the token-side key by + * `TokenAnalysisLink.token.tokenRef`, `PhraseAnalysisLink.tokens[*].tokenRef`, and * `AlignmentEndpoint.token.tokenRef`. */ ref: string; @@ -463,10 +471,9 @@ declare module 'interlinearizer' { /** * The analysis layer for an `InterlinearProject`. * - * Flat by design — it does **not** mirror the text layer's book / segment nesting. Every record - * carries an id reference back to its text-layer counterpart (`segmentId` / `tokenRef`). - * Consumers that need segment-local views build `Map` / `Map` at load time. + * Flat by design — it does **not** mirror the text layer's book / segment nesting. Analysis + * payload records are linked to the text layer through the corresponding `*AnalysisLinks` arrays. + * Consumers that need segment-local views build indexes from those links at load time. * * Keeping this layer flat avoids ceremonial container types whose only purpose is to mirror a * parent, and makes it trivial to add or remove analyses without touching the text hierarchy. @@ -481,67 +488,103 @@ declare module 'interlinearizer' { */ export interface TextAnalysis { /** - * Per-segment analysis records, keyed to `Segment.id` via `segmentId`. Carries only - * segment-level data (free / literal translations); token-level data lives in `tokenAnalyses`. + * Per-segment analysis payload records. Carries only segment-level data (free / literal + * translations); token-level data lives in `tokenAnalyses`. * - * Competing analyses are permitted: a single `segmentId` may have multiple `SegmentAnalysis` + * Competing analyses are permitted: a single segment may have multiple linked `SegmentAnalysis` * entries (e.g. 
an AI-drafted back translation alongside a human-edited one), distinguished by * `status` / `confidence` / `producer`. * - * **Invariant:** at most one `SegmentAnalysis` per `segmentId` has `status: 'approved'`. That - * entry is the canonical segment-level analysis for rendering; alternates are available to - * review workflows via the other statuses. This invariant is the caller's responsibility to - * maintain; no runtime enforcement exists. + * **Invariant:** for a given segment, at most one linked `SegmentAnalysisLink` should have + * `status: 'approved'`. That linked analysis is the canonical segment-level analysis for + * rendering; alternates are available to review workflows via the other statuses. This + * invariant is the caller's responsibility to maintain; no runtime enforcement exists. */ segmentAnalyses: SegmentAnalysis[]; /** - * Token-level analyses, flat across the whole text. Each entry references its token by - * `tokenRef`; the text layer keeps every token (words and punctuation) but this list typically - * includes only the tokens being analyzed — punctuation is omitted rather than stored with - * empty analyses. + * Links each `SegmentAnalysis.id` to a single `Segment.id`, along with review metadata for that + * assignment. + */ + segmentAnalysisLinks: SegmentAnalysisLink[]; + + /** + * Token-level analysis payload records, flat across the whole text. The text layer keeps every + * token (words and punctuation) but this list typically includes only tokens being analyzed — + * punctuation is omitted rather than stored with empty analyses. * - * Competing analyses are permitted: a single `tokenRef` may have multiple `TokenAnalysis` + * Competing analyses are permitted: a single token may have multiple linked `TokenAnalysis` * entries (e.g. a parser's suggestion alongside a human's choice), distinguished by `status` / * `confidence` / `producer`. * - * **Invariant:** at most one `TokenAnalysis` per `tokenRef` has `status: 'approved'`. 
That - * entry is the canonical analysis for rendering; alternates are available to review workflows - * via the other statuses (`'suggested'`, `'candidate'`, `'rejected'`, `'stale'`). This - * invariant is the caller's responsibility to maintain; no runtime enforcement exists. + * **Invariant:** for a given token, at most one linked `TokenAnalysisLink` should have `status: + * 'approved'`. That linked analysis is the canonical analysis for rendering; alternates are + * available to review workflows via the other statuses (`'suggested'`, `'candidate'`, + * `'rejected'`, `'stale'`). This invariant is the caller's responsibility to maintain; no + * runtime enforcement exists. */ tokenAnalyses: TokenAnalysis[]; + /** + * Links each `TokenAnalysis.id` to one token snapshot, along with review metadata for that + * assignment. + */ + tokenAnalysisLinks: TokenAnalysisLink[]; + /** * Multi-token phrase analyses, flat across the whole text. A phrase may group adjacent or * disjoint tokens and carries its own gloss. A phrase's member tokens may span multiple * segments. * - * Competing phrases are permitted: a given `tokenRef` may appear in multiple `PhraseAnalysis` + * Competing phrases are permitted: a given token may appear in multiple linked `PhraseAnalysis` * records (e.g. a suggested phrase grouping plus a human-approved one) distinguished by * `status`. * * **Invariants:** * - * - At most one `PhraseAnalysis` containing a given `tokenRef` has `status: 'approved'`. That - * phrase is canonical for rendering. + * - At most one linked `PhraseAnalysisLink` containing a given token should have `status: + * 'approved'`. That phrase is canonical for rendering. * - A token may carry both a `TokenAnalysis` _and_ an approved `PhraseAnalysis`; the per-token * parse coexists with the phrase-level gloss and is not a competing analysis. 
*/ phraseAnalyses: PhraseAnalysis[]; + + /** + * Links each `PhraseAnalysis.id` to one or more token snapshots, along with review metadata for + * that assignment. + */ + phraseAnalysisLinks: PhraseAnalysisLink[]; + } + + /** Shared link metadata for attaching an analysis payload record to text-layer targets. */ + export interface AnalysisLink { + /** The `Analysis.id` for the linked analysis payload record. */ + analysisId: string; + + /** Required review status. */ + status: AssignmentStatus; + + /** How much to trust this analysis assignment. */ + confidence?: Confidence; + } + + /** Links one `SegmentAnalysis` payload record to a single source segment. */ + export interface SegmentAnalysisLink extends AnalysisLink { + /** Reference to the corresponding `Segment.id` in the text layer. */ + segmentId: string; } /** - * Shared base for all analysis record types (`SegmentAnalysis`, `TokenAnalysis`, - * `PhraseAnalysis`). Carries the fields common to every analysis: stable identity, review status, - * and optional provenance. + * Shared base for all analysis payload record types (`SegmentAnalysis`, `TokenAnalysis`, + * `PhraseAnalysis`). Carries fields common to each analysis payload: stable identity, token + * surface text, and optional provenance. */ export interface Analysis { /** Unique within the owning `TextAnalysis` — stable reference for this record. */ id: string; - /** Required review status. */ - status: AssignmentStatus; + /** Surface form of the analyzed text span (token, phrase, or segment). */ + surfaceText: string; /** * How much to trust this analysis. Independent of who produced it — see `producer` / @@ -565,8 +608,8 @@ declare module 'interlinearizer' { } /** - * Per-segment analysis record. Carries data that belongs to a segment as a whole (free / literal - * translations). Token analyses and phrases live on `TextAnalysis` directly, keyed by id. + * Per-segment analysis payload record. 
Carries data that belongs to a segment as a whole (free / + * literal translations). Token analyses and phrases live on `TextAnalysis` directly. * * Source-system mapping: * @@ -578,9 +621,6 @@ declare module 'interlinearizer' { * synthesized. */ export interface SegmentAnalysis extends Analysis { - /** Reference to the corresponding `Segment.id` in the text layer. */ - segmentId: string; - /** Idiomatic translation of the segment. */ freeTranslation?: MultiString; @@ -592,6 +632,12 @@ declare module 'interlinearizer' { // §3 TokenAnalysis — parse + 1:1 gloss // --------------------------------------------------------------------------- + /** Links one `TokenAnalysis` payload record to exactly one token snapshot. */ + export interface TokenAnalysisLink extends AnalysisLink { + /** Token that this analysis refers to. */ + token: TokenSnapshot; + } + /** * Analysis of a single token: a word-level (1:1) gloss plus optional morpheme-level parse. * @@ -626,9 +672,6 @@ declare module 'interlinearizer' { * whole-word morpheme. `pos` available from Macula TSV for source-language tokens only. */ export interface TokenAnalysis extends Analysis { - /** Snapshot of the token being analyzed. */ - token: TokenSnapshot; - /** * Ordered morpheme breakdown. Present when the analysis reaches sub-word granularity (e.g. an * LCM `IWfiAnalysis` with `MorphBundlesOS`). Absent when the analysis treats the token as a @@ -660,10 +703,9 @@ declare module 'interlinearizer' { } /** - * Analysis of one morpheme within a token's parse. Unlike `TokenAnalysis` and `SegmentAnalysis`, - * which reference their subject by id, `MorphemeAnalysis` owns the morpheme itself: `form` and - * `writingSystem` store the structural data directly, while the optional refs link it into the - * Lexicon extension for lexical resolution. + * Analysis of one morpheme within a token's parse. 
`MorphemeAnalysis` owns the morpheme itself: + * `form` and `writingSystem` store the structural data directly, while the optional refs link it + * into the Lexicon extension for lexical resolution. * * `form` is the morpheme's surface text as it appeared in this analysis context — which may * differ from the citation form on the referenced lexicon entry (e.g. under phonological @@ -735,10 +777,17 @@ declare module 'interlinearizer' { // §4 PhraseAnalysis — multi-token gloss unit // --------------------------------------------------------------------------- + /** Links one `PhraseAnalysis` payload record to one or more token snapshots. */ + export interface PhraseAnalysisLink extends AnalysisLink { + /** Ordered snapshots of tokens that compose this phrase. */ + tokens: [TokenSnapshot, ...TokenSnapshot[]]; + } + /** * A multi-token unit glossed or analyzed as a single phrase. * - * `tokens` lists the tokens (in order) that belong to the phrase. The tokens may be: + * The linked `PhraseAnalysisLink.tokens` list holds the token snapshots (in order) that belong to + * the phrase. The tokens may be: * * - Adjacent within one segment ("en el" → "in the") * - Disjoint within one segment (French "ne … pas" → "not") @@ -761,15 +810,12 @@ declare module 'interlinearizer' { * - LCM: LCM does not natively model multi-word phrases as first-class objects. Multi-word glosses, * when present, must be synthesized as `PhraseAnalysis` records during import. * - Paratext: a `LexemeCluster` with `Type = Phrase` spans multiple words — each such cluster - * becomes one `PhraseAnalysis` whose `tokenRefs` enumerate the covered tokens. `senseRef` is - * the selected `LexemeData` reference for the phrase. + * becomes one `PhraseAnalysis` whose linked `PhraseAnalysisLink.tokens` enumerate the covered + * tokens. `senseRef` is the selected `LexemeData` reference for the phrase. * - BT Extension: not natively tracked. 
Must be synthesized during migration when adjacent tokens
    * share the same gloss / sense.
    */
   export interface PhraseAnalysis extends Analysis {
-    /** Ordered snapshots of tokens that compose this phrase. */
-    tokens: [TokenSnapshot, ...TokenSnapshot[]];
-
     /**
      * Free-form gloss string keyed by BCP 47 analysis-language tag. Takes precedence over
      * `senseRef` when both are present.
@@ -913,8 +959,8 @@ declare module 'interlinearizer' {
    *
    * The token hierarchy (`Book` / `Segment` / `Token`) is **not** stored here — it is rebuilt from
    * Platform.Bible's USJ on each load. Only the analysis data and alignment links are persisted.
-   * Token-level drift is detected via `tokenSnapshot` on `TokenAnalysis` records and via
-   * `token.surfaceText` on `AlignmentEndpoint` records.
+   * Token-level drift is detected via the stored `TokenSnapshot.surfaceText` values on
+   * `TokenAnalysisLink`, `PhraseAnalysisLink`, and `AlignmentEndpoint` records.
    *
    * Projects are stored via `papi.storage` (extension-host only) under two keys:
    *