From a5556ce28a029d4543a9594517008356475e5548 Mon Sep 17 00:00:00 2001 From: alex-rawlings-yyc Date: Fri, 15 May 2026 10:56:49 -0600 Subject: [PATCH 1/8] Simplify interlinear model: remove InterlinearAlignment/InterlinearText, add ActiveProject --- src/types/interlinearizer.d.ts | 216 ++++++++++----------------------- 1 file changed, 62 insertions(+), 154 deletions(-) diff --git a/src/types/interlinearizer.d.ts b/src/types/interlinearizer.d.ts index 79954ab3..b9cda9bb 100644 --- a/src/types/interlinearizer.d.ts +++ b/src/types/interlinearizer.d.ts @@ -22,19 +22,19 @@ declare module 'papi-shared-types' { /** * Creates a new interlinearizer project for the given source project. Called from the WebView - * after the user fills in the create-project modal. Returns the persisted project serialized as - * a JSON string, or undefined if project creation fails (failure is logged and surfaced as an - * error notification). + * after the user fills in the create-project modal. Returns the new project's UUID, or + * undefined if project creation fails (failure is logged and surfaced as an error + * notification). * * @param sourceProjectId Platform.Bible project ID of the source text to interlinearize. - * @param analysisWritingSystem BCP 47 tag for the language used in glosses and annotations - * (e.g. `'en'`). + * @param analysisLanguages BCP 47 tags for the languages used in glosses and annotations (e.g. + * `['en']`). * @param name Optional user-facing name for the project. * @param description Optional user-facing description for the project. */ 'interlinearizer.createProject': ( sourceProjectId: string, - analysisWritingSystem: string, + analysisLanguages: string[], name?: string, description?: string, ) => Promise; @@ -56,23 +56,16 @@ declare module 'papi-shared-types' { 'interlinearizer.deleteProject': (interlinearProjectId: string) => Promise; /** - * Opens the project-selector dialog in the Interlinearizer WebView. 
The backend registers this + * Opens the project-selector modal in the Interlinearizer WebView. The backend registers this * command to make it visible to the platform menu system; all logic executes in the WebView. */ 'interlinearizer.openSelectProjectModal': () => Promise; /** - * Opens the create-project dialog in the Interlinearizer WebView. The backend registers this + * Opens the create-project modal in the Interlinearizer WebView. The backend registers this * command to make it visible to the platform menu system; all logic executes in the WebView. */ - 'interlinearizer.openNewProjectModal': () => Promise; - - /** - * Opens the project-info modal for the active project in the Interlinearizer WebView. The - * backend registers this command to make it visible to the platform menu system; all logic - * executes in the WebView. - */ - 'interlinearizer.openProjectInfoModal': () => Promise; + 'interlinearizer.newProject': () => Promise; /** * Updates the metadata of an existing interlinearizer project. Returns the updated project as a @@ -81,14 +74,14 @@ declare module 'papi-shared-types' { * @param interlinearProjectId UUID of the interlinearizer project to update. * @param name New user-facing name; omit or pass `undefined` to clear. * @param description New user-facing description; omit or pass `undefined` to clear. - * @param analysisWritingSystem New BCP 47 analysis language tag; omit or pass empty to leave + * @param analysisLanguages New BCP 47 analysis language tags; omit or pass empty array to leave * unchanged (the field is required and cannot be cleared). 
*/ 'interlinearizer.updateProjectMetadata': ( interlinearProjectId: string, name: string | undefined, description: string | undefined, - analysisWritingSystem?: string, + analysisLanguages?: string[], ) => Promise; } } @@ -101,15 +94,12 @@ declare module 'papi-shared-types' { * * Shape at a glance: * - * InterlinearAlignment - * ├─ source : InterlinearText — the input being analyzed - * ├─ target : InterlinearText — the analysis / output side - * └─ links : AlignmentLink[] + * ActiveProject + * ├─ project : InterlinearProject — persisted envelope (analysis + links) + * └─ source : Book[] — text layer (rebuilt from USJ at runtime) + * └─ Segment[] → Token[] * - * InterlinearText - * ├─ books : Book[] — text layer (baseline) - * │ └─ Segment[] → Token[] - * └─ analysis : TextAnalysis — analysis layer (flat) + * InterlinearProject.analysis : TextAnalysis — analysis layer (flat) * ├─ segmentAnalyses : SegmentAnalysis[] (per-segment translations) * ├─ tokenAnalyses : TokenAnalysis[] (parse + 1:1 gloss) * └─ phrases : Phrase[] (multi-token gloss) @@ -131,10 +121,9 @@ declare module 'papi-shared-types' { * - `IMoForm` (allomorph) is not exported; no allomorph service. * - `IMoMorphSynAnalysis` (MSA) is not exported; no MSA service. * - * Punctuation tokens are first-class citizens of the text layer on both source and target sides — - * they are stored in `Segment.tokens` so the baseline text can be reconstructed faithfully. They - * are simply omitted from the analysis layer's `tokenAnalyses` (rather than stored there with empty - * analyses). + * Punctuation tokens are first-class citizens of the text layer — they are stored in + * `Segment.tokens` so the baseline text can be reconstructed faithfully. They are simply omitted + * from the analysis layer's `tokenAnalyses` (rather than stored there with empty analyses). * * Staleness detection: analysis records and alignment endpoints carry a `tokenSnapshot` of the * token's surface text at analysis time. 
When the baseline changes, consumers compare the snapshot @@ -303,112 +292,7 @@ declare module 'interlinearizer' { } // --------------------------------------------------------------------------- - // §1 InterlinearAlignment - // --------------------------------------------------------------------------- - - /** - * Top-level bilingual container pairing two interlinear texts — `source` (the input being - * analyzed) and `target` (the analysis / output side) — with the alignment links between them. - * - * The sides carry directional meaning: `source` is what the workflow takes in, `target` is what - * the workflow produces or aligns toward. The model types themselves are identical on both sides; - * that directional contract is enforced by the application layer, not by shape. Example - * pairings: - * - * - Glossing a vernacular draft (source) in an analyst language such as English (target); - * - Aligning a Greek / Hebrew resource text (source) against a vernacular translation (target); - * - Aligning one translation (source) against another (target). - * - * Each side carries its own text (books → segments → tokens) and, optionally, a flat analysis - * layer (`segmentAnalyses` / `tokenAnalyses` / `phrases`). `AlignmentLink`s bridge tokens or - * morphemes from source to target. - * - * Source-system mapping: - * - * - LCM: no native bilingual alignment model. Constructed by pairing two `InterlinearText` - * instances produced from LCM / companion data; the workflow decides which is `source` and - * which is `target`. - * - Paratext: not directly represented. Can be constructed from parallel projects that share the - * same versification. - * - BT Extension: one `Translation` scoped to two sides (`Translation.sideNum`: 1 / 2). By BT - * convention side 1 is the source and side 2 is the target; each side becomes an - * `InterlinearText`. `Alignment` records become `AlignmentLink`s. 
- */ - export interface InterlinearAlignment { - /** Unique identifier for this alignment pair. */ - id: string; - - /** - * The input being analyzed — for example a vernacular draft being glossed, a Greek / Hebrew - * resource text being aligned against a translation, or one translation being aligned against - * another. - */ - source: InterlinearText; - - /** - * The analysis / output side — for example an analyst-language gloss, a back translation, or - * the translation being aligned against the source. - */ - target: InterlinearText; - - /** - * Token- or morpheme-level alignment links connecting endpoints in the source interlinear to - * endpoints in the target interlinear. Empty when no alignment has been computed or imported. - */ - links: AlignmentLink[]; - } - - // --------------------------------------------------------------------------- - // §2 InterlinearText - // --------------------------------------------------------------------------- - - /** - * One side of an interlinear alignment — the baseline text plus its parallel analysis layer. - * - * The text layer (`books`) mirrors the underlying document's structure. The analysis layer - * (`analysis`) has the same divisions but carries morpheme / gloss / phrase information and - * references into the Lexicon extension. - * - * Source-system mapping: - * - * - LCM: one `IScripture` instance (singleton per project). Text layer from `IScrBook` / - * `IScrSection` / `IScrTxtPara` content; analysis layer from `IWfiWordform` / `IWfiAnalysis` / - * `IWfiGloss` referenced by `ISegment.AnalysesRS`. `analysisLanguages[]` is the set of - * languages present on `IWfiGloss.Form`. - * - Paratext: merged from per-book, per-language `InterlinearData` files - * (`Interlinear_{language}/Interlinear_{language}_{book}.xml`). Text layer from USFM; analysis - * layer from `ClusterData` + `LexemeCluster` + `WordAnalysis`. Each file's `GlossLanguage` is - * added to `analysisLanguages[]`. 
- * - BT Extension: one side of a `Translation` (a single `sideNum` value). Text layer from `Token` / - * `Instance` records; analysis layer synthesized from per-token `gloss` / `lemmaText` / - * `senseIds`. Analysis is typically in a single language. - */ - export interface InterlinearText { - /** Unique identifier for this interlinear text. */ - id: string; - - /** Writing system of the baseline text. */ - writingSystem: string; - - /** - * Writing systems in which analyses are provided (e.g. `["en", "fr"]`). A single text can hold - * analyses in multiple languages. - */ - analysisLanguages: string[]; - - /** Baseline text: books of scripture (or other texts). */ - books: Book[]; - - /** - * Parallel analysis layer. Absent (not present) when the text has never been analyzed; - * present-but-empty (`segmentAnalyses: [], tokenAnalyses: [], phrases: []`) when analysis has - * been initialized but no records have been added yet. - */ - analysis?: TextAnalysis; - } - - // --------------------------------------------------------------------------- - // §3 Text layer — Book, Segment, Token + // §1 Text layer — Book, Segment, Token // --------------------------------------------------------------------------- /** @@ -461,7 +345,7 @@ declare module 'interlinearizer' { */ export interface Segment { /** - * Unique within the owning `InterlinearText` — used as the cross-reference key by + * Unique within the owning `Book` — used as the cross-reference key by * `SegmentAnalysis.segmentId`. */ id: string; @@ -520,8 +404,8 @@ declare module 'interlinearizer' { */ export interface Token { /** - * Unique within the owning `InterlinearText` — used as the cross-reference key by - * `TokenAnalysis.tokenId`, `Phrase.tokenIds`, and `AlignmentEndpoint.tokenId`. + * Unique within the owning `Book` — used as the cross-reference key by `TokenAnalysis.tokenId`, + * `Phrase.tokenIds`, and `AlignmentEndpoint.tokenId`. 
*/ id: string; @@ -554,11 +438,11 @@ declare module 'interlinearizer' { } // --------------------------------------------------------------------------- - // §4 Analysis layer — TextAnalysis, SegmentAnalysis + // §2 Analysis layer — TextAnalysis, SegmentAnalysis // --------------------------------------------------------------------------- /** - * The analysis layer for an `InterlinearText`. + * The analysis layer for an `InterlinearProject`. * * Flat by design — it does **not** mirror the text layer's book / segment nesting. Every record * carries an id reference back to its text-layer counterpart (`segmentId` / `tokenId`). Consumers @@ -647,7 +531,7 @@ declare module 'interlinearizer' { /** * Reference to the corresponding `Segment.id` in the text layer (unique within the owning - * `InterlinearText`). + * `Book`). */ segmentId: string; @@ -680,7 +564,7 @@ declare module 'interlinearizer' { } // --------------------------------------------------------------------------- - // §5 TokenAnalysis — parse + 1:1 gloss + // §3 TokenAnalysis — parse + 1:1 gloss // --------------------------------------------------------------------------- /** @@ -723,7 +607,7 @@ declare module 'interlinearizer' { */ id: string; - /** Reference to the `Token.id` being analyzed (unique within the owning `InterlinearText`). */ + /** Reference to the `Token.id` being analyzed (unique within the owning `Book`). */ tokenId: string; /** Ordered morpheme breakdown. Omitted for whole-word analyses. 
*/ @@ -855,7 +739,7 @@ declare module 'interlinearizer' { } // --------------------------------------------------------------------------- - // §6 Phrase — multi-token gloss unit + // §4 Phrase — multi-token gloss unit // --------------------------------------------------------------------------- /** @@ -946,7 +830,7 @@ declare module 'interlinearizer' { }; // --------------------------------------------------------------------------- - // §7 AlignmentLink, AlignmentEndpoint + // §5 AlignmentLink, AlignmentEndpoint // --------------------------------------------------------------------------- /** @@ -967,7 +851,7 @@ declare module 'interlinearizer' { * Eflomal-generated alignments leave `originNum` and `statusNum` unset (default 0, CREATED). */ export interface AlignmentLink { - /** Unique within the owning `InterlinearAlignment` — stable reference for this link. */ + /** Unique within the owning `InterlinearProject` — stable reference for this link. */ id: string; /** Source-side endpoints (one or more tokens / morphemes). */ @@ -1055,7 +939,7 @@ declare module 'interlinearizer' { ); // --------------------------------------------------------------------------- - // §8 InterlinearProject — persisted project envelope + // §6 InterlinearProject — persisted project envelope // --------------------------------------------------------------------------- /** @@ -1089,16 +973,16 @@ declare module 'interlinearizer' { sourceProjectId: string; /** - * BCP 47 tag for the language used in glosses and annotations (e.g. `'en'`). Populates + * BCP 47 tags for the languages used in glosses and annotations (e.g. `['en']`). Populates * `MultiString` keys in `TokenAnalysis`, `SegmentAnalysis`, and `Phrase` records. */ - analysisWritingSystem: string; + analysisLanguages: string[]; - /** Source-side analysis layer. Empty at creation; populated as the user annotates tokens. 
*/ - sourceAnalysis: TextAnalysis; + /** Platform.Bible project ID for the target text, if a target-side project is linked. */ + targetProjectId?: string; - /** Target-side analysis layer. Empty at creation; populated as the user annotates tokens. */ - targetAnalysis: TextAnalysis; + /** Analysis layer. Empty at creation; populated as the user annotates tokens. */ + analysis: TextAnalysis; /** * Token- or morpheme-level alignment links. Empty at creation; populated as the user aligns @@ -1106,4 +990,28 @@ declare module 'interlinearizer' { */ links: AlignmentLink[]; } + + // --------------------------------------------------------------------------- + // §7 ActiveProject — runtime pairing of project envelope and source text + // --------------------------------------------------------------------------- + + /** + * The runtime object for an open interlinearizer project. Pairs the persisted + * {@link InterlinearProject} envelope with the reconstructed source-text hierarchy. + * + * `source` is rebuilt from Platform.Bible's USJ on each load and is never serialized. All + * annotation and alignment mutations target `project.analysis` and `project.links`; saving is + * done by writing those fields back to storage via `saveProjectAnalysis`. + */ + export interface ActiveProject { + /** The persisted project envelope. Mutations target `project.analysis` and `project.links`. */ + project: InterlinearProject; + + /** + * The reconstructed source books, built from Platform.Bible USJ on load. Never serialized — + * rebuilt on every activation. Typically one book per scripture book code; multiple books may + * be present when the UI has prefetched adjacent books. 
+ */ + source: Book[]; + } } From fe04619e4f17c63ff9063f726d3ab812cc696a05 Mon Sep 17 00:00:00 2001 From: alex-rawlings-yyc Date: Fri, 15 May 2026 10:56:57 -0600 Subject: [PATCH 2/8] Fix model gaps for lossless LCM / PT9 / BT Extension import --- src/types/interlinearizer.d.ts | 109 +++++++++++++++++++++------------ 1 file changed, 70 insertions(+), 39 deletions(-) diff --git a/src/types/interlinearizer.d.ts b/src/types/interlinearizer.d.ts index b9cda9bb..b9b9a9ae 100644 --- a/src/types/interlinearizer.d.ts +++ b/src/types/interlinearizer.d.ts @@ -27,14 +27,19 @@ declare module 'papi-shared-types' { * notification). * * @param sourceProjectId Platform.Bible project ID of the source text to interlinearize. - * @param analysisLanguages BCP 47 tags for the languages used in glosses and annotations (e.g. - * `['en']`). + * @param analysisLanguages BCP 47 tags for all languages used in glosses and annotations (e.g. + * `['en']`). LCM: one per writing system present on `IWfiGloss.Form`. Paratext: one per + * merged `GlossLanguage` file. BT Extension: typically one language. + * @param targetProjectId Optional Platform.Bible project ID of the target text. Required for BT + * Extension projects so that `AlignmentLink.targetEndpoints` can be resolved at runtime. + * Omitted for analysis-only projects (LCM, PT9 single-sided). * @param name Optional user-facing name for the project. * @param description Optional user-facing description for the project. */ 'interlinearizer.createProject': ( sourceProjectId: string, analysisLanguages: string[], + targetProjectId?: string, name?: string, description?: string, ) => Promise; @@ -74,14 +79,17 @@ declare module 'papi-shared-types' { * @param interlinearProjectId UUID of the interlinearizer project to update. * @param name New user-facing name; omit or pass `undefined` to clear. * @param description New user-facing description; omit or pass `undefined` to clear. 
- * @param analysisLanguages New BCP 47 analysis language tags; omit or pass empty array to leave - * unchanged (the field is required and cannot be cleared). + * @param analysisLanguages New BCP 47 analysis language tags; omit or pass an empty array to + * leave unchanged (the field is required and cannot be cleared). + * @param targetProjectId New target-project ID; omit or pass `undefined` to clear (removes the + * target-side text binding). */ 'interlinearizer.updateProjectMetadata': ( interlinearProjectId: string, name: string | undefined, description: string | undefined, analysisLanguages?: string[], + targetProjectId?: string, ) => Promise; } } @@ -96,7 +104,8 @@ declare module 'papi-shared-types' { * * ActiveProject * ├─ project : InterlinearProject — persisted envelope (analysis + links) - * └─ source : Book[] — text layer (rebuilt from USJ at runtime) + * ├─ source : Book[] — source text layer (rebuilt from USJ at runtime) + * └─ target? : Book[] — target text layer (rebuilt from USJ at runtime; absent for analysis-only projects) * └─ Segment[] → Token[] * * InterlinearProject.analysis : TextAnalysis — analysis layer (flat) @@ -571,10 +580,10 @@ declare module 'interlinearizer' { * Analysis of a single token: a word-level (1:1) gloss plus optional morpheme-level parse. * * `gloss` is a free-form gloss string for the token (keyed by analysis-language tag). - * `glossSenseRef` alternatively resolves the gloss through a specific `ISense` in the Lexicon - * extension — when set, the rendered gloss is the sense's gloss text and may be refreshed - * automatically if the lexicon is edited. Both may be set concurrently: when both are present, - * `gloss` serves as a local override that takes precedence for rendering. + * `glossSenseRef` resolves the gloss through a specific `ISense` in the Lexicon extension — when + * set, the sense's gloss text can be surfaced and refreshed automatically if the lexicon is + * edited. 
Both may be present simultaneously; when they are, `gloss` takes precedence for + * rendering (the local override wins over the lexicon-derived value). * * `morphemes` carries the parse information. Each morpheme links to the Lexicon extension via * `entryRef` / `senseRef`. @@ -592,8 +601,7 @@ declare module 'interlinearizer' { * - Paratext: `LexemeCluster` + `WordAnalysis`. `gloss` resolved from the selected * `LexiconSense.Gloss` (per-language strings). `morphemes` from the `Lexeme[]` within * `WordAnalysis` when `LexemeCluster.Type = WordParse`. Paratext stores POS on the lexeme, not - * per-analysis. `status` / `confidence` inferred from `InterlinearLexeme.IsGuess` and - * `.Score`. + * per-analysis. `status` / `confidence` inferred from `InterlinearLexeme.IsGuess` and `.Score`. * - BT Extension: synthesized per-token from `gloss` / `lemmaText` / `senseIds`. BT Extension * stores gloss per-token rather than as shared analysis objects — each token gets its own * `TokenAnalysis`. `status` from `Instance.termStatusNum` (BiblicalTermStatus). `confidence` @@ -655,16 +663,15 @@ declare module 'interlinearizer' { tokenSnapshot?: string; /** - * Free-form gloss string for this token, keyed by analysis-language BCP 47 tag. May coexist - * with `glossSenseRef` when the user has both a local override and a lexicon-backed sense — the - * local gloss takes precedence for rendering in that case. + * Free-form gloss string keyed by BCP 47 analysis-language tag. Takes precedence over + * `glossSenseRef` when both are present. */ gloss?: MultiString; /** - * Resolves the gloss through a specific `ISense` in the Lexicon extension, enabling automatic - * refresh when the lexicon is edited. May coexist with `gloss` when the user maintains both a - * lexicon-linked sense and a local override. + * Reference to the `ISense` in the Lexicon extension whose gloss text this analysis uses. 
May + * coexist with `gloss`; when both are present, `gloss` is the active rendering value and + * `glossSenseRef` is retained so the lexicon link is not lost. */ glossSenseRef?: SenseRef; }; @@ -754,11 +761,10 @@ declare module 'interlinearizer' { * Each token may still carry its own `TokenAnalysis` alongside the phrase; the phrase contributes * the combined-unit gloss. * - * `gloss` is a free-form phrase gloss. `senseRef` alternatively points at a lexicon sense when - * the phrase is a multi-word lexical entry — the Lexicon extension supports both kinds via - * `IEntry.morphType = Phrase` (contiguous) or `DiscontiguousPhrase` (e.g. "ne … pas"). Both may - * be set concurrently: when both are present, `gloss` serves as a local override that takes - * precedence for rendering. + * `gloss` is a free-form phrase gloss. `senseRef` points at a lexicon sense when the phrase is a + * multi-word lexical entry — the Lexicon extension supports both kinds via `IEntry.morphType = + * Phrase` (contiguous) or `DiscontiguousPhrase` (e.g. "ne … pas"). Both may be present + * simultaneously; when they are, `gloss` takes precedence for rendering. * * Provenance fields (`producer`, `sourceUser`, `confidence`, `status`) let a suggestion engine * record proposed phrases that a user can then approve or reject, enabling automated recognition @@ -814,17 +820,15 @@ declare module 'interlinearizer' { tokenSnapshots?: [string, ...string[]]; /** - * Free-form phrase gloss keyed by analysis-language BCP 47 tag. May coexist with `senseRef` - * when the user has both a local override and a lexicon-backed sense — the local gloss takes - * precedence for rendering in that case. + * Free-form gloss string keyed by BCP 47 analysis-language tag. Takes precedence over + * `senseRef` when both are present. */ gloss?: MultiString; /** - * Points at a multi-word lexical entry in the Lexicon extension (e.g. 
`IEntry.morphType = - * Phrase` or `DiscontiguousPhrase`), enabling automatic gloss refresh when the lexicon is - * edited. May coexist with `gloss` when the user maintains both a lexicon-linked sense and a - * local override. + * Reference to the `ISense` in the Lexicon extension this phrase maps to. May coexist with + * `gloss`; when both are present, `gloss` is the active rendering value and `senseRef` is + * retained so the lexicon link is not lost. */ senseRef?: SenseRef; }; @@ -944,7 +948,7 @@ declare module 'interlinearizer' { /** * The storage envelope for one interlinearizer project. Multiple projects may exist for the same - * pair of Platform.Bible projects (e.g. different analysis languages). + * source project (e.g. different analysis languages, or different target alignments). * * The token hierarchy (`Book` / `Segment` / `Token`) is **not** stored here — it is rebuilt from * Platform.Bible's USJ on each load. Only the analysis data and alignment links are persisted. @@ -973,14 +977,29 @@ declare module 'interlinearizer' { sourceProjectId: string; /** - * BCP 47 tags for the languages used in glosses and annotations (e.g. `['en']`). Populates + * Platform.Bible project ID for the target text. Present only for bilateral alignment projects + * (e.g. BT Extension imports) where `AlignmentLink.targetEndpoints` must resolve to tokens in a + * second text. Omitted for analysis-only projects (LCM, PT9 single-sided glossing). + * + * When present, the `ActiveProject.target` books are rebuilt from this project's USJ on load, + * exactly as `ActiveProject.source` is rebuilt from `sourceProjectId`. + */ + targetProjectId?: string; + + /** + * BCP 47 tags for all languages used in glosses and annotations (e.g. `['en']`). Populates * `MultiString` keys in `TokenAnalysis`, `SegmentAnalysis`, and `Phrase` records. + * + * Source-system mapping: + * + * - LCM: the set of writing systems present on `IWfiGloss.Form` (one tag per analysis language in + * the project). 
+ * - Paratext: one tag per merged `GlossLanguage` file + * (`Interlinear_{language}/Interlinear_{language}_{book}.xml`). + * - BT Extension: typically a single language; set from the per-token `gloss` writing system. */ analysisLanguages: string[]; - /** Platform.Bible project ID for the target text, if a target-side project is linked. */ - targetProjectId?: string; - /** Analysis layer. Empty at creation; populated as the user annotates tokens. */ analysis: TextAnalysis; @@ -992,16 +1011,21 @@ declare module 'interlinearizer' { } // --------------------------------------------------------------------------- - // §7 ActiveProject — runtime pairing of project envelope and source text + // §7 ActiveProject — runtime pairing of project envelope and text layers // --------------------------------------------------------------------------- /** * The runtime object for an open interlinearizer project. Pairs the persisted - * {@link InterlinearProject} envelope with the reconstructed source-text hierarchy. + * {@link InterlinearProject} envelope with the reconstructed text hierarchies. + * + * `source` and `target` are rebuilt from Platform.Bible's USJ on each load and are never + * serialized. All annotation and alignment mutations target `project.analysis` and + * `project.links`; saving is done by writing those fields back to storage via + * `saveProjectAnalysis`. * - * `source` is rebuilt from Platform.Bible's USJ on each load and is never serialized. All - * annotation and alignment mutations target `project.analysis` and `project.links`; saving is - * done by writing those fields back to storage via `saveProjectAnalysis`. + * `target` is present only when `project.targetProjectId` is set (bilateral alignment projects + * such as BT Extension imports). When present, `AlignmentLink.targetEndpoints` token IDs resolve + * against these books; when absent, only `sourceEndpoints` can be resolved. */ export interface ActiveProject { /** The persisted project envelope. 
Mutations target `project.analysis` and `project.links`. */ @@ -1013,5 +1037,12 @@ declare module 'interlinearizer' { * be present when the UI has prefetched adjacent books. */ source: Book[]; + + /** + * The reconstructed target books, built from `project.targetProjectId`'s USJ on load. Present + * only when `project.targetProjectId` is set; absent for analysis-only projects (LCM, PT9). + * Never serialized — rebuilt on every activation alongside `source`. + */ + target?: Book[]; } } From dcaf9fbdef014b6abacd0af361fd8a9866b41019 Mon Sep 17 00:00:00 2001 From: alex-rawlings-yyc Date: Fri, 15 May 2026 10:57:14 -0600 Subject: [PATCH 3/8] Make `analysisLanguages` required --- src/types/interlinearizer.d.ts | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/types/interlinearizer.d.ts b/src/types/interlinearizer.d.ts index b9b9a9ae..b378fdcb 100644 --- a/src/types/interlinearizer.d.ts +++ b/src/types/interlinearizer.d.ts @@ -79,8 +79,8 @@ declare module 'papi-shared-types' { * @param interlinearProjectId UUID of the interlinearizer project to update. * @param name New user-facing name; omit or pass `undefined` to clear. * @param description New user-facing description; omit or pass `undefined` to clear. - * @param analysisLanguages New BCP 47 analysis language tags; omit or pass an empty array to - * leave unchanged (the field is required and cannot be cleared). + * @param analysisLanguages New BCP 47 analysis language tags. Must be a non-empty array; pass + * the current value to leave it unchanged (the field is required and cannot be cleared). * @param targetProjectId New target-project ID; omit or pass `undefined` to clear (removes the * target-side text binding). 
*/ @@ -88,7 +88,7 @@ declare module 'papi-shared-types' { interlinearProjectId: string, name: string | undefined, description: string | undefined, - analysisLanguages?: string[], + analysisLanguages: string[], targetProjectId?: string, ) => Promise; } @@ -601,7 +601,8 @@ declare module 'interlinearizer' { * - Paratext: `LexemeCluster` + `WordAnalysis`. `gloss` resolved from the selected * `LexiconSense.Gloss` (per-language strings). `morphemes` from the `Lexeme[]` within * `WordAnalysis` when `LexemeCluster.Type = WordParse`. Paratext stores POS on the lexeme, not - * per-analysis. `status` / `confidence` inferred from `InterlinearLexeme.IsGuess` and `.Score`. + * per-analysis. `status` / `confidence` inferred from `InterlinearLexeme.IsGuess` and + * `.Score`. * - BT Extension: synthesized per-token from `gloss` / `lemmaText` / `senseIds`. BT Extension * stores gloss per-token rather than as shared analysis objects — each token gets its own * `TokenAnalysis`. `status` from `Instance.termStatusNum` (BiblicalTermStatus). `confidence` From 0d23767815daee4b757a0e35af64ba7c1ddaf23b Mon Sep 17 00:00:00 2001 From: alex-rawlings-yyc Date: Fri, 15 May 2026 10:57:22 -0600 Subject: [PATCH 4/8] Add comments about mapping of BT Extension's `sideNum` --- src/types/interlinearizer.d.ts | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/types/interlinearizer.d.ts b/src/types/interlinearizer.d.ts index b378fdcb..771088b7 100644 --- a/src/types/interlinearizer.d.ts +++ b/src/types/interlinearizer.d.ts @@ -984,6 +984,11 @@ declare module 'interlinearizer' { * * When present, the `ActiveProject.target` books are rebuilt from this project's USJ on load, * exactly as `ActiveProject.source` is rebuilt from `sourceProjectId`. + * + * BT Extension: corresponds to one `Translation` scoped to two sides (`Translation.sideNum`). 
+ * By BT convention `sideNum = 1` is the source and `sideNum = 2` is the target; + * `sourceProjectId` maps to the side-1 project and `targetProjectId` maps to the side-2 + * project. */ targetProjectId?: string; @@ -1027,6 +1032,9 @@ declare module 'interlinearizer' { * `target` is present only when `project.targetProjectId` is set (bilateral alignment projects * such as BT Extension imports). When present, `AlignmentLink.targetEndpoints` token IDs resolve * against these books; when absent, only `sourceEndpoints` can be resolved. + * + * BT Extension: `source` corresponds to `Translation.sideNum = 1` and `target` to `sideNum = 2`, + * following BT's convention that side 1 is the input being analyzed and side 2 is the output. */ export interface ActiveProject { /** The persisted project envelope. Mutations target `project.analysis` and `project.links`. */ From d5b280d3c491cc4b8f37cc594dcb76ff6073df93 Mon Sep 17 00:00:00 2001 From: alex-rawlings-yyc Date: Fri, 15 May 2026 10:57:30 -0600 Subject: [PATCH 5/8] Update docs/schema --- src/types/interlinearizer.d.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/types/interlinearizer.d.ts b/src/types/interlinearizer.d.ts index 771088b7..a5854b95 100644 --- a/src/types/interlinearizer.d.ts +++ b/src/types/interlinearizer.d.ts @@ -54,9 +54,10 @@ declare module 'papi-shared-types' { 'interlinearizer.getProjectsForSource': (sourceProjectId: string) => Promise; /** - * Deletes an interlinearizer project by UUID. No-ops silently if the project does not exist. + * Deletes an interlinearizer project by UUID. * * @param interlinearProjectId UUID of the interlinearizer project to delete. + * @throws {RangeError} If the project ID is not found in the stored index. 
*/ 'interlinearizer.deleteProject': (interlinearProjectId: string) => Promise; From ab90e2c7b34d6a503a2beac79fd1bf0dc1248a7a Mon Sep 17 00:00:00 2001 From: alex-rawlings-yyc Date: Fri, 15 May 2026 10:57:38 -0600 Subject: [PATCH 6/8] Further refinement; please see updated description --- src/types/interlinearizer.d.ts | 382 +++++++++++++++------------------ 1 file changed, 174 insertions(+), 208 deletions(-) diff --git a/src/types/interlinearizer.d.ts b/src/types/interlinearizer.d.ts index a5854b95..949bbbd6 100644 --- a/src/types/interlinearizer.d.ts +++ b/src/types/interlinearizer.d.ts @@ -103,22 +103,23 @@ declare module 'papi-shared-types' { * * Shape at a glance: * - * ActiveProject - * ├─ project : InterlinearProject — persisted envelope (analysis + links) - * ├─ source : Book[] — source text layer (rebuilt from USJ at runtime) - * └─ target? : Book[] — target text layer (rebuilt from USJ at runtime; absent for analysis-only projects) - * └─ Segment[] → Token[] + * InterlinearProject + * ├─ sourceProjectId + * ├─ targetProjectId? — absent for analysis-only projects (LCM, PT9) + * ├─ analysisLanguages : string[] + * ├─ analysis : TextAnalysis + * └─ links? : AlignmentLink[] * - * InterlinearProject.analysis : TextAnalysis — analysis layer (flat) - * ├─ segmentAnalyses : SegmentAnalysis[] (per-segment translations) - * ├─ tokenAnalyses : TokenAnalysis[] (parse + 1:1 gloss) - * └─ phrases : Phrase[] (multi-token gloss) + * ActiveProject + * ├─ project : InterlinearProject + * ├─ source : Book[] + * └─ target? : Book[] — present only when targetProjectId is set * * The analysis layer is **flat** — not a mirror of the text layer's book / segment nesting. Every * analysis record carries an id reference back to its text-layer counterpart (`segmentId` / - * `tokenId`). Consumers index by id at load time (`Map`, etc.) to render - * a segment at a time. 
This keeps the layer's containers honest — none exist just to mirror a - * parent — and makes it trivial to add analyses without touching the text hierarchy. + * `tokenRef`). Consumers index by id at load time (`Map`, etc.) to + * render a segment at a time. This keeps the layer's containers honest — none exist just to mirror + * a parent — and makes it trivial to add analyses without touching the text hierarchy. * * Lexical information (entries, senses, allomorphs, grammar / MSA, …) is **not** stored in this * model. It lives in the Lexicon extension (`lexicon`); this model references it via `EntryRef` / @@ -135,10 +136,10 @@ declare module 'papi-shared-types' { * `Segment.tokens` so the baseline text can be reconstructed faithfully. They are simply omitted * from the analysis layer's `tokenAnalyses` (rather than stored there with empty analyses). * - * Staleness detection: analysis records and alignment endpoints carry a `tokenSnapshot` of the - * token's surface text at analysis time. When the baseline changes, consumers compare the snapshot - * against the current `Token.surfaceText` and flip `status` to `'stale'` on mismatch to prompt - * re-review. + * Staleness detection: `TokenAnalysis` records carry a `tokenSnapshot` of the token's surface text + * at analysis time. `AlignmentEndpoint` records carry an equivalent snapshot via + * `token.surfaceText`. When the baseline changes, consumers compare the snapshot against the + * current `Token.surfaceText` and flip `status` to `'stale'` on mismatch to prompt re-review. */ declare module 'interlinearizer' { // --------------------------------------------------------------------------- @@ -183,10 +184,9 @@ declare module 'interlinearizer' { export type MultiString = Record; /** - * A character-level scripture reference anchored to a specific position within a verse's baseline - * text. 
When `charIndex` is absent the reference is verse-level only — it identifies the verse as - * a whole without pinpointing a specific character. Consumers must treat an absent `charIndex` as - * "beginning of verse" when a character position is required. + * A verse-level scripture reference that may optionally be anchored to a character position + * within the verse's baseline text. When `charIndex` is absent the reference is verse-level + * only. */ export interface ScriptureRef { /** 3-letter SIL book code (e.g. `"GEN"`). */ @@ -355,22 +355,28 @@ declare module 'interlinearizer' { */ export interface Segment { /** - * Unique within the owning `Book` — used as the cross-reference key by - * `SegmentAnalysis.segmentId`. + * Stable identifier for this segment, unique within the owning `InterlinearProject`. In + * practice the id is project-wide unique because it is set to the verse SID (e.g. `"GEN 1:1"`). + * Used as the cross-reference key by `SegmentAnalysis.segmentId`. */ id: string; - /** Inclusive start of the text range, anchored to a character position within its verse. */ + /** + * Inclusive start of the text range. `charIndex` is set when a sub-verse character offset is + * known. + */ startRef: ScriptureRef; - /** Inclusive end of the text range, anchored to a character position within its verse. */ + /** + * Inclusive end of the text range. `charIndex` is set when a sub-verse character offset is + * known. + */ endRef: ScriptureRef; /** - * Raw text of the segment. Required — token character offsets (`Token.charStart` / - * `Token.charEnd`) are expressed relative to this string, so it must be present for the text - * layer to be interpretable, particularly for scriptio continua scripts where token boundaries - * are not derivable from whitespace. 
+ * Token character offsets (`Token.charStart` / `Token.charEnd`) are expressed relative to this + * string, so it must be present for the text layer to be interpretable, particularly for + * scriptio continua scripts where token boundaries are not derivable from whitespace. */ baselineText: string; @@ -414,15 +420,18 @@ declare module 'interlinearizer' { */ export interface Token { /** - * Unique within the owning `Book` — used as the cross-reference key by `TokenAnalysis.tokenId`, - * `Phrase.tokenIds`, and `AlignmentEndpoint.tokenId`. + * Stable identifier for this token, unique within the owning `InterlinearProject`. In practice + * the ref is project-wide unique because it embeds the verse SID and the token's character + * offset (e.g. `"GEN 1:1:0"` for the first token in Genesis 1:1). Used as the cross-reference + * key by `TokenAnalysis.tokenRef`, `PhraseAnalysis.tokenRefs`, and + * `AlignmentEndpoint.token.tokenRef`. */ - id: string; + ref: string; /** The token's text as it appears in the baseline. */ surfaceText: string; - /** Writing system of `surfaceText`. */ + /** BCP 47 writing-system tag for `surfaceText`. */ writingSystem: string; /** @@ -455,9 +464,9 @@ declare module 'interlinearizer' { * The analysis layer for an `InterlinearProject`. * * Flat by design — it does **not** mirror the text layer's book / segment nesting. Every record - * carries an id reference back to its text-layer counterpart (`segmentId` / `tokenId`). Consumers - * that need segment-local views build `Map` / `Map` at - * load time. + * carries an id reference back to its text-layer counterpart (`segmentId` / `tokenRef`). + * Consumers that need segment-local views build `Map` / `Map` at load time. * * Keeping this layer flat avoids ceremonial container types whose only purpose is to mirror a * parent, and makes it trivial to add or remove analyses without touching the text hierarchy. 
@@ -481,23 +490,25 @@ declare module 'interlinearizer' { * * **Invariant:** at most one `SegmentAnalysis` per `segmentId` has `status: 'approved'`. That * entry is the canonical segment-level analysis for rendering; alternates are available to - * review workflows via the other statuses. + * review workflows via the other statuses. This invariant is the caller's responsibility to + * maintain; no runtime enforcement exists. */ segmentAnalyses: SegmentAnalysis[]; /** * Token-level analyses, flat across the whole text. Each entry references its token by - * `tokenId`; the text layer keeps every token (words and punctuation) but this list typically + * `tokenRef`; the text layer keeps every token (words and punctuation) but this list typically * includes only the tokens being analyzed — punctuation is omitted rather than stored with * empty analyses. * - * Competing analyses are permitted: a single `tokenId` may have multiple `TokenAnalysis` + * Competing analyses are permitted: a single `tokenRef` may have multiple `TokenAnalysis` * entries (e.g. a parser's suggestion alongside a human's choice), distinguished by `status` / * `confidence` / `producer`. * - * **Invariant:** at most one `TokenAnalysis` per `tokenId` has `status: 'approved'`. That entry - * is the canonical analysis for rendering; alternates are available to review workflows via the - * other statuses (`'suggested'`, `'candidate'`, `'rejected'`, `'stale'`). + * **Invariant:** at most one `TokenAnalysis` per `tokenRef` has `status: 'approved'`. That + * entry is the canonical analysis for rendering; alternates are available to review workflows + * via the other statuses (`'suggested'`, `'candidate'`, `'rejected'`, `'stale'`). This + * invariant is the caller's responsibility to maintain; no runtime enforcement exists. */ tokenAnalyses: TokenAnalysis[]; @@ -506,17 +517,51 @@ declare module 'interlinearizer' { * disjoint tokens and carries its own gloss. 
A phrase's member tokens may span multiple * segments. * - * Competing phrases are permitted: a given `tokenId` may appear in multiple `Phrase` records - * (e.g. a suggested phrase grouping plus a human-approved one) distinguished by `status`. + * Competing phrases are permitted: a given `tokenRef` may appear in multiple `PhraseAnalysis` + * records (e.g. a suggested phrase grouping plus a human-approved one) distinguished by + * `status`. * * **Invariants:** * - * - At most one `Phrase` containing a given `tokenId` has `status: Approved`. That phrase is - * canonical for rendering. - * - A token may carry both a `TokenAnalysis` _and_ an approved `Phrase`; the per-token parse - * coexists with the phrase-level gloss and is not a competing analysis. + * - At most one `PhraseAnalysis` containing a given `tokenRef` has `status: 'approved'`. That + * phrase is canonical for rendering. + * - A token may carry both a `TokenAnalysis` _and_ an approved `PhraseAnalysis`; the per-token + * parse coexists with the phrase-level gloss and is not a competing analysis. */ - phrases: Phrase[]; + phrases: PhraseAnalysis[]; + } + + /** + * Shared base for all analysis record types (`SegmentAnalysis`, `TokenAnalysis`, + * `PhraseAnalysis`). Carries the fields common to every analysis: stable identity, review status, + * and optional provenance. + */ + export interface Analysis { + /** Unique within the owning `TextAnalysis` — stable reference for this record. */ + id: string; + + /** Required review status. */ + status: AssignmentStatus; + + /** + * How much to trust this analysis. Independent of who produced it — see `producer` / + * `sourceUser`. + */ + confidence?: Confidence; + + /** + * Free-form tag identifying what produced this analysis — e.g. `"human"`, `"parser"`, + * `"eflomal"`, or a specific tool name. + */ + producer?: string; + + /** + * User identifier for human-created or human-edited analyses. Omitted for purely + * machine-generated entries. 
Both `producer` and `sourceUser` may be set simultaneously when a + * human uses a tool-assisted workflow; `producer` names the tool and `sourceUser` identifies + * the human reviewer. + */ + sourceUser?: string; } /** @@ -532,17 +577,8 @@ declare module 'interlinearizer' { * - BT Extension: free / literal translations are not natively stored — typically absent unless * synthesized. */ - export interface SegmentAnalysis { - /** - * Unique within the owning `TextAnalysis` — used as a stable reference for this analysis - * record. - */ - id: string; - - /** - * Reference to the corresponding `Segment.id` in the text layer (unique within the owning - * `Book`). - */ + export interface SegmentAnalysis extends Analysis { + /** Reference to the corresponding `Segment.id` in the text layer. */ segmentId: string; /** Idiomatic translation of the segment. */ @@ -550,27 +586,6 @@ declare module 'interlinearizer' { /** Word-for-word translation. May be generated from token glosses. */ literalTranslation?: MultiString; - - /** - * How much to trust this segment-level analysis. Independent of who produced it — see - * `producer` / `sourceUser` for that. - */ - confidence?: Confidence; - - /** Required review status. */ - status: AssignmentStatus; - - /** - * Free-form tag identifying what produced this analysis — e.g. `"human"`, `"bt-draft"`, or a - * specific tool name. - */ - producer?: string; - - /** - * User identifier for human-created or human-edited analyses. Omitted for purely - * machine-generated entries. - */ - sourceUser?: string; } // --------------------------------------------------------------------------- @@ -610,18 +625,16 @@ declare module 'interlinearizer' { * inferred from status. No morpheme decomposition — `morphemes` is either empty or a single * whole-word morpheme. `pos` available from Macula TSV for source-language tokens only. 
*/ - export type TokenAnalysis = { + export interface TokenAnalysis extends Analysis { + /** Reference to the `Token.ref` being analyzed. */ + tokenRef: string; + /** - * Unique within the owning `TextAnalysis` — used as the cross-reference key by - * `AlignmentEndpoint.tokenAnalysisId` for morpheme-level alignment links. + * Ordered morpheme breakdown. Present when the analysis reaches sub-word granularity (e.g. an + * LCM `IWfiAnalysis` with `MorphBundlesOS`). Absent when the analysis treats the token as a + * single whole-word unit. */ - id: string; - - /** Reference to the `Token.id` being analyzed (unique within the owning `Book`). */ - tokenId: string; - - /** Ordered morpheme breakdown. Omitted for whole-word analyses. */ - morphemes?: Morpheme[]; + morphemes?: MorphemeAnalysis[]; /** Part of speech (free-form tag or lexicon POS id). */ pos?: string; @@ -632,35 +645,10 @@ declare module 'interlinearizer' { */ features?: Record; - /** - * How much to trust this analysis. Independent of who produced it — see `producer` / - * `sourceUser` for that. - */ - confidence?: Confidence; - - /** Required review status. */ - status: AssignmentStatus; - - /** - * Free-form tag identifying what produced this analysis — e.g. `"human"`, `"parser"`, - * `"eflomal"`, or a specific tool name. Distinguishes human edits from each of several possible - * engines. - */ - producer?: string; - - /** - * User identifier for human-created or human-edited analyses. Omitted for purely - * machine-generated entries. - */ - sourceUser?: string; - /** * Surface text of the token at analysis time — used for drift detection. Consumers compare this * against the current `Token.surfaceText`; on mismatch, flip `status` to `'stale'` to prompt * re-review. - * - * Holds the raw surface text for debuggability; can be swapped for a hash if storage cost - * becomes a concern (token text is typically short, so the literal string is usually fine). 
*/ tokenSnapshot?: string; @@ -676,10 +664,13 @@ declare module 'interlinearizer' { * `glossSenseRef` is retained so the lexicon link is not lost. */ glossSenseRef?: SenseRef; - }; + } /** - * An ordered morpheme within a token's parse. + * Analysis of one morpheme within a token's parse. Unlike `TokenAnalysis` and `SegmentAnalysis`, + * which reference their subject by id, `MorphemeAnalysis` owns the morpheme itself: `form` and + * `writingSystem` store the structural data directly, while the optional refs link it into the + * Lexicon extension for lexical resolution. * * `form` is the morpheme's surface text as it appeared in this analysis context — which may * differ from the citation form on the referenced lexicon entry (e.g. under phonological @@ -708,10 +699,10 @@ declare module 'interlinearizer' { * from the lemma. `allomorphRef` / `grammarRef` are left unset — BT Extension does not carry * these. */ - export interface Morpheme { + export interface MorphemeAnalysis { /** * Unique within the owning `TokenAnalysis.morphemes` array — used as the cross-reference key by - * `AlignmentEndpoint.morphemeId`. + * `MorphemeLink.morphemeId`. */ id: string; @@ -748,13 +739,13 @@ declare module 'interlinearizer' { } // --------------------------------------------------------------------------- - // §4 Phrase — multi-token gloss unit + // §4 PhraseAnalysis — multi-token gloss unit // --------------------------------------------------------------------------- /** * A multi-token unit glossed or analyzed as a single phrase. * - * `tokenIds` lists the tokens (in order) that belong to the phrase. The tokens may be: + * `tokenRefs` lists the tokens (in order) that belong to the phrase. 
The tokens may be: * * - Adjacent within one segment ("en el" → "in the") * - Disjoint within one segment (French "ne … pas" → "not") @@ -775,51 +766,16 @@ declare module 'interlinearizer' { * Source-system mapping: * * - LCM: LCM does not natively model multi-word phrases as first-class objects. Multi-word glosses, - * when present, must be synthesized as `Phrase` records during import. + * when present, must be synthesized as `PhraseAnalysis` records during import. * - Paratext: a `LexemeCluster` with `Type = Phrase` spans multiple words — each such cluster - * becomes one `Phrase` whose `tokenIds` enumerate the covered tokens. `senseRef` is the - * selected `LexemeData` reference for the phrase. + * becomes one `PhraseAnalysis` whose `tokenRefs` enumerate the covered tokens. `senseRef` is + * the selected `LexemeData` reference for the phrase. * - BT Extension: not natively tracked. Must be synthesized during migration when adjacent tokens * share the same gloss / sense. */ - export type Phrase = { - /** Unique within the owning `TextAnalysis` — used as a stable reference for this phrase record. */ - id: string; - - /** Ordered `Token.id` values that compose this phrase. */ - tokenIds: [string, ...string[]]; - - /** Required review status. */ - status: AssignmentStatus; - - /** - * How much to trust this phrase. Independent of who produced it — see `producer` / `sourceUser` - * for that. - */ - confidence?: Confidence; - - /** - * Free-form tag identifying what produced this phrase — e.g. `"human"`, `"phrase-detector"`, or - * a specific tool name. - */ - producer?: string; - - /** - * User identifier for human-created or human-edited phrases. Omitted for purely - * machine-generated entries. - */ - sourceUser?: string; - - /** - * Surface text of each token at creation time, parallel to `tokenIds`. Enables drift detection - * for phrases — if any index's snapshot no longer matches the current `Token.surfaceText`, the - * phrase is flagged `Stale`. 
- * - * **Invariant:** when present, `tokenSnapshots` must have the same length as `tokenIds` and - * each index `i` corresponds to the `Token.surfaceText` for `tokenIds[i]`. Consumers must - * maintain this alignment when filtering or transforming tokens. - */ - tokenSnapshots?: [string, ...string[]]; + export interface PhraseAnalysis extends Analysis { + /** Ordered `Token.ref` values of the tokens that compose this phrase. */ + tokenRefs: [string, ...string[]]; /** * Free-form gloss string keyed by BCP 47 analysis-language tag. Takes precedence over @@ -833,7 +789,7 @@ declare module 'interlinearizer' { * retained so the lexicon link is not lost. */ senseRef?: SenseRef; - }; + } // --------------------------------------------------------------------------- // §5 AlignmentLink, AlignmentEndpoint @@ -883,66 +839,75 @@ declare module 'interlinearizer' { */ confidence?: Confidence; - /** Multilingual notes keyed by writing system (e.g. UI locale). */ + /** Multilingual notes keyed by BCP 47 writing-system tag (e.g. `'en'`, `'fr'`). */ notes?: MultiString; } /** * One side of an alignment link. * - * When `morphemeId` is set the link connects at the morpheme level. Because a single token may - * have multiple competing `TokenAnalysis` entries, `tokenAnalysisId` is **required** alongside - * `morphemeId` to identify the specific `TokenAnalysis` that owns the referenced morpheme. When - * `morphemeId` is absent the link connects to the whole token. - * - * Exactly one of two shapes is valid — setting `morphemeId` without `tokenAnalysisId` (or vice - * versa) is a TypeScript type error: + * When `morphemeLink` is set the link connects at the morpheme level. Because a single token may + * have multiple competing `TokenAnalysis` entries, `morphemeLink.tokenAnalysisId` is **required** + * alongside `morphemeLink.morphemeId` to identify the specific `TokenAnalysis` that owns the + * referenced morpheme. When `morphemeLink` is absent the link connects to the whole token. 
* - * - Token-level: neither `morphemeId` nor `tokenAnalysisId` is present. - * - Morpheme-level: both `morphemeId` and `tokenAnalysisId` are present. + * Resolution chain (morpheme-level): AlignmentEndpoint → Token (via `token.tokenRef`) → + * TokenAnalysis (via `morphemeLink.tokenAnalysisId`) → MorphemeAnalysis (via + * `morphemeLink.morphemeId`) → EntryRef → `IEntry` (Lexicon extension) → SenseRef → `ISense` + * (Lexicon extension) * - * Resolution chain (morpheme-level): AlignmentEndpoint → Token (via `tokenId`) → TokenAnalysis - * (via `tokenAnalysisId`) → Morpheme (via `morphemeId`) → EntryRef → `IEntry` (Lexicon extension) - * → SenseRef → `ISense` (Lexicon extension) - * - * Resolution chain (token-level): AlignmentEndpoint → Token (surface text only) + * Resolution chain (token-level): AlignmentEndpoint → Token (via `token.tokenRef`) → + * `Token.surfaceText` (display) / `TokenAnalysis[]` (analysis, looked up by `tokenRef`) * * Source-system mapping: * * - LCM / Paratext: endpoints produced only through external tools or parallel-project inference * (see `AlignmentLink`). * - BT Extension: one endpoint per `Instance` in an `Alignment`'s `sourceInstances` / - * `targetInstances`. `morphemeId` and `tokenAnalysisId` are set when the token has a - * morpheme-level parse; otherwise the endpoint targets the whole token. + * `targetInstances`. `morphemeLink` is set when the token has a morpheme-level parse; otherwise + * the endpoint targets the whole token. */ - export type AlignmentEndpoint = { - /** The `Token.id` this endpoint targets. */ - tokenId: string; + export interface AlignmentEndpoint { + /** + * Token targeted by this endpoint. Identifies the token via `tokenRef` and carries a surface + * text snapshot for drift detection. + */ + token: TokenSnapshot; + + /** + * When set, narrows the endpoint to a specific morpheme within the token's parse. When absent, + * the endpoint targets the whole token. 
+ */ + morphemeLink?: MorphemeLink; + } + + /** + * A snapshot of a token at the time an alignment endpoint was created. Carries the stable token + * reference and a copy of its surface text for drift detection. + */ + export interface TokenSnapshot { + /** `Token.ref` of the targeted token. */ + tokenRef: string; /** * Surface text of the token at link-creation time — used for drift detection. A link whose * endpoint snapshot no longer matches the current `Token.surfaceText` is stale; consumers flip * the link's `status` to `'stale'` to prompt re-review. */ - tokenSnapshot?: string; - } & - /** - * Either both `morphemeId` and `tokenAnalysisId` are set (morpheme-level link), or neither is - * set (token-level link). `tokenAnalysisId` is required alongside `morphemeId` because a single - * token may have multiple competing `TokenAnalysis` entries; without it the target morpheme - * would be ambiguous. - */ - (| { morphemeId?: never; tokenAnalysisId?: never } - | { - /** - * The `TokenAnalysis.id` that owns the referenced morpheme. Required when `morphemeId` is - * set. - */ - tokenAnalysisId: string; - /** Specific `Morpheme.id` within the identified `TokenAnalysis.morphemes`. */ - morphemeId: string; - } - ); + surfaceText: string; + } + + /** + * Identifies a specific morpheme within a token's parse for morpheme-level alignment endpoints. + * Both fields are required together: `tokenAnalysisId` selects the `TokenAnalysis` (since a token + * may have multiple competing analyses) and `morphemeId` selects the morpheme within it. + */ + export interface MorphemeLink { + /** The `TokenAnalysis.id` that owns the referenced morpheme. */ + tokenAnalysisId: string; + /** Specific `MorphemeAnalysis.id` within the identified `TokenAnalysis.morphemes`. 
*/ + morphemeId: string; + } // --------------------------------------------------------------------------- // §6 InterlinearProject — persisted project envelope @@ -954,8 +919,8 @@ declare module 'interlinearizer' { * * The token hierarchy (`Book` / `Segment` / `Token`) is **not** stored here — it is rebuilt from * Platform.Bible's USJ on each load. Only the analysis data and alignment links are persisted. - * Token-level drift is detected via `tokenSnapshot` fields on `TokenAnalysis` and - * `AlignmentEndpoint` records. + * Token-level drift is detected via `tokenSnapshot` on `TokenAnalysis` records and via + * `token.surfaceText` on `AlignmentEndpoint` records. * * Projects are stored via `papi.storage` (extension-host only) under two keys: * @@ -995,7 +960,7 @@ declare module 'interlinearizer' { /** * BCP 47 tags for all languages used in glosses and annotations (e.g. `['en']`). Populates - * `MultiString` keys in `TokenAnalysis`, `SegmentAnalysis`, and `Phrase` records. + * `MultiString` keys in `TokenAnalysis`, `SegmentAnalysis`, and `PhraseAnalysis` records. * * Source-system mapping: * @@ -1011,10 +976,11 @@ declare module 'interlinearizer' { analysis: TextAnalysis; /** - * Token- or morpheme-level alignment links. Empty at creation; populated as the user aligns - * source and target tokens. + * Token- or morpheme-level alignment links. Absent (`undefined`) at creation for analysis-only + * projects; present (possibly empty) for bilateral alignment projects. Populated as the user + * aligns source and target tokens. */ - links: AlignmentLink[]; + links?: AlignmentLink[]; } // --------------------------------------------------------------------------- From 1c228c94fd01d88c2153ebcc43d6b3e7455d6eb1 Mon Sep 17 00:00:00 2001 From: "D. Ror." 
Date: Fri, 15 May 2026 10:57:48 -0600 Subject: [PATCH 7/8] Suggested model tweak --- src/types/interlinearizer.d.ts | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/src/types/interlinearizer.d.ts b/src/types/interlinearizer.d.ts index 949bbbd6..8af89c99 100644 --- a/src/types/interlinearizer.d.ts +++ b/src/types/interlinearizer.d.ts @@ -528,7 +528,7 @@ declare module 'interlinearizer' { * - A token may carry both a `TokenAnalysis` _and_ an approved `PhraseAnalysis`; the per-token * parse coexists with the phrase-level gloss and is not a competing analysis. */ - phrases: PhraseAnalysis[]; + phraseAnalyses: PhraseAnalysis[]; } /** @@ -626,8 +626,8 @@ declare module 'interlinearizer' { * whole-word morpheme. `pos` available from Macula TSV for source-language tokens only. */ export interface TokenAnalysis extends Analysis { - /** Reference to the `Token.ref` being analyzed. */ - tokenRef: string; + /** Snapshot of the token being analyzed. */ + token: TokenSnapshot; /** * Ordered morpheme breakdown. Present when the analysis reaches sub-word granularity (e.g. an @@ -645,13 +645,6 @@ declare module 'interlinearizer' { */ features?: Record; - /** - * Surface text of the token at analysis time — used for drift detection. Consumers compare this - * against the current `Token.surfaceText`; on mismatch, flip `status` to `'stale'` to prompt - * re-review. - */ - tokenSnapshot?: string; - /** * Free-form gloss string keyed by BCP 47 analysis-language tag. Takes precedence over * `glossSenseRef` when both are present. @@ -745,7 +738,7 @@ declare module 'interlinearizer' { /** * A multi-token unit glossed or analyzed as a single phrase. * - * `tokenRefs` lists the tokens (in order) that belong to the phrase. The tokens may be: + * `tokens` lists the tokens (in order) that belong to the phrase. 
The tokens may be: * * - Adjacent within one segment ("en el" → "in the") * - Disjoint within one segment (French "ne … pas" → "not") @@ -774,8 +767,8 @@ declare module 'interlinearizer' { * share the same gloss / sense. */ export interface PhraseAnalysis extends Analysis { - /** Ordered `Token.ref` values of the tokens that compose this phrase. */ - tokenRefs: [string, ...string[]]; + /** Ordered snapshots of tokens that compose this phrase. */ + tokens: [TokenSnapshot, ...TokenSnapshot[]]; /** * Free-form gloss string keyed by BCP 47 analysis-language tag. Takes precedence over @@ -905,6 +898,7 @@ declare module 'interlinearizer' { export interface MorphemeLink { /** The `TokenAnalysis.id` that owns the referenced morpheme. */ tokenAnalysisId: string; + /** Specific `MorphemeAnalysis.id` within the identified `TokenAnalysis.morphemes`. */ morphemeId: string; } From 7c08b8ade4494852ccf9e6e263db6e3d49590d05 Mon Sep 17 00:00:00 2001 From: "D. Ror." Date: Fri, 15 May 2026 10:58:11 -0600 Subject: [PATCH 8/8] Model idea: Split linking out from analyses --- src/types/interlinearizer.d.ts | 178 +++++++++++++++++++++------------ 1 file changed, 112 insertions(+), 66 deletions(-) diff --git a/src/types/interlinearizer.d.ts b/src/types/interlinearizer.d.ts index 8af89c99..79208d78 100644 --- a/src/types/interlinearizer.d.ts +++ b/src/types/interlinearizer.d.ts @@ -4,6 +4,7 @@ */ declare module 'papi-shared-types' { + /** Project-level settings contributed by the Interlinearizer extension. */ export interface ProjectSettingTypes { /** * When true, the Interlinearizer displays a continuous horizontal token scroll strip above the @@ -103,6 +104,11 @@ declare module 'papi-shared-types' { * * Shape at a glance: * + * ActiveProject + * ├─ project : InterlinearProject + * ├─ source : Book[] + * └─ target? : Book[] — present only when targetProjectId is set + * * InterlinearProject * ├─ sourceProjectId * ├─ targetProjectId? 
— absent for analysis-only projects (LCM, PT9) @@ -110,16 +116,19 @@ declare module 'papi-shared-types' { * ├─ analysis : TextAnalysis * └─ links? : AlignmentLink[] * - * ActiveProject - * ├─ project : InterlinearProject - * ├─ source : Book[] - * └─ target? : Book[] — present only when targetProjectId is set + * TextAnalysis + * ├─ segmentAnalyses : SegmentAnalysis[] + * ├─ segmentAnalysisLinks : SegmentAnalysisLink[] + * ├─ tokenAnalyses : TokenAnalysis[] + * ├─ tokenAnalysisLinks : TokenAnalysisLink[] + * ├─ phraseAnalyses : PhraseAnalysis[] + * └─ phraseAnalysisLinks : PhraseAnalysisLink[] * - * The analysis layer is **flat** — not a mirror of the text layer's book / segment nesting. Every - * analysis record carries an id reference back to its text-layer counterpart (`segmentId` / - * `tokenRef`). Consumers index by id at load time (`Map`, etc.) to - * render a segment at a time. This keeps the layer's containers honest — none exist just to mirror - * a parent — and makes it trivial to add analyses without touching the text hierarchy. + * The analysis layer is **flat** — not a mirror of the text layer's book / segment nesting. + * Analysis payloads (`SegmentAnalysis`, `TokenAnalysis`, `PhraseAnalysis`) are stored separately + * from their text-layer attachments. Link records (`segmentAnalysisLinks`, `tokenAnalysisLinks`, + * `phraseAnalysisLinks`) connect each analysis id to one segment or one/many tokens. Consumers + * index links by segment/token ids at load time to render a segment at a time. * * Lexical information (entries, senses, allomorphs, grammar / MSA, …) is **not** stored in this * model. It lives in the Lexicon extension (`lexicon`); this model references it via `EntryRef` / @@ -136,10 +145,9 @@ declare module 'papi-shared-types' { * `Segment.tokens` so the baseline text can be reconstructed faithfully. They are simply omitted * from the analysis layer's `tokenAnalyses` (rather than stored there with empty analyses). 
* - * Staleness detection: `TokenAnalysis` records carry a `tokenSnapshot` of the token's surface text - * at analysis time. `AlignmentEndpoint` records carry an equivalent snapshot via - * `token.surfaceText`. When the baseline changes, consumers compare the snapshot against the - * current `Token.surfaceText` and flip `status` to `'stale'` on mismatch to prompt re-review. + * Staleness detection: `AlignmentEndpoint` records carry a token snapshot via `token.surfaceText`. + * When the baseline changes, consumers compare the snapshot against the current `Token.surfaceText` + * and flip `status` to `'stale'` on mismatch to prompt re-review. */ declare module 'interlinearizer' { // --------------------------------------------------------------------------- @@ -167,8 +175,8 @@ declare module 'interlinearizer' { * - `candidate` — proposed but not yet reviewed * - `rejected` — explicitly rejected by a human * - `stale` — the underlying token text has changed since this record was created; the record needs - * human review. Set by drift-detection logic comparing `tokenSnapshot` against the current - * `Token.surfaceText`. + * human review. Set by drift-detection logic comparing stored `TokenSnapshot.surfaceText` + * values against the current `Token.surfaceText`. */ export type AssignmentStatus = 'approved' | 'suggested' | 'candidate' | 'rejected' | 'stale'; @@ -357,7 +365,7 @@ declare module 'interlinearizer' { /** * Stable identifier for this segment, unique within the owning `InterlinearProject`. In * practice the id is project-wide unique because it is set to the verse SID (e.g. `"GEN 1:1"`). - * Used as the cross-reference key by `SegmentAnalysis.segmentId`. + * Used as the segment-side key by `SegmentAnalysisLink.segmentId`. */ id: string; @@ -422,8 +430,8 @@ declare module 'interlinearizer' { /** * Stable identifier for this token, unique within the owning `InterlinearProject`. 
In practice * the ref is project-wide unique because it embeds the verse SID and the token's character - * offset (e.g. `"GEN 1:1:0"` for the first token in Genesis 1:1). Used as the cross-reference - * key by `TokenAnalysis.tokenRef`, `PhraseAnalysis.tokenRefs`, and + * offset (e.g. `"GEN 1:1:0"` for the first token in Genesis 1:1). Used as the token-side key by + * `TokenAnalysisLink.token.tokenRef`, `PhraseAnalysisLink.tokens[*].tokenRef`, and * `AlignmentEndpoint.token.tokenRef`. */ ref: string; @@ -463,10 +471,9 @@ declare module 'interlinearizer' { /** * The analysis layer for an `InterlinearProject`. * - * Flat by design — it does **not** mirror the text layer's book / segment nesting. Every record - * carries an id reference back to its text-layer counterpart (`segmentId` / `tokenRef`). - * Consumers that need segment-local views build `Map` / `Map` at load time. + * Flat by design — it does **not** mirror the text layer's book / segment nesting. Analysis + * payload records are linked to the text layer through the corresponding `*AnalysisLinks` arrays. + * Consumers that need segment-local views build indexes from those links at load time. * * Keeping this layer flat avoids ceremonial container types whose only purpose is to mirror a * parent, and makes it trivial to add or remove analyses without touching the text hierarchy. @@ -481,67 +488,103 @@ declare module 'interlinearizer' { */ export interface TextAnalysis { /** - * Per-segment analysis records, keyed to `Segment.id` via `segmentId`. Carries only - * segment-level data (free / literal translations); token-level data lives in `tokenAnalyses`. + * Per-segment analysis payload records. Carries only segment-level data (free / literal + * translations); token-level data lives in `tokenAnalyses`. * - * Competing analyses are permitted: a single `segmentId` may have multiple `SegmentAnalysis` + * Competing analyses are permitted: a single segment may have multiple linked `SegmentAnalysis` * entries (e.g. 
an AI-drafted back translation alongside a human-edited one), distinguished by * `status` / `confidence` / `producer`. * - * **Invariant:** at most one `SegmentAnalysis` per `segmentId` has `status: 'approved'`. That - * entry is the canonical segment-level analysis for rendering; alternates are available to - * review workflows via the other statuses. This invariant is the caller's responsibility to - * maintain; no runtime enforcement exists. + * **Invariant:** for a given segment, at most one linked `SegmentAnalysisLink` should have + * `status: 'approved'`. That linked analysis is the canonical segment-level analysis for + * rendering; alternates are available to review workflows via the other statuses. This + * invariant is the caller's responsibility to maintain; no runtime enforcement exists. */ segmentAnalyses: SegmentAnalysis[]; /** - * Token-level analyses, flat across the whole text. Each entry references its token by - * `tokenRef`; the text layer keeps every token (words and punctuation) but this list typically - * includes only the tokens being analyzed — punctuation is omitted rather than stored with - * empty analyses. + * Links each `SegmentAnalysis.id` to a single `Segment.id`, along with review metadata for that + * assignment. + */ + segmentAnalysisLinks: SegmentAnalysisLink[]; + + /** + * Token-level analysis payload records, flat across the whole text. The text layer keeps every + * token (words and punctuation) but this list typically includes only tokens being analyzed — + * punctuation is omitted rather than stored with empty analyses. * - * Competing analyses are permitted: a single `tokenRef` may have multiple `TokenAnalysis` + * Competing analyses are permitted: a single token may have multiple linked `TokenAnalysis` * entries (e.g. a parser's suggestion alongside a human's choice), distinguished by `status` / * `confidence` / `producer`. * - * **Invariant:** at most one `TokenAnalysis` per `tokenRef` has `status: 'approved'`. 
That - * entry is the canonical analysis for rendering; alternates are available to review workflows - * via the other statuses (`'suggested'`, `'candidate'`, `'rejected'`, `'stale'`). This - * invariant is the caller's responsibility to maintain; no runtime enforcement exists. + * **Invariant:** for a given token, at most one linked `TokenAnalysisLink` should have `status: + * 'approved'`. That linked analysis is the canonical analysis for rendering; alternates are + * available to review workflows via the other statuses (`'suggested'`, `'candidate'`, + * `'rejected'`, `'stale'`). This invariant is the caller's responsibility to maintain; no + * runtime enforcement exists. */ tokenAnalyses: TokenAnalysis[]; + /** + * Links each `TokenAnalysis.id` to one token snapshot, along with review metadata for that + * assignment. + */ + tokenAnalysisLinks: TokenAnalysisLink[]; + /** * Multi-token phrase analyses, flat across the whole text. A phrase may group adjacent or * disjoint tokens and carries its own gloss. A phrase's member tokens may span multiple * segments. * - * Competing phrases are permitted: a given `tokenRef` may appear in multiple `PhraseAnalysis` + * Competing phrases are permitted: a given token may appear in multiple linked `PhraseAnalysis` * records (e.g. a suggested phrase grouping plus a human-approved one) distinguished by * `status`. * * **Invariants:** * - * - At most one `PhraseAnalysis` containing a given `tokenRef` has `status: 'approved'`. That - * phrase is canonical for rendering. + * - At most one linked `PhraseAnalysisLink` containing a given token should have `status: + * 'approved'`. That phrase is canonical for rendering. * - A token may carry both a `TokenAnalysis` _and_ an approved `PhraseAnalysis`; the per-token * parse coexists with the phrase-level gloss and is not a competing analysis. 
*/ phraseAnalyses: PhraseAnalysis[]; + + /** + * Links each `PhraseAnalysis.id` to one or more token snapshots, along with review metadata for + * that assignment. + */ + phraseAnalysisLinks: PhraseAnalysisLink[]; + } + + /** Shared link metadata for attaching an analysis payload record to text-layer targets. */ + export interface AnalysisLink { + /** The `Analysis.id` for the linked analysis payload record. */ + analysisId: string; + + /** Required review status. */ + status: AssignmentStatus; + + /** How much to trust this analysis assignment. */ + confidence?: Confidence; + } + + /** Links one `SegmentAnalysis` payload record to a single source segment. */ + export interface SegmentAnalysisLink extends AnalysisLink { + /** Reference to the corresponding `Segment.id` in the text layer. */ + segmentId: string; } /** - * Shared base for all analysis record types (`SegmentAnalysis`, `TokenAnalysis`, - * `PhraseAnalysis`). Carries the fields common to every analysis: stable identity, review status, - * and optional provenance. + * Shared base for all analysis payload record types (`SegmentAnalysis`, `TokenAnalysis`, + * `PhraseAnalysis`). Carries fields common to each analysis payload: stable identity, token + * surface text, and optional provenance. */ export interface Analysis { /** Unique within the owning `TextAnalysis` — stable reference for this record. */ id: string; - /** Required review status. */ - status: AssignmentStatus; + /** Surface form of the analyzed text span (token, phrase, or segment). */ + surfaceText: string; /** * How much to trust this analysis. Independent of who produced it — see `producer` / @@ -565,8 +608,8 @@ declare module 'interlinearizer' { } /** - * Per-segment analysis record. Carries data that belongs to a segment as a whole (free / literal - * translations). Token analyses and phrases live on `TextAnalysis` directly, keyed by id. + * Per-segment analysis payload record. 
Carries data that belongs to a segment as a whole (free / + * literal translations). Token analyses and phrases live on `TextAnalysis` directly. * * Source-system mapping: * @@ -578,9 +621,6 @@ declare module 'interlinearizer' { * synthesized. */ export interface SegmentAnalysis extends Analysis { - /** Reference to the corresponding `Segment.id` in the text layer. */ - segmentId: string; - /** Idiomatic translation of the segment. */ freeTranslation?: MultiString; @@ -592,6 +632,12 @@ declare module 'interlinearizer' { // §3 TokenAnalysis — parse + 1:1 gloss // --------------------------------------------------------------------------- + /** Links one `TokenAnalysis` payload record to exactly one token snapshot. */ + export interface TokenAnalysisLink extends AnalysisLink { + /** Token that this analysis refers to. */ + token: TokenSnapshot; + } + /** * Analysis of a single token: a word-level (1:1) gloss plus optional morpheme-level parse. * @@ -626,9 +672,6 @@ declare module 'interlinearizer' { * whole-word morpheme. `pos` available from Macula TSV for source-language tokens only. */ export interface TokenAnalysis extends Analysis { - /** Snapshot of the token being analyzed. */ - token: TokenSnapshot; - /** * Ordered morpheme breakdown. Present when the analysis reaches sub-word granularity (e.g. an * LCM `IWfiAnalysis` with `MorphBundlesOS`). Absent when the analysis treats the token as a @@ -660,10 +703,9 @@ declare module 'interlinearizer' { } /** - * Analysis of one morpheme within a token's parse. Unlike `TokenAnalysis` and `SegmentAnalysis`, - * which reference their subject by id, `MorphemeAnalysis` owns the morpheme itself: `form` and - * `writingSystem` store the structural data directly, while the optional refs link it into the - * Lexicon extension for lexical resolution. + * Analysis of one morpheme within a token's parse. 
`MorphemeAnalysis` owns the morpheme itself: + * `form` and `writingSystem` store the structural data directly, while the optional refs link it + * into the Lexicon extension for lexical resolution. * * `form` is the morpheme's surface text as it appeared in this analysis context — which may * differ from the citation form on the referenced lexicon entry (e.g. under phonological @@ -735,10 +777,17 @@ declare module 'interlinearizer' { // §4 PhraseAnalysis — multi-token gloss unit // --------------------------------------------------------------------------- + /** Links one `PhraseAnalysis` payload record to one or more token snapshots. */ + export interface PhraseAnalysisLink extends AnalysisLink { + /** Ordered snapshots of tokens that compose this phrase. */ + tokens: [TokenSnapshot, ...TokenSnapshot[]]; + } + /** * A multi-token unit glossed or analyzed as a single phrase. * - * `tokens` lists the tokens (in order) that belong to the phrase. The tokens may be: + * The linked `PhraseAnalysisLink.tokens` list holds the token snapshots (in order) that belong to + * the phrase. The tokens may be: * * - Adjacent within one segment ("en el" → "in the") * - Disjoint within one segment (French "ne … pas" → "not") @@ -761,15 +810,12 @@ declare module 'interlinearizer' { * - LCM: LCM does not natively model multi-word phrases as first-class objects. Multi-word glosses, * when present, must be synthesized as `PhraseAnalysis` records during import. * - Paratext: a `LexemeCluster` with `Type = Phrase` spans multiple words — each such cluster - * becomes one `PhraseAnalysis` whose `tokenRefs` enumerate the covered tokens. `senseRef` is - * the selected `LexemeData` reference for the phrase. + * becomes one `PhraseAnalysis` whose linked `PhraseAnalysisLink.tokens` enumerate the covered + * tokens. `senseRef` is the selected `LexemeData` reference for the phrase. * - BT Extension: not natively tracked. 
Must be synthesized during migration when adjacent tokens
    * share the same gloss / sense.
    */
   export interface PhraseAnalysis extends Analysis {
-    /** Ordered snapshots of tokens that compose this phrase. */
-    tokens: [TokenSnapshot, ...TokenSnapshot[]];
-
     /**
      * Free-form gloss string keyed by BCP 47 analysis-language tag. Takes precedence over
      * `senseRef` when both are present.
@@ -913,8 +959,8 @@ declare module 'interlinearizer' {
    *
    * The token hierarchy (`Book` / `Segment` / `Token`) is **not** stored here — it is rebuilt from
    * Platform.Bible's USJ on each load. Only the analysis data and alignment links are persisted.
-   * Token-level drift is detected via `tokenSnapshot` on `TokenAnalysis` records and via
-   * `token.surfaceText` on `AlignmentEndpoint` records.
+   * Token-level drift is detected via the stored `TokenSnapshot.surfaceText` values on
+   * `TokenAnalysisLink`, `PhraseAnalysisLink`, and `AlignmentEndpoint` records.
    *
    * Projects are stored via `papi.storage` (extension-host only) under two keys:
    *