diff --git a/CLAUDE.md b/CLAUDE.md index d56628cf..7dca3e2b 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -175,9 +175,58 @@ and rolled-up key dates. It is **derived** from two append-only tables — `spac so replays are idempotent; an `as_of` guard protects filing-sourced scalar fields from out-of-order writes, and `spac_history` + `ChangeLog` version the row. -Today only the IPO half is populated (S-1/DRS → `registration`, priced 424B1/424B4 -→ `ipo`); de-SPAC events (8-K items, S-4/proxy, redemptions, PIPE, de-registration) -are defined-but-deferred slots. +The IPO half is populated from S-1/DRS (`registration`) and priced 424B1/424B4 +(`ipo`). De-SPAC **milestone dates** are populated deterministically from 8-K +item codes (known SPACs only — a `spac` row must already exist): item `1.01` → +`definitive_agreement`, `1.02` → `terminated`, `2.01` → `completed`, `5.07` → +`vote`. These group into `spac_deal` attempts via `deriveDeals` +(recomputed from the event stream on every write, so `deal_index` is stable +across replays) and roll up automatically. The 8-K item path itself leaves `target_name`, +`pipe_amount`, and redemption amounts null (item codes carry no names or amounts); the +merger-proxy and redemption extractors below derive them. Still deferred: S-4 / 425 +narrative extraction, name/SIC/ticker transitions, and Form 25/15 de-registration. + +**Merger proxies** (`DEFM14A`/`PREM14A`, the `DEFM14C`/`PREM14C` consent statements, +and the `DEFR14A`/`PRER14A` revised proxies; extractor id `merger-proxy`) run +`processMergerProxy` (known SPACs only — a `spac` row must already exist): AI +extraction over the merger / business-combination / PIPE sections records a +per-accession `spac_merger_extraction` row (target name/CIK, PIPE amount, merger +consideration) and observes the target company (`relation: "merger-proxy:target"`, +`target_cik` resolved from the canonical company when it has one). 
`deriveDeals` +correlates each extraction onto the matching `spac_deal` by filing-date window — +*deriving* `target_name` / `target_cik` / `pipe_amount` (a later filing supersedes +an earlier one — definitive over preliminary, revised over definitive), which +retires the 8-K path's positional merge-preserve. Only the **definitive merger** +statements `DEFM14A` and `DEFM14C` emit the `proxy` event (→ `proxy_date` / +`status = proxy`): a consent deal (14C) has no `8-K 5.07` vote, so the definitive +14C is its only approval-stage signal. Preliminary (`PREM14A`/`PREM14C`) and revised +(`DEFR14A`/`PRER14A`) proxies are extraction-only. S-4 is deferred (newco-CIK linkage). Configure the +model via `SEC_MERGER_PROXY_MODEL` (default `claude-sonnet-4-6`) and an optional +confidence floor via `SEC_MERGER_PROXY_CONFIDENCE_FLOOR` (falls back to the shared +`SEC_S1_CONFIDENCE_FLOOR` when unset). + +```bash +sec fetch form DEFM14A # fetch + extract a merger proxy +sec extractor dead-letters merger-proxy # version-fixable extraction failures +sec extractor retry-dead-letters merger-proxy +``` + +**Redemption actuals** (extractor id `redemption`) are AI-extracted from a known +SPAC's post-vote 8-K narrative. When an 8-K carries item `5.07`, `2.01`, or `8.01` +for a known SPAC, ingestion escalates the fetch to the full submission `.txt` and +reads the primary document + `EX-99.x` exhibits; `processRedemption8K` records a +per-accession `spac_redemption_extraction` row, and `deriveDeals` correlates +`redemption_amount` / `redemption_shares` onto the matching `spac_deal`. The deal +column is the only source that `total_redemption_amount` sums over, so redemptions are counted +once. Configure the model via `SEC_REDEMPTION_MODEL` (default `claude-sonnet-4-6`) +and an optional confidence floor via `SEC_REDEMPTION_CONFIDENCE_FLOOR` (falls back to +`SEC_S1_CONFIDENCE_FLOOR`). 
+ +```bash +sec spac backfill-redemptions # sweep historical known-SPAC trigger 8-Ks +sec extractor dead-letters redemption # version-fixable extraction failures +sec extractor retry-dead-letters redemption +``` ```bash sec spac report [--format json] # consolidated report diff --git a/src/cli/groups/version.test.ts b/src/cli/groups/version.test.ts index b4d994f5..0641bff8 100644 --- a/src/cli/groups/version.test.ts +++ b/src/cli/groups/version.test.ts @@ -64,6 +64,8 @@ describe("sec version CLI", () => { "CFPORTAL", "D", "S-1", + "merger-proxy", + "redemption", ]); const extractorRows = parsed.filter( (r: { component_kind: string }) => r.component_kind === "extractor" diff --git a/src/commands/spac.ts b/src/commands/spac.ts index fb8532f7..b1fea2e3 100644 --- a/src/commands/spac.ts +++ b/src/commands/spac.ts @@ -6,9 +6,11 @@ import { Command } from "commander"; import { globalServiceRegistry } from "workglow"; +import { withCli } from "@workglow/cli"; import { SpacRepo } from "../storage/spac/SpacRepo"; import { SPAC_SPONSOR_LINK_REPOSITORY_TOKEN } from "../storage/canonical/SpacSponsorLinkSchema"; import { UNDERWRITER_LINK_REPOSITORY_TOKEN } from "../storage/canonical/UnderwriterLinkSchema"; +import { BackfillRedemptionsTask } from "../task/spac/BackfillRedemptionsTask"; export interface SpacReport { readonly cik: number; @@ -110,4 +112,15 @@ export function registerSpacCommands(program: Command): void { ); } }); + + spacCmd + .command("backfill-redemptions") + .description("Re-process known-SPAC trigger-item 8-Ks to extract realized redemptions") + .action(async () => { + const out = (await withCli(new BackfillRedemptionsTask()).run({})) as { + selected: number; + processed: number; + }; + console.log(`selected ${out.selected} filing(s); processed ${out.processed}`); + }); } diff --git a/src/config/DefaultDI.ts b/src/config/DefaultDI.ts index eb41e92e..6a2a6503 100644 --- a/src/config/DefaultDI.ts +++ b/src/config/DefaultDI.ts @@ -344,6 +344,16 @@ import { 
SpacHistoryPrimaryKeyNames, SpacHistorySchema, } from "../storage/spac/SpacHistorySchema"; +import { + SPAC_MERGER_EXTRACTION_REPOSITORY_TOKEN, + SpacMergerExtractionPrimaryKeyNames, + SpacMergerExtractionSchema, +} from "../storage/spac/SpacMergerExtractionSchema"; +import { + SPAC_REDEMPTION_EXTRACTION_REPOSITORY_TOKEN, + SpacRedemptionExtractionPrimaryKeyNames, + SpacRedemptionExtractionSchema, +} from "../storage/spac/SpacRedemptionExtractionSchema"; import { createStorage } from "./createStorage"; export const DefaultDI = () => { @@ -665,6 +675,24 @@ export const DefaultDI = () => { SPAC_HISTORY_REPOSITORY_TOKEN, createStorage("spac_history", SpacHistorySchema, SpacHistoryPrimaryKeyNames, [["cik"]]) ); + globalServiceRegistry.registerInstance( + SPAC_MERGER_EXTRACTION_REPOSITORY_TOKEN, + createStorage( + "spac_merger_extraction", + SpacMergerExtractionSchema, + SpacMergerExtractionPrimaryKeyNames, + [["cik"]] + ) + ); + globalServiceRegistry.registerInstance( + SPAC_REDEMPTION_EXTRACTION_REPOSITORY_TOKEN, + createStorage( + "spac_redemption_extraction", + SpacRedemptionExtractionSchema, + SpacRedemptionExtractionPrimaryKeyNames, + [["cik"]] + ) + ); // ----- Observation / Canonical / Resolver ----- globalServiceRegistry.registerInstance( diff --git a/src/config/TestingDI.ts b/src/config/TestingDI.ts index 8391d2c7..e246c4b1 100644 --- a/src/config/TestingDI.ts +++ b/src/config/TestingDI.ts @@ -258,6 +258,16 @@ import { SpacHistoryPrimaryKeyNames, SpacHistorySchema, } from "../storage/spac/SpacHistorySchema"; +import { + SPAC_MERGER_EXTRACTION_REPOSITORY_TOKEN, + SpacMergerExtractionPrimaryKeyNames, + SpacMergerExtractionSchema, +} from "../storage/spac/SpacMergerExtractionSchema"; +import { + SPAC_REDEMPTION_EXTRACTION_REPOSITORY_TOKEN, + SpacRedemptionExtractionPrimaryKeyNames, + SpacRedemptionExtractionSchema, +} from "../storage/spac/SpacRedemptionExtractionSchema"; import { CANONICAL_COMPANY_REPOSITORY_TOKEN, CanonicalCompanyPrimaryKeyNames, @@ -493,6 
+503,22 @@ export function resetDependencyInjectionsForTesting() { SPAC_HISTORY_REPOSITORY_TOKEN, new InMemoryTabularStorage(SpacHistorySchema, SpacHistoryPrimaryKeyNames, [["cik"]]) ); + globalServiceRegistry.registerInstance( + SPAC_MERGER_EXTRACTION_REPOSITORY_TOKEN, + new InMemoryTabularStorage( + SpacMergerExtractionSchema, + SpacMergerExtractionPrimaryKeyNames, + [["cik"]] + ) + ); + globalServiceRegistry.registerInstance( + SPAC_REDEMPTION_EXTRACTION_REPOSITORY_TOKEN, + new InMemoryTabularStorage( + SpacRedemptionExtractionSchema, + SpacRedemptionExtractionPrimaryKeyNames, + [["cik"]] + ) + ); // Initialize Crowdfunding repositories globalServiceRegistry.registerInstance( diff --git a/src/config/setupAllDatabases.ts b/src/config/setupAllDatabases.ts index 48f1e415..64c6348b 100644 --- a/src/config/setupAllDatabases.ts +++ b/src/config/setupAllDatabases.ts @@ -54,6 +54,8 @@ import { SPAC_REPOSITORY_TOKEN } from "../storage/spac/SpacSchema"; import { SPAC_DEAL_REPOSITORY_TOKEN } from "../storage/spac/SpacDealSchema"; import { SPAC_EVENT_REPOSITORY_TOKEN } from "../storage/spac/SpacEventSchema"; import { SPAC_HISTORY_REPOSITORY_TOKEN } from "../storage/spac/SpacHistorySchema"; +import { SPAC_MERGER_EXTRACTION_REPOSITORY_TOKEN } from "../storage/spac/SpacMergerExtractionSchema"; +import { SPAC_REDEMPTION_EXTRACTION_REPOSITORY_TOKEN } from "../storage/spac/SpacRedemptionExtractionSchema"; import { CANONICAL_COMPANY_ALIAS_REPOSITORY_TOKEN, CANONICAL_PERSON_ALIAS_REPOSITORY_TOKEN, @@ -145,6 +147,8 @@ export async function setupAllDatabases(): Promise { await globalServiceRegistry.get(SPAC_DEAL_REPOSITORY_TOKEN).setupDatabase(); await globalServiceRegistry.get(SPAC_EVENT_REPOSITORY_TOKEN).setupDatabase(); await globalServiceRegistry.get(SPAC_HISTORY_REPOSITORY_TOKEN).setupDatabase(); + await globalServiceRegistry.get(SPAC_MERGER_EXTRACTION_REPOSITORY_TOKEN).setupDatabase(); + await 
globalServiceRegistry.get(SPAC_REDEMPTION_EXTRACTION_REPOSITORY_TOKEN).setupDatabase(); await globalServiceRegistry.get(CIK_LAST_UPDATE_REPOSITORY_TOKEN).setupDatabase(); await globalServiceRegistry.get(PROCESSED_FACTS_REPOSITORY_TOKEN).setupDatabase(); await globalServiceRegistry.get(PROCESSED_SUBMISSIONS_REPOSITORY_TOKEN).setupDatabase(); diff --git a/src/sec/forms/miscellaneous-filings/Form_8_K.storage.ts b/src/sec/forms/miscellaneous-filings/Form_8_K.storage.ts index f762aec9..d0f70352 100644 --- a/src/sec/forms/miscellaneous-filings/Form_8_K.storage.ts +++ b/src/sec/forms/miscellaneous-filings/Form_8_K.storage.ts @@ -4,10 +4,15 @@ * SPDX-License-Identifier: Apache-2.0 */ +import type { ModelConfig } from "workglow"; import { Form8KEventRepo } from "../../../storage/form-8k-event/Form8KEventRepo"; import type { Form8KEvent } from "../../../storage/form-8k-event/Form8KEventSchema"; import type { Form8K } from "./Form_8_K.schema"; import { Form_8_K_ITEMS } from "./Form_8_K"; +import { SpacRepo } from "../../../storage/spac/SpacRepo"; +import { SpacReportWriter } from "../../../storage/spac/SpacReportWriter"; +import { mapItemCodesToSpacEvents } from "./spac8kMilestones"; +import { processRedemption8K } from "./redemption8k"; /** * Extracts item codes from the filing metadata `items` field. 
@@ -48,6 +53,8 @@ export async function processForm8K({ items, report_date, form8K, + fullSubmissionText, + model, }: { readonly cik: number; readonly accession_number: string; @@ -56,6 +63,8 @@ export async function processForm8K({ readonly items: string | undefined | null; readonly report_date: string | undefined | null; readonly form8K: Form8K; + readonly fullSubmissionText?: string; + readonly model?: ModelConfig; }): Promise { const eventRepo = new Form8KEventRepo(); const isAmendment = form === "8-K/A"; @@ -76,4 +85,37 @@ export async function processForm8K({ }; await eventRepo.saveEvent(event); } + + // --- Consolidated SPAC report: map de-SPAC milestone items (known SPACs only) --- + const spacRow = await new SpacRepo().getSpac(cik); + if (spacRow) { + // Skip when no usable date is available: an undated milestone (empty + // event_date) would write junk announced_date/definitive_agreement_date + // onto the deal/row. Reachable only on the best-effort path where the + // filing-metadata row is absent (report_date null, filing_date ""). + const eventDate = effectiveReportDate || filing_date; + const spacEvents = eventDate ? 
mapItemCodesToSpacEvents(itemCodes, eventDate) : []; + if (spacEvents.length > 0) { + await new SpacReportWriter().recordDealMilestones({ + cik, + accession_number, + filing_date, + form, + primary_document: null, + events: spacEvents, + }); + } + } + + if (spacRow && fullSubmissionText) { + await processRedemption8K({ + cik, + accession_number, + filing_date, + form, + itemCodes, + fullSubmissionText, + model, + }); + } } diff --git a/src/sec/forms/miscellaneous-filings/redemption8k.e2e.test.ts b/src/sec/forms/miscellaneous-filings/redemption8k.e2e.test.ts new file mode 100644 index 00000000..c23d73c4 --- /dev/null +++ b/src/sec/forms/miscellaneous-filings/redemption8k.e2e.test.ts @@ -0,0 +1,179 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ +import { afterEach, beforeEach, describe, expect, it } from "bun:test"; +import { resetDependencyInjectionsForTesting } from "../../../config/TestingDI"; +import { setupAllDatabases } from "../../../config/setupAllDatabases"; +import { SpacRepo } from "../../../storage/spac/SpacRepo"; +import { SpacReportWriter } from "../../../storage/spac/SpacReportWriter"; +import { + fakeS1Model, + registerFakeStructuredProvider, +} from "../registration-statements/s1/testing/fakeStructuredProvider"; +import { processForm8K } from "./Form_8_K.storage"; + +const FULL_TXT = + "\nACCESSION NUMBER: 0000000000-26-000020\n\n" + + "\n8-K\n1\n\n

Vote results.

\n
\n
\n" + + "\nEX-99.1\n2\n\n" + + "

Holders of 800,000 shares elected to redeem for $8,200,000.

\n" + + "
\n
\n"; + +describe("processForm8K — redemption e2e", () => { + let cleanup: (() => void) | undefined; + + beforeEach(async () => { + resetDependencyInjectionsForTesting(); + await setupAllDatabases(); + }); + afterEach(() => { + cleanup?.(); + cleanup = undefined; + }); + + async function seedSpacWithDeal(cik: number): Promise { + const writer = new SpacReportWriter(); + await writer.recordRegistration({ + cik, + accession_number: `${cik}-reg`, + filing_date: "2025-12-01", + form: "S-1", + primary_document: "s1.htm", + spac_name: "E2E SPAC Inc.", + spac_sic: 6770, + }); + await writer.recordDealMilestones({ + cik, + accession_number: `${cik}-da`, + filing_date: "2026-01-10", + form: "8-K", + primary_document: null, + events: [{ event_type: "definitive_agreement", event_date: "2026-01-10" }], + }); + } + + it("derives redemption onto deal and rolls up into spac report exactly once", async () => { + await seedSpacWithDeal(20); + const registration = registerFakeStructuredProvider([ + { + redemption_shares: 800000, + redemption_amount: 8200000, + price_per_share: 10.25, + confidence: 0.95, + source_span: "800,000 shares elected to redeem for $8,200,000", + }, + ]); + cleanup = registration.unregister; + + await processForm8K({ + cik: 20, + accession_number: "0000000000-26-000020", + filing_date: "2026-03-20", + form: "8-K", + items: "5.07", + report_date: "2026-03-19", + form8K: {}, + fullSubmissionText: FULL_TXT, + model: fakeS1Model(), + }); + + const deals = await new SpacRepo().getDeals(20); + expect(deals[0].redemption_amount).toBe(8200000); + + const spacRow = await new SpacRepo().getSpac(20); + expect(spacRow?.total_redemption_amount).toBe(8200000); + }); + + it("is idempotent — reprocessing the same 8-K does not double the redemption amount", async () => { + await seedSpacWithDeal(20); + + const args = { + cik: 20, + accession_number: "0000000000-26-000020", + filing_date: "2026-03-20", + form: "8-K", + items: "5.07", + report_date: "2026-03-19", + form8K: {}, + 
fullSubmissionText: FULL_TXT, + model: fakeS1Model(), + } as const; + + // First call + const reg1 = registerFakeStructuredProvider([ + { + redemption_shares: 800000, + redemption_amount: 8200000, + price_per_share: 10.25, + confidence: 0.95, + source_span: "800,000 shares elected to redeem for $8,200,000", + }, + ]); + await processForm8K(args); + reg1.unregister(); + + // Second call — same accession, same payload + const reg2 = registerFakeStructuredProvider([ + { + redemption_shares: 800000, + redemption_amount: 8200000, + price_per_share: 10.25, + confidence: 0.95, + source_span: "800,000 shares elected to redeem for $8,200,000", + }, + ]); + cleanup = reg2.unregister; + await processForm8K(args); + + const deals = await new SpacRepo().getDeals(20); + expect(deals[0].redemption_amount).toBe(8200000); + + const spacRow = await new SpacRepo().getSpac(20); + expect(spacRow?.total_redemption_amount).toBe(8200000); + }); + + it("known SPAC with no deal yields no redemption rollup", async () => { + // Seed SPAC row but no deal milestone + await new SpacReportWriter().recordRegistration({ + cik: 21, + accession_number: "21-reg", + filing_date: "2025-12-01", + form: "S-1", + primary_document: "s1.htm", + spac_name: "No Deal SPAC Inc.", + spac_sic: 6770, + }); + + const registration = registerFakeStructuredProvider([ + { + redemption_shares: 800000, + redemption_amount: 8200000, + price_per_share: 10.25, + confidence: 0.95, + source_span: "800,000 shares elected to redeem for $8,200,000", + }, + ]); + cleanup = registration.unregister; + + await processForm8K({ + cik: 21, + accession_number: "0000000000-26-000021", + filing_date: "2026-03-20", + form: "8-K", + items: "5.07", + report_date: "2026-03-19", + form8K: {}, + fullSubmissionText: FULL_TXT, + model: fakeS1Model(), + }); + + const spacRow = await new SpacRepo().getSpac(21); + expect(spacRow).toBeDefined(); + expect(spacRow?.total_redemption_amount ?? 
null).toBeNull(); + + const deals = await new SpacRepo().getDeals(21); + expect(deals).toHaveLength(0); + }); +}); diff --git a/src/sec/forms/miscellaneous-filings/redemption8k.test.ts b/src/sec/forms/miscellaneous-filings/redemption8k.test.ts new file mode 100644 index 00000000..e6f28a46 --- /dev/null +++ b/src/sec/forms/miscellaneous-filings/redemption8k.test.ts @@ -0,0 +1,187 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ +import { afterEach, beforeEach, describe, expect, it } from "bun:test"; +import { resetDependencyInjectionsForTesting } from "../../../config/TestingDI"; +import { setupAllDatabases } from "../../../config/setupAllDatabases"; +import { SpacRepo } from "../../../storage/spac/SpacRepo"; +import { SpacReportWriter } from "../../../storage/spac/SpacReportWriter"; +import { SpacRedemptionExtractionRepo } from "../../../storage/spac/SpacRedemptionExtractionRepo"; +import { + fakeS1Model, + registerFakeStructuredProvider, +} from "../registration-statements/s1/testing/fakeStructuredProvider"; +import { hasRedemptionTriggerItem } from "./spac8kRedemptionTriggers"; +import { processRedemption8K } from "./redemption8k"; + +const FULL_TXT = + "\nACCESSION NUMBER: 0000000000-26-000009\n\n" + + "\n8-K\n1\n\n

Vote results.

\n
\n
\n" + + "\nEX-99.1\n2\n\n" + + "

Holders of 1,234,567 shares elected to redeem for $12,400,000.

\n" + + "
\n
\n"; + +describe("processRedemption8K", () => { + let cleanup: (() => void) | undefined; + + beforeEach(async () => { + resetDependencyInjectionsForTesting(); + await setupAllDatabases(); + }); + afterEach(() => { + cleanup?.(); + cleanup = undefined; + }); + + it("hasRedemptionTriggerItem matches trigger codes only", () => { + expect(hasRedemptionTriggerItem("5.07,9.01")).toBe(true); + expect(hasRedemptionTriggerItem("2.02")).toBe(false); + expect(hasRedemptionTriggerItem(null)).toBe(false); + }); + + async function seedSpacWithOpenDeal(cik: number): Promise { + const writer = new SpacReportWriter(); + await writer.recordRegistration({ + cik, + accession_number: `${cik}-reg`, + filing_date: "2025-12-01", + form: "S-1", + primary_document: "s1.htm", + spac_name: "Redeem SPAC Inc.", + spac_sic: 6770, + }); + await writer.recordDealMilestones({ + cik, + accession_number: `${cik}-da`, + filing_date: "2026-01-10", + form: "8-K", + primary_document: null, + events: [{ event_type: "definitive_agreement", event_date: "2026-01-10" }], + }); + } + + it("extracts a redemption and derives it onto the open deal", async () => { + await seedSpacWithOpenDeal(42); + const registration = registerFakeStructuredProvider([ + { + redemption_shares: 1234567, + redemption_amount: 12400000, + price_per_share: 10.05, + confidence: 0.95, + source_span: "1,234,567 shares elected to redeem for $12,400,000", + }, + ]); + cleanup = registration.unregister; + + await processRedemption8K({ + cik: 42, + accession_number: "0000000000-26-000009", + filing_date: "2026-03-20", + form: "8-K", + itemCodes: ["5.07"], + fullSubmissionText: FULL_TXT, + model: fakeS1Model(), + }); + + const ext = await new SpacRedemptionExtractionRepo().getByAccession("0000000000-26-000009"); + expect(ext?.redemption_amount).toBe(12400000); + expect(ext?.redemption_shares).toBe(1234567); + + const deals = await new SpacRepo().getDeals(42); + expect(deals[0].redemption_amount).toBe(12400000); + 
expect(deals[0].redemption_shares).toBe(1234567); + }); + + it("writes nothing without a trigger item", async () => { + await seedSpacWithOpenDeal(43); + const registration = registerFakeStructuredProvider([ + { + redemption_shares: 1, + redemption_amount: 1, + price_per_share: 10, + confidence: 0.95, + source_span: "elected to redeem", + }, + ]); + cleanup = registration.unregister; + + await processRedemption8K({ + cik: 43, + accession_number: "0000000000-26-000010", + filing_date: "2026-03-20", + form: "8-K", + itemCodes: ["9.01"], + fullSubmissionText: FULL_TXT, + model: fakeS1Model(), + }); + + expect( + await new SpacRedemptionExtractionRepo().getByAccession("0000000000-26-000010") + ).toBeUndefined(); + }); + + it("writes nothing for a CIK with no spac row (gate)", async () => { + const registration = registerFakeStructuredProvider([ + { + redemption_shares: 1, + redemption_amount: 1, + price_per_share: 10, + confidence: 0.95, + source_span: "elected to redeem", + }, + ]); + cleanup = registration.unregister; + + await processRedemption8K({ + cik: 99, + accession_number: "0000000000-26-000011", + filing_date: "2026-03-20", + form: "8-K", + itemCodes: ["5.07"], + fullSubmissionText: FULL_TXT, + model: fakeS1Model(), + }); + + expect( + await new SpacRedemptionExtractionRepo().getByAccession("0000000000-26-000011") + ).toBeUndefined(); + }); + + it("does not extract when the SPAC has no deals", async () => { + await new SpacReportWriter().recordRegistration({ + cik: 44, + accession_number: "44-reg", + filing_date: "2025-12-01", + form: "S-1", + primary_document: "s1.htm", + spac_name: "Dealless SPAC Inc.", + spac_sic: 6770, + }); + const registration = registerFakeStructuredProvider([ + { + redemption_shares: 1, + redemption_amount: 1, + price_per_share: 10, + confidence: 0.95, + source_span: "elected to redeem", + }, + ]); + cleanup = registration.unregister; + + await processRedemption8K({ + cik: 44, + accession_number: "0000000000-26-000012", + filing_date: 
"2026-03-20", + form: "8-K", + itemCodes: ["5.07"], + fullSubmissionText: FULL_TXT, + model: fakeS1Model(), + }); + + expect( + await new SpacRedemptionExtractionRepo().getByAccession("0000000000-26-000012") + ).toBeUndefined(); + }); +}); diff --git a/src/sec/forms/miscellaneous-filings/redemption8k.ts b/src/sec/forms/miscellaneous-filings/redemption8k.ts new file mode 100644 index 00000000..701b14d2 --- /dev/null +++ b/src/sec/forms/miscellaneous-filings/redemption8k.ts @@ -0,0 +1,147 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ +import type { ModelConfig } from "workglow"; +import { globalServiceRegistry, renderMarkdown } from "workglow"; +import { parseEdgarHtml } from "../../html/parseEdgarHtml"; +import { parseEightKSubmission } from "../registration-statements/s1/parseSubmission"; +import { makeRunSection } from "../registration-statements/s1/sectionRunner"; +import { spanAppearsIn } from "../registration-statements/s1/verifySourceSpan"; +import { extractRedemption } from "../registration-statements/s1/sectionExtractors"; +import type { RedemptionRow } from "../registration-statements/s1/redemptionSchema"; +import { + getRedemptionModel, + getRedemptionConfidenceFloor, + resolveModelId, +} from "../registration-statements/s1/redemptionModel"; +import { VersionRegistry } from "../../../storage/versioning/VersionRegistry"; +import { getActiveSlot } from "../../../storage/versioning/getActiveSlot"; +import { COMPONENT_VERSION_REPOSITORY_TOKEN } from "../../../storage/versioning/ComponentVersionSchema"; +import { ExtractionDeadLetterRepo } from "../../../storage/dead-letter/ExtractionDeadLetterRepo"; +import { SpacRepo } from "../../../storage/spac/SpacRepo"; +import { SpacReportWriter } from "../../../storage/spac/SpacReportWriter"; +import { SpacRedemptionExtractionRepo } from "../../../storage/spac/SpacRedemptionExtractionRepo"; +import { REDEMPTION_TRIGGER_ITEMS } from "./spac8kRedemptionTriggers"; + 
+const EXTRACTOR_ID = "redemption"; +const DEFAULT_EXTRACTOR_VERSION = "1.0.0"; +const REDEMPTION_SECTION = "redemption"; + +export interface ProcessRedemption8KArgs { + readonly cik: number; + readonly accession_number: string; + readonly filing_date: string; + readonly form: string; + readonly itemCodes: readonly string[]; + readonly fullSubmissionText: string; + readonly model?: ModelConfig; +} + +/** Renders an EDGAR HTML body to plain markdown text (source-span verifiable). */ +function renderBody(html: string, title: string): string { + const doc = parseEdgarHtml(html, title); + return doc.children + .map((n) => renderMarkdown(n)) + .filter((s) => s.length > 0) + .join("\n\n") + .trim(); +} + +/** + * AI-extract realized redemptions from a known SPAC's vote-results / closing + * 8-K (primary document + EX-99.x exhibits). Gated on a trigger item and an + * existing deal to attach to. Persists a redemption-extraction row and + * recomputes deals so the redemption is correlated onto the matching deal. + */ +export async function processRedemption8K(args: ProcessRedemption8KArgs): Promise { + const { cik, accession_number, filing_date, form, itemCodes, fullSubmissionText } = args; + + if (!itemCodes.some((c) => REDEMPTION_TRIGGER_ITEMS.includes(c))) return; + + const spacRepo = new SpacRepo(); + const spac = await spacRepo.getSpac(cik); + if (!spac) return; + const deals = await spacRepo.getDeals(cik); + if (deals.length === 0) return; + + const versionRegistry = new VersionRegistry( + globalServiceRegistry.get(COMPONENT_VERSION_REPOSITORY_TOKEN) + ); + const extractorSlot = await getActiveSlot(versionRegistry, "extractor", EXTRACTOR_ID); + const extractor_version = extractorSlot?.semver ?? DEFAULT_EXTRACTOR_VERSION; + const deadLetters = new ExtractionDeadLetterRepo(); + const model = args.model ?? 
(await getRedemptionModel()); + const model_id = resolveModelId(model); + + // Parsing/rendering filer-supplied HTML must not abort the filing (its 8-K + // events and milestone deals already wrote); a malformed body dead-letters the + // section so a version bump can retry it, mirroring the merger-proxy path. + let text: string; + try { + const { primaryHtml, exhibitsHtml } = parseEightKSubmission(form, fullSubmissionText); + text = [primaryHtml, ...exhibitsHtml] + .map((h, i) => renderBody(h, `${form} ${accession_number} #${i}`)) + .filter((t) => t.length > 0) + .join("\n\n"); + } catch (err) { + await deadLetters.record({ + extractor_id: EXTRACTOR_ID, + accession_number, + section_name: REDEMPTION_SECTION, + reason_code: "PARSE_ERROR", + detail: err instanceof Error ? err.message : String(err), + failed_extractor_version: extractor_version, + source_run_id: null, + }); + return; + } + + const runSection = makeRunSection({ + deadLetters, + extractor_id: EXTRACTOR_ID, + extractor_version, + accession_number, + confidenceFloor: getRedemptionConfidenceFloor(), + }); + + let persisted = 0; + await runSection({ + sectionName: REDEMPTION_SECTION, + text: text === "" ? undefined : text, + notFoundDetail: "no primary/EX-99 narrative text", + emptyDetail: "no redemption returned", + lowConfidenceDetail: "below confidence floor", + verifyRow: (t, r) => spanAppearsIn(t, r.source_span), + unverifiedAllDetail: "redemption source_span not present in narrative text", + extract: async (t) => { + const row = await extractRedemption(t, model); + return row === null ? 
[] : [row]; + }, + persist: async (rows) => { + const row = rows[0]; + await new SpacRedemptionExtractionRepo().save({ + accession_number, + cik, + form, + filing_date, + extractor_id: EXTRACTOR_ID, + extractor_version, + redemption_shares: row.redemption_shares, + redemption_amount: row.redemption_amount, + price_per_share: row.price_per_share, + confidence: row.confidence, + source_span: row.source_span, + model_id, + created_at: new Date().toISOString(), + }); + persisted = 1; + return 1; + }, + }); + + if (persisted > 0) { + await new SpacReportWriter().recordRedemption({ cik, accession_number, filing_date, form }); + } +} diff --git a/src/sec/forms/miscellaneous-filings/spac8kMilestones.test.ts b/src/sec/forms/miscellaneous-filings/spac8kMilestones.test.ts new file mode 100644 index 00000000..1958ec82 --- /dev/null +++ b/src/sec/forms/miscellaneous-filings/spac8kMilestones.test.ts @@ -0,0 +1,159 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import { beforeEach, describe, expect, it } from "bun:test"; +import { mapItemCodesToSpacEvents } from "./spac8kMilestones"; +import { resetDependencyInjectionsForTesting } from "../../../config/TestingDI"; +import { setupAllDatabases } from "../../../config/setupAllDatabases"; +import { Form_8_K } from "./Form_8_K"; +import { processForm8K } from "./Form_8_K.storage"; +import { SpacRepo } from "../../../storage/spac/SpacRepo"; +import { SpacReportWriter } from "../../../storage/spac/SpacReportWriter"; + +describe("mapItemCodesToSpacEvents", () => { + it("maps the four milestone item codes to lifecycle events", () => { + expect(mapItemCodesToSpacEvents(["1.01"], "2021-03-01")).toEqual([ + { event_type: "definitive_agreement", event_date: "2021-03-01" }, + ]); + expect(mapItemCodesToSpacEvents(["1.02"], "2021-03-01")).toEqual([ + { event_type: "terminated", event_date: "2021-03-01" }, + ]); + expect(mapItemCodesToSpacEvents(["2.01"], "2021-03-01")).toEqual([ + { 
event_type: "completed", event_date: "2021-03-01" }, + ]); + expect(mapItemCodesToSpacEvents(["5.07"], "2021-03-01")).toEqual([ + { event_type: "vote", event_date: "2021-03-01" }, + ]); + }); + + it("ignores non-milestone item codes", () => { + expect(mapItemCodesToSpacEvents(["2.02", "9.01", "7.01"], "2021-03-01")).toEqual([]); + }); + + it("maps only the milestone items from a mixed filing", () => { + const events = mapItemCodesToSpacEvents(["1.01", "7.01", "8.01", "9.01"], "2021-03-01"); + expect(events).toEqual([ + { event_type: "definitive_agreement", event_date: "2021-03-01" }, + ]); + }); +}); + +describe("processForm8K SPAC milestone wiring", () => { + let repo: SpacRepo; + + beforeEach(async () => { + resetDependencyInjectionsForTesting(); + await setupAllDatabases(); + repo = new SpacRepo(); + }); + + async function seedSpac(cik: number): Promise { + await new SpacReportWriter().recordRegistration({ + cik, + accession_number: `${cik}-reg`, + filing_date: "2020-12-01", + form: "S-1", + primary_document: "s1.htm", + spac_name: "Test SPAC", + spac_sic: 6770, + }); + } + + async function run8K( + cik: number, + accession_number: string, + items: string, + report_date: string + ): Promise { + const form8K = await Form_8_K.parse("8-K", ""); + await processForm8K({ + cik, + accession_number, + filing_date: report_date, + form: "8-K", + items, + report_date, + form8K, + }); + } + + it("advances a known SPAC through DA -> vote -> completion", async () => { + await seedSpac(100); + await run8K(100, "100-da", "1.01,9.01", "2021-03-01"); + await run8K(100, "100-vote", "5.07", "2021-06-01"); + await run8K(100, "100-close", "2.01,5.01", "2021-06-15"); + + const row = await repo.getSpac(100); + expect(row?.status).toBe("completed"); + expect(row?.definitive_agreement_date).toBe("2021-03-01"); + expect(row?.vote_date).toBe("2021-06-01"); + expect(row?.completed_date).toBe("2021-06-15"); + + const deals = await repo.getDeals(100); + expect(deals.length).toBe(1); + 
expect(deals[0].outcome).toBe("completed"); + }); + + it("writes no SPAC events for a CIK with no spac row", async () => { + await run8K(200, "200-da", "1.01,9.01", "2021-03-01"); + expect(await repo.getSpac(200)).toBeUndefined(); + expect(await repo.getEvents(200)).toEqual([]); + expect(await repo.getDeals(200)).toEqual([]); + }); + + it("uses report_date as the event date and is idempotent on reprocess", async () => { + await seedSpac(300); + await run8K(300, "300-da", "1.01", "2021-03-01"); + await run8K(300, "300-da", "1.01", "2021-03-01"); // reprocess + + const events = await repo.getEvents(300); + expect(events.filter((e) => e.event_type === "definitive_agreement").length).toBe(1); + expect( + events.find((e) => e.event_type === "definitive_agreement")?.event_date + ).toBe("2021-03-01"); + const deals = await repo.getDeals(300); + expect(deals.length).toBe(1); + }); + + it("prefers report_date over filing_date for the event date", async () => { + await seedSpac(400); + const form8K = await Form_8_K.parse("8-K", ""); + await processForm8K({ + cik: 400, + accession_number: "400-da", + filing_date: "2021-03-10", // later than the report/triggering date + form: "8-K", + items: "1.01", + report_date: "2021-03-01", // the actual triggering-event date + form8K, + }); + + const events = await repo.getEvents(400); + const da = events.find((e) => e.event_type === "definitive_agreement"); + expect(da?.event_date).toBe("2021-03-01"); + const deals = await repo.getDeals(400); + expect(deals[0].definitive_agreement_date).toBe("2021-03-01"); + }); + + it("records no milestone when neither report_date nor filing_date is available", async () => { + await seedSpac(500); + const form8K = await Form_8_K.parse("8-K", ""); + await processForm8K({ + cik: 500, + accession_number: "500-da", + filing_date: "", // best-effort path: filing-metadata row absent + form: "8-K", + items: "1.01", + report_date: null, + form8K, + }); + + // An undated 8-K must not write a milestone (empty 
event_date would be junk). + const events = await repo.getEvents(500); + expect(events.some((e) => e.event_type === "definitive_agreement")).toBe(false); + expect(await repo.getDeals(500)).toEqual([]); + }); +}); diff --git a/src/sec/forms/miscellaneous-filings/spac8kMilestones.ts b/src/sec/forms/miscellaneous-filings/spac8kMilestones.ts new file mode 100644 index 00000000..2cf4bb2f --- /dev/null +++ b/src/sec/forms/miscellaneous-filings/spac8kMilestones.ts @@ -0,0 +1,37 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { SpacEventType } from "../../../storage/spac/SpacEventSchema"; + +/** 8-K item code -> SPAC lifecycle event. Only these four items participate. */ +const ITEM_TO_SPAC_EVENT: Record = { + "1.01": "definitive_agreement", + "1.02": "terminated", + "2.01": "completed", + "5.07": "vote", +}; + +export interface SpacMilestoneEvent { + readonly event_type: SpacEventType; + readonly event_date: string; +} + +/** + * Map a filing's 8-K item codes to SPAC lifecycle events. `eventDate` is the + * caller's resolved triggering-event date (the 8-K period-of-report, falling + * back to the filing date). Non-milestone items are dropped. 
+ */ +export function mapItemCodesToSpacEvents( + itemCodes: readonly string[], + eventDate: string +): SpacMilestoneEvent[] { + const events: SpacMilestoneEvent[] = []; + for (const code of itemCodes) { + const event_type = ITEM_TO_SPAC_EVENT[code]; + if (event_type) events.push({ event_type, event_date: eventDate }); + } + return events; +} diff --git a/src/sec/forms/miscellaneous-filings/spac8kRedemptionTriggers.ts b/src/sec/forms/miscellaneous-filings/spac8kRedemptionTriggers.ts new file mode 100644 index 00000000..0021a5eb --- /dev/null +++ b/src/sec/forms/miscellaneous-filings/spac8kRedemptionTriggers.ts @@ -0,0 +1,15 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +/** 8-K item codes that can carry realized SPAC redemptions. */ +export const REDEMPTION_TRIGGER_ITEMS: readonly string[] = ["5.07", "2.01", "8.01"]; + +/** True when a comma/semicolon-separated items string contains a trigger code. */ +export function hasRedemptionTriggerItem(items: string | null | undefined): boolean { + if (!items) return false; + const set = new Set(items.split(/[,;]/).map((s) => s.trim())); + return REDEMPTION_TRIGGER_ITEMS.some((code) => set.has(code)); +} diff --git a/src/sec/forms/proxies-information-statements/Form_DEFM14A.storage.e2e.test.ts b/src/sec/forms/proxies-information-statements/Form_DEFM14A.storage.e2e.test.ts new file mode 100644 index 00000000..307a42c3 --- /dev/null +++ b/src/sec/forms/proxies-information-statements/Form_DEFM14A.storage.e2e.test.ts @@ -0,0 +1,201 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import { afterEach, beforeEach, describe, expect, it } from "bun:test"; +import { resetDependencyInjectionsForTesting } from "../../../config/TestingDI"; +import { setupAllDatabases } from "../../../config/setupAllDatabases"; +import { SpacRepo } from "../../../storage/spac/SpacRepo"; +import { SpacReportWriter } from 
"../../../storage/spac/SpacReportWriter"; +import { SpacMergerExtractionRepo } from "../../../storage/spac/SpacMergerExtractionRepo"; +import { + fakeS1Model, + registerFakeStructuredProvider, +} from "../registration-statements/s1/testing/fakeStructuredProvider"; +import { Form_DEFM14A } from "./Form_DEFM14A"; +import { processMergerProxy } from "./Form_DEFM14A.storage"; + +const FIXTURE = `${import.meta.dir}/mock_data/merger-proxy/defm14a_sample.txt`; + +// The stub model returns a fixed merger deal; source_span must appear verbatim +// in the fixture's "The Business Combination" section text (verifyRow gate). +function scriptMergerDeal(): () => void { + const { unregister } = registerFakeStructuredProvider([ + { + target_name: "Acme Target Inc.", + pipe_amount: 150000000, + merger_consideration: "$10.00 per share in stock", + confidence: 0.95, + source_span: "business combination with Acme Target Inc.", + }, + ]); + return unregister; +} + +describe("processMergerProxy (e2e)", () => { + let repo: SpacRepo; + let cleanup: (() => void) | undefined; + + beforeEach(async () => { + resetDependencyInjectionsForTesting(); + await setupAllDatabases(); + repo = new SpacRepo(); + }); + afterEach(() => { + cleanup?.(); + cleanup = undefined; + }); + + async function seedSpacWithOpenDeal(cik: number): Promise { + const writer = new SpacReportWriter(); + await writer.recordRegistration({ + cik, + accession_number: `${cik}-reg`, + filing_date: "2020-12-01", + form: "S-1", + primary_document: "s1.htm", + spac_name: "Merge SPAC Inc.", + spac_sic: 6770, + }); + await writer.recordDealMilestones({ + cik, + accession_number: `${cik}-da`, + filing_date: "2021-03-05", + form: "8-K", + primary_document: null, + events: [{ event_type: "definitive_agreement", event_date: "2021-03-01" }], + }); + } + + async function runProxy( + cik: number, + accession_number: string, + form = "DEFM14A", + filing_date = "2021-05-01" + ): Promise { + const txt = await Bun.file(FIXTURE).text(); + const 
parsed = await Form_DEFM14A.parse(form, txt); + await processMergerProxy({ + cik, + file_number: "", + accession_number, + filing_date, + primary_doc: "proxy.htm", + form, + formMergerProxy: parsed, + model: fakeS1Model(), + }); + } + + it("extracts the target/pipe, correlates onto the deal, and rolls up to proxy", async () => { + await seedSpacWithOpenDeal(100); + cleanup = scriptMergerDeal(); + await runProxy(100, "100-defm"); + + const extraction = await new SpacMergerExtractionRepo().getByAccession("100-defm"); + expect(extraction?.target_name).toBe("Acme Target Inc."); + expect(extraction?.pipe_amount).toBe(150000000); + expect(extraction?.merger_consideration).toBe("$10.00 per share in stock"); + + const deals = await repo.getDeals(100); + expect(deals).toHaveLength(1); + expect(deals[0].target_name).toBe("Acme Target Inc."); + expect(deals[0].pipe_amount).toBe(150000000); + + const events = await repo.getEvents(100); + expect(events.filter((e) => e.event_type === "proxy")).toHaveLength(1); + + const row = await repo.getSpac(100); + expect(row?.status).toBe("proxy"); + expect(row?.target_name).toBe("Acme Target Inc."); + expect(row?.pipe_amount).toBe(150000000); + expect(row?.proxy_date).toBe("2021-05-01"); + }); + + it("writes nothing for a CIK with no spac row (gate)", async () => { + cleanup = scriptMergerDeal(); + await runProxy(200, "200-defm"); + expect(await repo.getSpac(200)).toBeUndefined(); + expect(await new SpacMergerExtractionRepo().getByAccession("200-defm")).toBeUndefined(); + expect(await repo.getEvents(200)).toEqual([]); + }); + + it("is idempotent when the same proxy is reprocessed", async () => { + await seedSpacWithOpenDeal(300); + cleanup = scriptMergerDeal(); + await runProxy(300, "300-defm"); + await runProxy(300, "300-defm"); + + const events = await repo.getEvents(300); + expect(events.filter((e) => e.event_type === "proxy")).toHaveLength(1); + expect(await repo.getDeals(300)).toHaveLength(1); + }); + + it("emits a proxy event for a 
definitive consent statement (DEFM14C)", async () => { + await seedSpacWithOpenDeal(110); + cleanup = scriptMergerDeal(); + await runProxy(110, "110-defm14c", "DEFM14C"); + + const events = await repo.getEvents(110); + expect(events.filter((e) => e.event_type === "proxy")).toHaveLength(1); + const row = await repo.getSpac(110); + expect(row?.status).toBe("proxy"); + expect(row?.target_name).toBe("Acme Target Inc."); + }); + + it("does not emit a proxy event for a preliminary consent statement (PREM14C)", async () => { + await seedSpacWithOpenDeal(111); + cleanup = scriptMergerDeal(); + await runProxy(111, "111-prem14c", "PREM14C"); + + const events = await repo.getEvents(111); + expect(events.some((e) => e.event_type === "proxy")).toBe(false); + const row = await repo.getSpac(111); + expect(row?.status).toBe("deal_announced"); + expect(row?.target_name).toBe("Acme Target Inc."); // still correlated + }); + + it("does not emit a proxy event for a preliminary revised proxy (PRER14A)", async () => { + await seedSpacWithOpenDeal(113); + cleanup = scriptMergerDeal(); + await runProxy(113, "113-prer14a", "PRER14A"); + + const events = await repo.getEvents(113); + expect(events.some((e) => e.event_type === "proxy")).toBe(false); + const row = await repo.getSpac(113); + expect(row?.status).toBe("deal_announced"); + expect(row?.target_name).toBe("Acme Target Inc."); // extraction-only, still correlated + }); + + it("a revised proxy (DEFR14A) supersedes target/pipe without a second proxy event", async () => { + await seedSpacWithOpenDeal(112); + const dealWithPipe = (pipe_amount: number) => [ + { + target_name: "Acme Target Inc.", + pipe_amount, + merger_consideration: "$10.00 per share in stock", + confidence: 0.95, + source_span: "business combination with Acme Target Inc.", + }, + ]; + + // Definitive proxy first: emits the proxy event + initial PIPE. 
+ let registration = registerFakeStructuredProvider(dealWithPipe(150000000)); + cleanup = registration.unregister; // guard against a throw inside runProxy + await runProxy(112, "112-defm", "DEFM14A", "2021-05-01"); + registration.unregister(); + cleanup = undefined; + + // Revised definitive proxy, filed later -> its extraction wins correlation. + registration = registerFakeStructuredProvider(dealWithPipe(225000000)); + cleanup = registration.unregister; + await runProxy(112, "112-defr", "DEFR14A", "2021-05-10"); + + const events = await repo.getEvents(112); + expect(events.filter((e) => e.event_type === "proxy")).toHaveLength(1); // only DEFM14A + const deals = await repo.getDeals(112); + expect(deals[0].pipe_amount).toBe(225000000); // revised value wins (later filing_date) + }); +}); diff --git a/src/sec/forms/proxies-information-statements/Form_DEFM14A.storage.ts b/src/sec/forms/proxies-information-statements/Form_DEFM14A.storage.ts new file mode 100644 index 00000000..58e36fd9 --- /dev/null +++ b/src/sec/forms/proxies-information-statements/Form_DEFM14A.storage.ts @@ -0,0 +1,195 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import { globalServiceRegistry, type ModelConfig } from "workglow"; +import { buildEntityObserver } from "../../../resolver/buildEntityObserver"; +import { CanonicalCompanyRepo } from "../../../storage/canonical/CanonicalCompanyRepo"; +import { COMPONENT_VERSION_REPOSITORY_TOKEN } from "../../../storage/versioning/ComponentVersionSchema"; +import { VersionRegistry } from "../../../storage/versioning/VersionRegistry"; +import { getActiveSlot } from "../../../storage/versioning/getActiveSlot"; +import { ObservationProvenanceRepo } from "../../../storage/provenance/ObservationProvenanceRepo"; +import { ExtractionDeadLetterRepo } from "../../../storage/dead-letter/ExtractionDeadLetterRepo"; +import { SpacRepo } from "../../../storage/spac/SpacRepo"; +import { SpacReportWriter } from 
"../../../storage/spac/SpacReportWriter"; +import { SpacMergerExtractionRepo } from "../../../storage/spac/SpacMergerExtractionRepo"; +import { parseEdgarHtml } from "../../html/parseEdgarHtml"; +import { DocumentTreeSegmenter } from "../registration-statements/s1/DocumentTreeSegmenter"; +import { S1_SECTIONS, type S1SectionName } from "../registration-statements/s1/DocumentSegmenter"; +import { makeRunSection } from "../registration-statements/s1/sectionRunner"; +import { spanAppearsIn } from "../registration-statements/s1/verifySourceSpan"; +import { extractMergerDeal } from "../registration-statements/s1/sectionExtractors"; +import type { MergerDealRow } from "../registration-statements/s1/mergerDealSchema"; +import { + getMergerProxyModel, + getMergerProxyConfidenceFloor, + resolveModelId, +} from "../registration-statements/s1/mergerModel"; +import type { FormS1Parsed } from "../registration-statements/Form_S_1"; + +const EXTRACTOR_ID = "merger-proxy"; +const DEFAULT_EXTRACTOR_VERSION = "1.0.0"; +const MERGER_SECTION = "merger"; +/** Definitive merger statements emit a `proxy` lifecycle event; others do not. */ +const DEFINITIVE_PROXY_FORMS = new Set(["DEFM14A", "DEFM14C"]); + +export interface ProcessMergerProxyArgs { + readonly cik: number; + readonly file_number: string; + readonly accession_number: string; + readonly filing_date: string; + readonly primary_doc: string; + readonly form: string; + readonly formMergerProxy: FormS1Parsed; + readonly model?: ModelConfig; +} + +/** + * Extract the deal identity + PIPE from a SPAC merger proxy — the 14A/14C merger + * and revised-proxy family (`DEFM14A`/`PREM14A`, `DEFM14C`/`PREM14C`, + * `DEFR14A`/`PRER14A`); see {@link DEFINITIVE_PROXY_FORMS} for which emit the + * proxy event. Gated on a known SPAC. Persists a `spac_merger_extraction` row, + * observes the target company, then records the proxy event and recomputes deals + * (correlation derives target/pipe onto the matching `spac_deal`). 
Degrades + * gracefully: when the merger section is absent or low-confidence, it dead-letters + * and still emits the proxy event (for definitive merger statements) so + * `proxy_date` advances. + */ +export async function processMergerProxy(args: ProcessMergerProxyArgs): Promise { + const { cik, accession_number, form, filing_date, formMergerProxy } = args; + + // Gate: known SPACs only (the proxy filer is always the SPAC). + const spacRow = await new SpacRepo().getSpac(cik); + if (!spacRow) return; + + const versionRegistry = new VersionRegistry( + globalServiceRegistry.get(COMPONENT_VERSION_REPOSITORY_TOKEN) + ); + const [extractorSlot, personSlot, companySlot] = await Promise.all([ + getActiveSlot(versionRegistry, "extractor", EXTRACTOR_ID), + getActiveSlot(versionRegistry, "resolver", "person"), + getActiveSlot(versionRegistry, "resolver", "company"), + ]); + const extractor_version = extractorSlot?.semver ?? DEFAULT_EXTRACTOR_VERSION; + const observer = buildEntityObserver({ + activeResolverPersonVersion: personSlot?.semver ?? "1.0.0", + activeResolverCompanyVersion: companySlot?.semver ?? "1.0.0", + }); + const provenance = new ObservationProvenanceRepo(); + const deadLetters = new ExtractionDeadLetterRepo(); + const model = args.model ?? (await getMergerProxyModel()); + const model_id = resolveModelId(model); + + // Segment; PARSE_ERROR dead-letters the merger section so a retry can resolve it. + let byName: Map; + try { + const doc = parseEdgarHtml(formMergerProxy.html, `${form} ${accession_number}`); + const sections = new DocumentTreeSegmenter().segment(doc); + byName = new Map(sections.map((s) => [s.name, s.text])); + } catch (err) { + await deadLetters.record({ + extractor_id: EXTRACTOR_ID, + accession_number, + section_name: MERGER_SECTION, + reason_code: "PARSE_ERROR", + detail: err instanceof Error ? 
err.message : String(err), + failed_extractor_version: extractor_version, + source_run_id: null, + }); + return; + } + + // Prefer the merger / business-combination / PIPE sections; concatenate when + // multiple are present. (No whole-document fallback: proxies are huge.) + const mergerText = [ + byName.get(S1_SECTIONS.THE_MERGER), + byName.get(S1_SECTIONS.BUSINESS_COMBINATION), + byName.get(S1_SECTIONS.PIPE_FINANCING), + ] + .filter((t): t is string => typeof t === "string") + .join("\n\n"); + + const runSection = makeRunSection({ + deadLetters, + extractor_id: EXTRACTOR_ID, + extractor_version, + accession_number, + confidenceFloor: getMergerProxyConfidenceFloor(), + }); + let idx = 0; + + await runSection({ + sectionName: MERGER_SECTION, + text: mergerText === "" ? undefined : mergerText, + notFoundDetail: "no merger / business-combination / PIPE section text", + emptyDetail: "no merger deal returned", + lowConfidenceDetail: "below confidence floor", + verifyRow: (text, r) => spanAppearsIn(text, r.source_span), + unverifiedAllDetail: "merger deal source_span not present in section text", + extract: async (text) => { + const deal = await extractMergerDeal(text, model); + return deal === null ? [] : [deal]; + }, + persist: async (rows) => { + const deal = rows[0]; + const now = new Date().toISOString(); + let target_observation_id: number | null = null; + let target_cik: number | null = null; + const targetName = deal.target_name?.trim() ?? ""; + if (targetName !== "") { + const { observation_id, canonical_company_id } = await observer.observeCompany({ + accession_number, + extractor_id: EXTRACTOR_ID, + extractor_version, + observation_index: idx++, + name: targetName, + source_context: JSON.stringify({ relation: "merger-proxy:target" }), + }); + target_observation_id = observation_id; + // target_cik only when the resolved canonical company already carries one. 
+ const canon = await new CanonicalCompanyRepo().getById(canonical_company_id); + target_cik = canon?.cik ?? null; + await provenance.save({ + kind: "company", + observation_id, + confidence: deal.confidence, + source_span: deal.source_span, + section_name: MERGER_SECTION, + model_id, + prompt_version: extractor_version, + extra: null, + }); + } + await new SpacMergerExtractionRepo().save({ + accession_number, + cik, + form, + filing_date, + extractor_id: EXTRACTOR_ID, + extractor_version, + target_name: targetName === "" ? null : targetName, + target_cik, + target_observation_id, + pipe_amount: deal.pipe_amount, + merger_consideration: deal.merger_consideration, + confidence: deal.confidence, + source_span: deal.source_span, + model_id, + created_at: now, + }); + return 1; + }, + }); + + // Emit the proxy event (definitive only) + recompute/correlate + rebuild. + await new SpacReportWriter().recordMergerProxy({ + cik, + accession_number, + filing_date, + form, + primary_document: args.primary_doc ?? 
null, + emitProxyEvent: DEFINITIVE_PROXY_FORMS.has(form), + }); +} diff --git a/src/sec/forms/proxies-information-statements/Form_DEFM14A.ts b/src/sec/forms/proxies-information-statements/Form_DEFM14A.ts index efcfbaf9..74e188f6 100644 --- a/src/sec/forms/proxies-information-statements/Form_DEFM14A.ts +++ b/src/sec/forms/proxies-information-statements/Form_DEFM14A.ts @@ -5,10 +5,16 @@ */ import { Form } from "../Form"; +import { parseRegistrationSubmission } from "../registration-statements/s1/parseSubmission"; +import type { FormS1Parsed } from "../registration-statements/Form_S_1"; export class Form_DEFM14A extends Form { static readonly name = "Definitive Proxy Statement for Merger or Acquisition"; static readonly description = "Provides official notification to designated classes of shareholders of matters relating to a merger or acquisition."; static readonly forms = ["DEFM14A"] as const; + + static override async parse(form: string, txt: string): Promise { + return parseRegistrationSubmission(form, txt); + } } diff --git a/src/sec/forms/proxies-information-statements/Form_DEFM14C.ts b/src/sec/forms/proxies-information-statements/Form_DEFM14C.ts index 37b12754..610a92fe 100644 --- a/src/sec/forms/proxies-information-statements/Form_DEFM14C.ts +++ b/src/sec/forms/proxies-information-statements/Form_DEFM14C.ts @@ -5,10 +5,16 @@ */ import { Form } from "../Form"; +import { parseRegistrationSubmission } from "../registration-statements/s1/parseSubmission"; +import type { FormS1Parsed } from "../registration-statements/Form_S_1"; export class Form_DEFM14C extends Form { static readonly name = "Definitive Information Statement for Merger or Acquisition"; static readonly description = "A definitive information statement relating to a merger or an acquisition."; static readonly forms = ["DEFM14C"] as const; + + static override async parse(form: string, txt: string): Promise { + return parseRegistrationSubmission(form, txt); + } } diff --git 
a/src/sec/forms/proxies-information-statements/Form_DEFR14A.ts b/src/sec/forms/proxies-information-statements/Form_DEFR14A.ts index 067e33e8..44b59239 100644 --- a/src/sec/forms/proxies-information-statements/Form_DEFR14A.ts +++ b/src/sec/forms/proxies-information-statements/Form_DEFR14A.ts @@ -5,10 +5,16 @@ */ import { Form } from "../Form"; +import { parseRegistrationSubmission } from "../registration-statements/s1/parseSubmission"; +import type { FormS1Parsed } from "../registration-statements/Form_S_1"; export class Form_DEFR14A extends Form { static readonly name = "Definitive Revised Proxy Soliciting Materials"; static readonly description = "Definitive revised proxy soliciting materials filed pursuant to Section 14(a) of the Securities Exchange Act of 1934"; static readonly forms = ["DEFR14A"] as const; + + static override async parse(form: string, txt: string): Promise { + return parseRegistrationSubmission(form, txt); + } } diff --git a/src/sec/forms/proxies-information-statements/Form_PREM14A.ts b/src/sec/forms/proxies-information-statements/Form_PREM14A.ts index 2783f888..cf674d43 100644 --- a/src/sec/forms/proxies-information-statements/Form_PREM14A.ts +++ b/src/sec/forms/proxies-information-statements/Form_PREM14A.ts @@ -5,10 +5,16 @@ */ import { Form } from "../Form"; +import { parseRegistrationSubmission } from "../registration-statements/s1/parseSubmission"; +import type { FormS1Parsed } from "../registration-statements/Form_S_1"; export class Form_PREM14A extends Form { static readonly name = "Preliminary Proxy Statement for Merger or Acquisition"; static readonly description = "A preliminary proxy statement relating to a merger or acquisition."; static readonly forms = ["PREM14A"] as const; + + static override async parse(form: string, txt: string): Promise { + return parseRegistrationSubmission(form, txt); + } } diff --git a/src/sec/forms/proxies-information-statements/Form_PREM14C.ts b/src/sec/forms/proxies-information-statements/Form_PREM14C.ts 
index 744325d7..6112cbc9 100644 --- a/src/sec/forms/proxies-information-statements/Form_PREM14C.ts +++ b/src/sec/forms/proxies-information-statements/Form_PREM14C.ts @@ -5,10 +5,16 @@ */ import { Form } from "../Form"; +import { parseRegistrationSubmission } from "../registration-statements/s1/parseSubmission"; +import type { FormS1Parsed } from "../registration-statements/Form_S_1"; export class Form_PREM14C extends Form { static readonly name = "Preliminary Information Statement for Merger or Acquisition"; static readonly description = "A preliminary information statement relating to a merger or acquisition."; static readonly forms = ["PREM14C"] as const; + + static override async parse(form: string, txt: string): Promise { + return parseRegistrationSubmission(form, txt); + } } diff --git a/src/sec/forms/proxies-information-statements/Form_PRER14A.ts b/src/sec/forms/proxies-information-statements/Form_PRER14A.ts index 87e91551..f302acd3 100644 --- a/src/sec/forms/proxies-information-statements/Form_PRER14A.ts +++ b/src/sec/forms/proxies-information-statements/Form_PRER14A.ts @@ -5,10 +5,16 @@ */ import { Form } from "../Form"; +import { parseRegistrationSubmission } from "../registration-statements/s1/parseSubmission"; +import type { FormS1Parsed } from "../registration-statements/Form_S_1"; export class Form_PRER14A extends Form { static readonly name = "Preliminary Revised Proxy Statement"; static readonly description = "Preliminary revised proxy statement filed pursuant to Section 14(a) of the Securities Exchange Act of 1934"; static readonly forms = ["PRER14A"] as const; + + static override async parse(form: string, txt: string): Promise { + return parseRegistrationSubmission(form, txt); + } } diff --git a/src/sec/forms/proxies-information-statements/mock_data/merger-proxy/SOURCES.md b/src/sec/forms/proxies-information-statements/mock_data/merger-proxy/SOURCES.md new file mode 100644 index 00000000..6b1c242c --- /dev/null +++ 
b/src/sec/forms/proxies-information-statements/mock_data/merger-proxy/SOURCES.md @@ -0,0 +1,21 @@ +# Merger-proxy (DEFM14A/PREM14A) fixtures + +## `defm14a_sample.txt` + +A **compact, hand-authored** DEFM14A full-submission fixture modeled on the real +EDGAR SGML structure (`<SEC-HEADER>` + a `<DOCUMENT>`/`<TYPE>DEFM14A`/`<TEXT>` +envelope wrapping the primary HTML). It contains realistic `The Business +Combination` and `PIPE Financing` section headings with body prose naming a +target and a PIPE amount. + +It exists to exercise the **plumbing** of the merger-proxy path end to end — +`parseRegistrationSubmission` → `parseEdgarHtml` → `DocumentTreeSegmenter` → +section runner → `spac_merger_extraction` persistence → deal correlation → SPAC +rollup — under a **stubbed** structured-generation model (so the assertions do +not depend on a live LLM). It is **not** a verbatim EDGAR document and is not a +golden parser sample. + +Refreshing this tree with trimmed **real** SPAC DEFM14A/PREM14A submissions (for a +golden parser/segmenter test against authentic proxy-statement HTML) is future work; a +real full submission is multiple megabytes, so any committed real sample must be +trimmed to the merger/PIPE sections while keeping valid SGML + HTML. diff --git a/src/sec/forms/proxies-information-statements/mock_data/merger-proxy/defm14a_sample.txt b/src/sec/forms/proxies-information-statements/mock_data/merger-proxy/defm14a_sample.txt new file mode 100644 index 00000000..c84b14c2 --- /dev/null +++ b/src/sec/forms/proxies-information-statements/mock_data/merger-proxy/defm14a_sample.txt @@ -0,0 +1,39 @@ +0001234567-21-000001.hdr.sgml : 20210501 +ACCESSION NUMBER: 0001234567-21-000001 +CONFORMED SUBMISSION TYPE: DEFM14A +PUBLIC DOCUMENT COUNT: 1 +CONFORMED PERIOD OF REPORT: 20210501 +FILED AS OF DATE: 20210501 +FILER: + COMPANY DATA: + COMPANY CONFORMED NAME: MERGE SPAC INC. 
+ CENTRAL INDEX KEY: 0001234567 + STANDARD INDUSTRIAL CLASSIFICATION: BLANK CHECKS [6770] + STATE OF INCORPORATION: DE + + +DEFM14A +1 +defm14a.htm + + +Merge SPAC Inc. — Definitive Merger Proxy + +

The Business Combination

+

+Merge SPAC Inc., a blank check company, has entered into a business combination +with Acme Target Inc., a leading operating company in its sector. Upon the closing +of the business combination with Acme Target Inc., the post-combination company +will continue the business of Acme Target Inc. +

+

PIPE Financing

+

+In connection with the business combination, certain institutional investors have +agreed to purchase, in a private placement, shares for an aggregate PIPE investment +of $150,000,000, to be funded immediately prior to the closing of the business +combination. +

+ + +
+
diff --git a/src/sec/forms/registration-statements/s1/DocumentSegmenter.ts b/src/sec/forms/registration-statements/s1/DocumentSegmenter.ts index 8c1430a6..71828598 100644 --- a/src/sec/forms/registration-statements/s1/DocumentSegmenter.ts +++ b/src/sec/forms/registration-statements/s1/DocumentSegmenter.ts @@ -15,6 +15,10 @@ export const S1_SECTIONS = { UNDERWRITING: "Underwriting", USE_OF_PROCEEDS: "Use of Proceeds", THE_SPONSOR: "The Sponsor", + // Merger-proxy (DEFM14A/PREM14A) sections; read by the merger-proxy extractor. + THE_MERGER: "The Merger", + BUSINESS_COMBINATION: "The Business Combination", + PIPE_FINANCING: "PIPE Financing", } as const; export type S1SectionName = (typeof S1_SECTIONS)[keyof typeof S1_SECTIONS]; @@ -51,6 +55,16 @@ export const SECTION_HEADING_PATTERNS: Readonly + * SPDX-License-Identifier: Apache-2.0 + */ + +import { afterEach, describe, expect, it } from "bun:test"; +import { extractRedemption } from "./sectionExtractors"; +import { fakeS1Model, registerFakeStructuredProvider } from "./testing/fakeStructuredProvider"; + +let cleanup: (() => void) | undefined; +afterEach(() => { + cleanup?.(); + cleanup = undefined; +}); + +describe("extractRedemption", () => { + it("returns the parsed redemption row", async () => { + const { unregister } = registerFakeStructuredProvider([ + { + redemption_shares: 1234567, + redemption_amount: 12400000, + price_per_share: 10.05, + confidence: 0.9, + source_span: "1,234,567 shares elected to redeem for $12,400,000", + }, + ]); + cleanup = unregister; + const text = "Holders of 1,234,567 shares elected to redeem for $12,400,000."; + const row = await extractRedemption(text, fakeS1Model()); + expect(row).not.toBeNull(); + expect(row?.redemption_shares).toBe(1234567); + expect(row?.redemption_amount).toBe(12400000); + }); + + it("returns null when the model cites no source span", async () => { + // The null source_span is what drops the row — even a confident response is + // discarded without a verbatim 
span to anchor it (any confidence floor lives + // in the section runner, not here). + const { unregister } = registerFakeStructuredProvider([ + { + redemption_shares: null, + redemption_amount: null, + price_per_share: null, + confidence: 0.95, + source_span: null, + }, + ]); + cleanup = unregister; + const row = await extractRedemption("no redemption here", fakeS1Model()); + expect(row).toBeNull(); + }); + + it("returns null for a figure-less response (no shares and no amount)", async () => { + // The prompt tells the model to return confidence 0 with null fields when no + // realized redemption is present; even with a span, a row carrying neither a + // share count nor a dollar amount is not a redemption and must not persist. + const { unregister } = registerFakeStructuredProvider([ + { + redemption_shares: null, + redemption_amount: null, + price_per_share: null, + confidence: 0, + source_span: "no public shares were tendered for redemption", + }, + ]); + cleanup = unregister; + const row = await extractRedemption( + "No public shares were tendered for redemption.", + fakeS1Model() + ); + expect(row).toBeNull(); + }); + + it("rejects a negative redemption amount via schema validation", async () => { + // A sign-error / hallucinated negative amount would otherwise subtract from + // total_redemption_amount; minimum:0 makes runStructured throw so the caller + // dead-letters it instead of persisting corrupt data. 
+ const { unregister } = registerFakeStructuredProvider([ + { + redemption_shares: 100, + redemption_amount: -8_200_000, + price_per_share: null, + confidence: 0.9, + source_span: "shares redeemed", + }, + ]); + cleanup = unregister; + await expect(extractRedemption("shares redeemed", fakeS1Model())).rejects.toThrow(); + }); +}); diff --git a/src/sec/forms/registration-statements/s1/mergerDealSchema.ts b/src/sec/forms/registration-statements/s1/mergerDealSchema.ts new file mode 100644 index 00000000..e0e805ea --- /dev/null +++ b/src/sec/forms/registration-statements/s1/mergerDealSchema.ts @@ -0,0 +1,23 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import { Static, Type } from "typebox"; +import { TypeNullable } from "../../../../util/TypeBoxUtil"; + +/** The single merger-deal object the model returns from a merger proxy. */ +export const MergerDealOutputSchema = Type.Object({ + target_name: TypeNullable( + Type.String({ description: "Operating company the SPAC will merge with" }) + ), + pipe_amount: TypeNullable(Type.Number({ description: "Total PIPE investment in dollars" })), + merger_consideration: TypeNullable( + Type.String({ description: "Short verbatim description of the consideration" }) + ), + confidence: Type.Number({ minimum: 0, maximum: 1 }), + source_span: TypeNullable(Type.String()), +}); + +export type MergerDealRow = Static; diff --git a/src/sec/forms/registration-statements/s1/mergerModel.test.ts b/src/sec/forms/registration-statements/s1/mergerModel.test.ts new file mode 100644 index 00000000..324c51ac --- /dev/null +++ b/src/sec/forms/registration-statements/s1/mergerModel.test.ts @@ -0,0 +1,36 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import { afterEach, beforeEach, describe, expect, it } from "bun:test"; +import { getMergerProxyConfidenceFloor } from "./mergerModel"; +import { CONFIDENCE_FLOOR } from "./sectionRunner"; + +const 
ENV = "SEC_MERGER_PROXY_CONFIDENCE_FLOOR"; +// Snapshot immediately before each test (not at module load) so suite-level +// setup or other files touching this env var cannot corrupt the baseline. +let original: string | undefined; +beforeEach(() => { + original = process.env[ENV]; +}); +afterEach(() => { + if (original === undefined) delete process.env[ENV]; + else process.env[ENV] = original; +}); + +describe("getMergerProxyConfidenceFloor", () => { + it("uses the per-extractor env when set", () => { + process.env[ENV] = "0.7"; + expect(getMergerProxyConfidenceFloor()).toBe(0.7); + }); + it("falls back to the shared global floor when unset", () => { + delete process.env[ENV]; + expect(getMergerProxyConfidenceFloor()).toBe(CONFIDENCE_FLOOR); + }); + it("falls back on a non-numeric value", () => { + process.env[ENV] = "high"; + expect(getMergerProxyConfidenceFloor()).toBe(CONFIDENCE_FLOOR); + }); +}); diff --git a/src/sec/forms/registration-statements/s1/mergerModel.ts b/src/sec/forms/registration-statements/s1/mergerModel.ts new file mode 100644 index 00000000..089d4772 --- /dev/null +++ b/src/sec/forms/registration-statements/s1/mergerModel.ts @@ -0,0 +1,41 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { ModelConfig } from "workglow"; +import { getGlobalModelRepository } from "workglow"; +import { resolveModelId } from "./s1Model"; +import { CONFIDENCE_FLOOR, parseConfidenceFloor } from "./sectionRunner"; + +export { resolveModelId }; + +const DEFAULT_MERGER_PROXY_MODEL = "claude-sonnet-4-6"; + +/** The model id used for merger-proxy extraction; overridable via SEC_MERGER_PROXY_MODEL. */ +export function getMergerProxyModelId(): string { + const id = (process.env.SEC_MERGER_PROXY_MODEL ?? "").trim(); + return id === "" ? DEFAULT_MERGER_PROXY_MODEL : id; +} + +/** Resolves the configured merger-proxy model into a ModelConfig. 
*/ +export async function getMergerProxyModel(): Promise { + const id = getMergerProxyModelId(); + const record = await getGlobalModelRepository().findByName(id); + if (!record) { + throw new Error( + `Merger-proxy model '${id}' is not registered. Register it or set SEC_MERGER_PROXY_MODEL to a known model id.` + ); + } + return record as ModelConfig; +} + +/** + * Confidence floor for merger-proxy extraction. `SEC_MERGER_PROXY_CONFIDENCE_FLOOR` + * overrides; when unset it falls back to the shared `CONFIDENCE_FLOOR` + * (`SEC_S1_CONFIDENCE_FLOOR`), so behavior is unchanged unless explicitly set. + */ +export function getMergerProxyConfidenceFloor(): number { + return parseConfidenceFloor(process.env.SEC_MERGER_PROXY_CONFIDENCE_FLOOR, CONFIDENCE_FLOOR); +} diff --git a/src/sec/forms/registration-statements/s1/parseEightKSubmission.test.ts b/src/sec/forms/registration-statements/s1/parseEightKSubmission.test.ts new file mode 100644 index 00000000..64854d13 --- /dev/null +++ b/src/sec/forms/registration-statements/s1/parseEightKSubmission.test.ts @@ -0,0 +1,41 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ +import { describe, expect, it } from "bun:test"; +import { parseEightKSubmission } from "./parseSubmission"; + +const wrap = (docs: string): string => + `\nACCESSION NUMBER: 0000000000-26-000001\n\n${docs}`; +const doc = (type: string, seq: number, body: string): string => + `\n${type}\n${seq}\n\n${body}\n\n\n`; + +describe("parseEightKSubmission", () => { + it("selects the primary 8-K body and collects EX-99.x exhibits", () => { + const txt = wrap( + doc("8-K", 1, "

Primary body

") + + doc("EX-99.1", 2, "

Press release

") + + doc("EX-99.2", 3, "

Second exhibit

") + + doc("EX-101.INS", 4, "ignored") + ); + const out = parseEightKSubmission("8-K", txt); + expect(out.primaryHtml).toContain("Primary body"); + expect(out.exhibitsHtml).toHaveLength(2); + expect(out.exhibitsHtml[0]).toContain("Press release"); + expect(out.exhibitsHtml[1]).toContain("Second exhibit"); + }); + + it("falls back to 1 then first doc when no TYPE matches the form", () => { + const txt = wrap(doc("8-K12B", 1, "

Seq one

") + doc("EX-99.1", 2, "

PR

")); + const out = parseEightKSubmission("8-K", txt); + expect(out.primaryHtml).toContain("Seq one"); + expect(out.exhibitsHtml).toHaveLength(1); + }); + + it("returns a bare body and no exhibits when there is no DOCUMENT envelope", () => { + const out = parseEightKSubmission("8-K", "

just a body

"); + expect(out.primaryHtml).toContain("just a body"); + expect(out.exhibitsHtml).toEqual([]); + }); +}); diff --git a/src/sec/forms/registration-statements/s1/parseSubmission.ts b/src/sec/forms/registration-statements/s1/parseSubmission.ts index 7d064357..0868d7f8 100644 --- a/src/sec/forms/registration-statements/s1/parseSubmission.ts +++ b/src/sec/forms/registration-statements/s1/parseSubmission.ts @@ -33,6 +33,12 @@ function headerSlice(txt: string): string { return firstDoc !== -1 ? txt.slice(0, firstDoc) : txt; } +/** Body after the SGML `
</SEC-HEADER>` boundary, else the whole input. */ +function bodyAfterHeader(txt: string): string { + const end = txt.indexOf(""); + return end !== -1 ? txt.slice(end + "".length) : txt; +} + /** * Parses the human-readable EDGAR submission header (and the older tagged * `` form as a fallback). Tolerant: any missing field is null. @@ -124,8 +130,7 @@ export function parseRegistrationSubmission(form: string, txt: string): FormS1Pa // No envelope: treat the input as a bare body. If a SEC-HEADER is // present (a malformed/truncated submission missing its document blocks), drop // it so the header lines aren't fed to the HTML converter as body text. - const end = txt.indexOf(""); - const html = end !== -1 ? txt.slice(end + "".length) : txt; + const html = bodyAfterHeader(txt); return { header, html, xbrlInstanceXml: null, feeExhibitHtml: null }; } const byType = docs.find((d) => d.type !== null && d.type.toUpperCase() === form.toUpperCase()); @@ -138,3 +143,31 @@ export function parseRegistrationSubmission(form: string, txt: string): FormS1Pa feeExhibitHtml: findFeeExhibit(docs), }; } + +/** Primary document body + EX-99.x exhibit bodies sliced from an 8-K submission. */ +export interface EightKSubmissionDocs { + readonly primaryHtml: string; + readonly exhibitsHtml: readonly string[]; +} + +/** + * Slices a full-submission `.txt` (or bare primary-doc body) into the primary + * document and its `EX-99.x` exhibits. The primary is the block whose `<TYPE>` + * equals `form`, else `<SEQUENCE>1`, else the first; exhibits are every block + * whose `<TYPE>` starts with `EX-99`. With no `<DOCUMENT>` envelope the whole + * input is the primary body and there are no exhibits. 
+ */ +export function parseEightKSubmission(form: string, txt: string): EightKSubmissionDocs { + const docs = parseDocuments(txt); + if (docs.length === 0) { + const html = bodyAfterHeader(txt); + return { primaryHtml: html, exhibitsHtml: [] }; + } + const byType = docs.find((d) => d.type !== null && d.type.toUpperCase() === form.toUpperCase()); + const bySeq = docs.find((d) => d.sequence === 1); + const primary = byType ?? bySeq ?? docs[0]; + const exhibitsHtml = docs + .filter((d) => d.type !== null && d.type.toUpperCase().startsWith("EX-99")) + .map((d) => d.body); + return { primaryHtml: primary.body, exhibitsHtml }; +} diff --git a/src/sec/forms/registration-statements/s1/redemptionModel.test.ts b/src/sec/forms/registration-statements/s1/redemptionModel.test.ts new file mode 100644 index 00000000..599e1be5 --- /dev/null +++ b/src/sec/forms/registration-statements/s1/redemptionModel.test.ts @@ -0,0 +1,51 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import { afterEach, beforeEach, describe, expect, it } from "bun:test"; +import { getRedemptionConfidenceFloor, getRedemptionModelId } from "./redemptionModel"; +import { CONFIDENCE_FLOOR } from "./sectionRunner"; + +const FLOOR_ENV = "SEC_REDEMPTION_CONFIDENCE_FLOOR"; +const MODEL_ENV = "SEC_REDEMPTION_MODEL"; + +let originalFloor: string | undefined; +let originalModel: string | undefined; +beforeEach(() => { + originalFloor = process.env[FLOOR_ENV]; + originalModel = process.env[MODEL_ENV]; +}); +afterEach(() => { + if (originalFloor === undefined) delete process.env[FLOOR_ENV]; + else process.env[FLOOR_ENV] = originalFloor; + if (originalModel === undefined) delete process.env[MODEL_ENV]; + else process.env[MODEL_ENV] = originalModel; +}); + +describe("getRedemptionModelId", () => { + it("defaults to claude-sonnet-4-6 when unset", () => { + delete process.env[MODEL_ENV]; + expect(getRedemptionModelId()).toBe("claude-sonnet-4-6"); + }); + it("honors 
SEC_REDEMPTION_MODEL when set", () => { + process.env[MODEL_ENV] = "claude-opus-4-8"; + expect(getRedemptionModelId()).toBe("claude-opus-4-8"); + }); +}); + +describe("getRedemptionConfidenceFloor", () => { + it("uses the per-extractor env when set", () => { + process.env[FLOOR_ENV] = "0.7"; + expect(getRedemptionConfidenceFloor()).toBe(0.7); + }); + it("falls back to the shared global floor when unset", () => { + delete process.env[FLOOR_ENV]; + expect(getRedemptionConfidenceFloor()).toBe(CONFIDENCE_FLOOR); + }); + it("falls back on a non-numeric value", () => { + process.env[FLOOR_ENV] = "high"; + expect(getRedemptionConfidenceFloor()).toBe(CONFIDENCE_FLOOR); + }); +}); diff --git a/src/sec/forms/registration-statements/s1/redemptionModel.ts b/src/sec/forms/registration-statements/s1/redemptionModel.ts new file mode 100644 index 00000000..5b32f379 --- /dev/null +++ b/src/sec/forms/registration-statements/s1/redemptionModel.ts @@ -0,0 +1,41 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { ModelConfig } from "workglow"; +import { getGlobalModelRepository } from "workglow"; +import { resolveModelId } from "./s1Model"; +import { CONFIDENCE_FLOOR, parseConfidenceFloor } from "./sectionRunner"; + +export { resolveModelId }; + +const DEFAULT_REDEMPTION_MODEL = "claude-sonnet-4-6"; + +/** The model id used for redemption extraction; overridable via SEC_REDEMPTION_MODEL. */ +export function getRedemptionModelId(): string { + const id = (process.env.SEC_REDEMPTION_MODEL ?? "").trim(); + return id === "" ? DEFAULT_REDEMPTION_MODEL : id; +} + +/** Resolves the configured redemption model into a ModelConfig. */ +export async function getRedemptionModel(): Promise { + const id = getRedemptionModelId(); + const record = await getGlobalModelRepository().findByName(id); + if (!record) { + throw new Error( + `Redemption model '${id}' is not registered. 
Register it or set SEC_REDEMPTION_MODEL to a known model id.` + ); + } + return record as ModelConfig; +} + +/** + * Confidence floor for redemption extraction. `SEC_REDEMPTION_CONFIDENCE_FLOOR` + * overrides; when unset it falls back to the shared `CONFIDENCE_FLOOR` + * (`SEC_S1_CONFIDENCE_FLOOR`). + */ +export function getRedemptionConfidenceFloor(): number { + return parseConfidenceFloor(process.env.SEC_REDEMPTION_CONFIDENCE_FLOOR, CONFIDENCE_FLOOR); +} diff --git a/src/sec/forms/registration-statements/s1/redemptionSchema.ts b/src/sec/forms/registration-statements/s1/redemptionSchema.ts new file mode 100644 index 00000000..76fad551 --- /dev/null +++ b/src/sec/forms/registration-statements/s1/redemptionSchema.ts @@ -0,0 +1,25 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import { Static, Type } from "typebox"; +import { TypeNullable } from "../../../../util/TypeBoxUtil"; + +/** The single realized-redemption object the model returns from an 8-K. 
*/ +export const RedemptionOutputSchema = Type.Object({ + redemption_shares: TypeNullable( + Type.Integer({ minimum: 0, description: "Shares redeemed (public shares tendered)" }) + ), + redemption_amount: TypeNullable( + Type.Number({ minimum: 0, description: "Total dollars paid to redeeming holders" }) + ), + price_per_share: TypeNullable( + Type.Number({ minimum: 0, description: "Per-share redemption / trust value, when stated" }) + ), + confidence: Type.Number({ minimum: 0, maximum: 1 }), + source_span: TypeNullable(Type.String()), +}); + +export type RedemptionRow = Static; diff --git a/src/sec/forms/registration-statements/s1/sectionExtractors.test.ts b/src/sec/forms/registration-statements/s1/sectionExtractors.test.ts index d8e4e341..232bb8bc 100644 --- a/src/sec/forms/registration-statements/s1/sectionExtractors.test.ts +++ b/src/sec/forms/registration-statements/s1/sectionExtractors.test.ts @@ -13,6 +13,7 @@ import { extractOfferingTerms, extractUnderwriters, extractUseOfProceeds, + extractMergerDeal, } from "./sectionExtractors"; import { fakeS1Model, registerFakeStructuredProvider } from "./testing/fakeStructuredProvider"; @@ -131,6 +132,37 @@ it("extractOfferingTerms returns the parsed offering object", async () => { } }); +it("extractMergerDeal returns the parsed merger object", async () => { + const { unregister } = registerFakeStructuredProvider([ + { + target_name: "Acme Target Inc.", + pipe_amount: 150000000, + merger_consideration: "$10.00 per share in stock", + confidence: 0.92, + source_span: "merger with Acme Target Inc.", + }, + ]); + try { + const got = await extractMergerDeal("THE MERGER ...", fakeS1Model()); + expect(got?.target_name).toBe("Acme Target Inc."); + expect(got?.pipe_amount).toBe(150000000); + } finally { + unregister(); + } +}); + +it("extractMergerDeal throws on schema-invalid model output (caller dead-letters it)", async () => { + // Missing the required `confidence` field -> schema validation rejects it. 
+ const { unregister } = registerFakeStructuredProvider([ + { target_name: "Acme Target Inc.", pipe_amount: null, merger_consideration: null }, + ]); + try { + await expect(extractMergerDeal("THE MERGER ...", fakeS1Model())).rejects.toThrow(); + } finally { + unregister(); + } +}); + it("extractOfferingTerms throws on schema-invalid model output (caller dead-letters it)", async () => { const { unregister } = registerFakeStructuredProvider([{ tickers: [] }]); try { diff --git a/src/sec/forms/registration-statements/s1/sectionExtractors.ts b/src/sec/forms/registration-statements/s1/sectionExtractors.ts index bde0e20c..bb8af56a 100644 --- a/src/sec/forms/registration-statements/s1/sectionExtractors.ts +++ b/src/sec/forms/registration-statements/s1/sectionExtractors.ts @@ -18,6 +18,8 @@ import { SpacSponsorOutputSchema, type SpacSponsorRow } from "./spacSponsorSchem import { OfferingTermsOutputSchema, type OfferingTermsRow } from "./offeringTermsSchema"; import { UnderwriterOutputSchema, type UnderwriterRowOut } from "./underwriterSchema"; import { UseOfProceedsOutputSchema, type UseOfProceedsLineRow } from "./useOfProceedsSchema"; +import { MergerDealOutputSchema, type MergerDealRow } from "./mergerDealSchema"; +import { RedemptionOutputSchema, type RedemptionRow } from "./redemptionSchema"; const MAX_TOKENS = 4096; @@ -211,6 +213,24 @@ export async function extractSpacSponsors( return (obj.sponsors as SpacSponsorRow[] | undefined) ?? []; } +export async function extractMergerDeal( + sectionText: string, + model: ModelConfig +): Promise { + const instructions = + "The text between the tags below is from a SPAC merger proxy (DEFM14A/PREM14A). " + + "Identify the business-combination target and deal terms. Give target_name (the " + + "operating company the SPAC will merge with), pipe_amount (the total PIPE " + + "investment in dollars, or null), merger_consideration (a short verbatim phrase " + + "describing the consideration — e.g. 
cash, stock, exchange ratio — or null), a " + + "confidence in [0,1], and the verbatim source_span you drew the target from. " + + "Return JSON matching the schema."; + const prompt = `${UNTRUSTED_PREAMBLE}\n\n${instructions}\n\n${wrapUntrusted(sectionText)}`; + const obj = await runStructured(model, prompt, MergerDealOutputSchema); + if (obj.confidence == null || obj.source_span == null) return null; + return obj as unknown as MergerDealRow; +} + export async function extractUseOfProceeds( sectionText: string, model: ModelConfig @@ -224,3 +244,26 @@ export async function extractUseOfProceeds( const obj = await runStructured(model, prompt, UseOfProceedsOutputSchema); return (obj.line_items as UseOfProceedsLineRow[] | undefined) ?? []; } + +/** + * Extracts realized redemptions (shares, dollars, per-share value) from an 8-K + * narrative (vote-results / closing press release). Returns null when the output + * lacks a confidence or source span, or has neither figure. Mirrors {@link extractMergerDeal}. + */ +export async function extractRedemption( + sectionText: string, + model: ModelConfig +): Promise { + const instructions = + "From the SEC 8-K text below, extract the REALIZED redemption of public " + + "shares (e.g. reported after a shareholder vote or upon closing). Report " + + "only figures explicitly stated — do NOT multiply shares by price to " + + "synthesize an amount. If the text does not report realized redemptions, " + + "return confidence 0 and null fields."; + const prompt = `${UNTRUSTED_PREAMBLE}\n\n${instructions}\n\n${wrapUntrusted(sectionText)}`; + const obj = await runStructured(model, prompt, RedemptionOutputSchema); + if (obj.confidence == null || obj.source_span == null) return null; + // A "no realized redemption" response carries neither figure — not a redemption. 
+ if (obj.redemption_shares == null && obj.redemption_amount == null) return null; + return obj as unknown as RedemptionRow; +} diff --git a/src/sec/forms/registration-statements/s1/sectionRunner.test.ts b/src/sec/forms/registration-statements/s1/sectionRunner.test.ts new file mode 100644 index 00000000..bf014960 --- /dev/null +++ b/src/sec/forms/registration-statements/s1/sectionRunner.test.ts @@ -0,0 +1,95 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, expect, it } from "bun:test"; +import { makeRunSection, parseConfidenceFloor } from "./sectionRunner"; +import type { ExtractionDeadLetterRepo } from "../../../../storage/dead-letter/ExtractionDeadLetterRepo"; + +interface RecordedLetter { + section_name: string; + reason_code: string; +} + +/** Minimal stub: runSection only calls `record` and `markResolved`. */ +function stubDeadLetters(): { repo: ExtractionDeadLetterRepo; letters: RecordedLetter[]; resolved: string[] } { + const letters: RecordedLetter[] = []; + const resolved: string[] = []; + const repo = { + record: async (args: { section_name: string; reason_code: string }) => { + letters.push({ section_name: args.section_name, reason_code: args.reason_code }); + }, + markResolved: async (_id: string, _acc: string, section: string) => { + resolved.push(section); + }, + } as unknown as ExtractionDeadLetterRepo; + return { repo, letters, resolved }; +} + +describe("parseConfidenceFloor", () => { + it("returns the fallback for undefined, empty, or non-numeric input", () => { + expect(parseConfidenceFloor(undefined, 0.3)).toBe(0.3); + expect(parseConfidenceFloor("", 0.3)).toBe(0.3); + expect(parseConfidenceFloor(" ", 0.3)).toBe(0.3); + expect(parseConfidenceFloor("abc", 0.3)).toBe(0.3); + }); + it("parses a numeric floor", () => { + expect(parseConfidenceFloor("0.8", 0)).toBe(0.8); + expect(parseConfidenceFloor("0", 0.5)).toBe(0); + }); +}); + +describe("makeRunSection confidenceFloor", () 
=> { + const baseRow = { confidence: 0.5, value: 1 }; + + it("dead-letters LOW_CONFIDENCE_ALL when rows fall below an explicit floor", async () => { + const { repo, letters } = stubDeadLetters(); + let persisted = 0; + const runSection = makeRunSection({ + deadLetters: repo, + extractor_id: "merger-proxy", + extractor_version: "1.0.0", + accession_number: "acc-1", + confidenceFloor: 0.8, + }); + await runSection({ + sectionName: "merger", + text: "some text", + emptyDetail: "empty", + lowConfidenceDetail: "all rows below confidence floor", + extract: async () => [baseRow], + persist: async () => { + persisted++; + return 1; + }, + }); + expect(persisted).toBe(0); + expect(letters).toEqual([{ section_name: "merger", reason_code: "LOW_CONFIDENCE_ALL" }]); + }); + + it("persists the same rows under the default floor (0)", async () => { + const { repo, resolved } = stubDeadLetters(); + let persisted = 0; + const runSection = makeRunSection({ + deadLetters: repo, + extractor_id: "merger-proxy", + extractor_version: "1.0.0", + accession_number: "acc-2", + }); + await runSection({ + sectionName: "merger", + text: "some text", + emptyDetail: "empty", + lowConfidenceDetail: "all rows below confidence floor", + extract: async () => [baseRow], + persist: async () => { + persisted++; + return 1; + }, + }); + expect(persisted).toBe(1); + expect(resolved).toEqual(["merger"]); + }); +}); diff --git a/src/sec/forms/registration-statements/s1/sectionRunner.ts b/src/sec/forms/registration-statements/s1/sectionRunner.ts index e6c3b952..b9234119 100644 --- a/src/sec/forms/registration-statements/s1/sectionRunner.ts +++ b/src/sec/forms/registration-statements/s1/sectionRunner.ts @@ -6,10 +6,20 @@ import type { ExtractionDeadLetterRepo } from "../../../../storage/dead-letter/ExtractionDeadLetterRepo"; -const RAW_CONFIDENCE_FLOOR = Number(process.env.SEC_S1_CONFIDENCE_FLOOR ?? 
"0"); -// A non-numeric SEC_S1_CONFIDENCE_FLOOR would be NaN, and `confidence >= NaN` is -// always false — silently dropping every row. Fall back to 0 (no floor). -export const CONFIDENCE_FLOOR = Number.isFinite(RAW_CONFIDENCE_FLOOR) ? RAW_CONFIDENCE_FLOOR : 0; +/** + * Parse a confidence-floor env value. Undefined, empty, or non-numeric input + * falls back to `fallback` — `Number` would otherwise coerce these to `0` + * (disabling the floor, admitting every row) or `NaN` (which, since + * `confidence >= NaN` is always false, silently drops every row). + */ +export function parseConfidenceFloor(raw: string | undefined, fallback: number): number { + if (raw === undefined || raw.trim() === "") return fallback; + const n = Number(raw); + return Number.isFinite(n) ? n : fallback; +} + +/** Shared default floor (S-1 / 424); merger-proxy overrides via makeRunSection. */ +export const CONFIDENCE_FLOOR = parseConfidenceFloor(process.env.SEC_S1_CONFIDENCE_FLOOR, 0); export interface RunSectionArgs { readonly sectionName: string; @@ -59,8 +69,10 @@ export function makeRunSection(opts: { readonly extractor_id: string; readonly extractor_version: string; readonly accession_number: string; + readonly confidenceFloor?: number; }): RunSection { const { deadLetters, extractor_id, extractor_version, accession_number } = opts; + const floor = opts.confidenceFloor ?? 
CONFIDENCE_FLOOR; return async function runSection( sargs: RunSectionArgs ): Promise { @@ -85,7 +97,7 @@ export function makeRunSection(opts: { try { const raw = await sargs.extract(sargs.text); - const confident = raw.filter((r) => r.confidence >= CONFIDENCE_FLOOR); + const confident = raw.filter((r) => r.confidence >= floor); const text = sargs.text; const verifyRow = sargs.verifyRow; let rows: TRow[]; diff --git a/src/storage/spac/SpacEventSchema.ts b/src/storage/spac/SpacEventSchema.ts index e8c4f8f9..d6645ca5 100644 --- a/src/storage/spac/SpacEventSchema.ts +++ b/src/storage/spac/SpacEventSchema.ts @@ -9,7 +9,12 @@ import type { ITabularStorage } from "workglow"; import { createServiceToken } from "workglow"; import { TypeNullable, TypeStringEnum } from "../../util/TypeBoxUtil"; -/** Lifecycle event vocabulary. Only `registration` and `ipo` are written today. */ +/** + * Lifecycle event vocabulary. `registration` / `ipo` come from S-1/424; the + * de-SPAC milestones `definitive_agreement` / `terminated` / `completed` / + * `vote` are written from 8-K item codes, and `proxy` from a definitive merger + * proxy (DEFM14A/DEFM14C). The rest await deferred extractors (S-4, Form 425, Form 25/15). 
+ */ export const SPAC_EVENT_TYPES = [ "registration", "ipo", diff --git a/src/storage/spac/SpacMergerExtractionRepo.test.ts b/src/storage/spac/SpacMergerExtractionRepo.test.ts new file mode 100644 index 00000000..26e0a307 --- /dev/null +++ b/src/storage/spac/SpacMergerExtractionRepo.test.ts @@ -0,0 +1,55 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import { beforeEach, describe, expect, it } from "bun:test"; +import { resetDependencyInjectionsForTesting } from "../../config/TestingDI"; +import { setupAllDatabases } from "../../config/setupAllDatabases"; +import { SpacMergerExtractionRepo } from "./SpacMergerExtractionRepo"; +import type { SpacMergerExtraction } from "./SpacMergerExtractionSchema"; + +function row( + p: Partial & Pick +): SpacMergerExtraction { + return { + form: "DEFM14A", + filing_date: "2021-05-01", + extractor_id: "merger-proxy", + extractor_version: "1.0.0", + target_name: null, + target_cik: null, + target_observation_id: null, + pipe_amount: null, + merger_consideration: null, + confidence: 0.9, + source_span: null, + model_id: null, + created_at: "2026-01-01T00:00:00.000Z", + ...p, + }; +} + +describe("SpacMergerExtractionRepo", () => { + let repo: SpacMergerExtractionRepo; + beforeEach(async () => { + resetDependencyInjectionsForTesting(); + await setupAllDatabases(); + repo = new SpacMergerExtractionRepo(); + }); + + it("round-trips a row and overwrites by accession", async () => { + await repo.save(row({ accession_number: "a1", cik: 5, target_name: "Old Co" })); + await repo.save(row({ accession_number: "a1", cik: 5, target_name: "New Co" })); + expect((await repo.getByAccession("a1"))?.target_name).toBe("New Co"); + }); + + it("queries all extractions for a CIK", async () => { + await repo.save(row({ accession_number: "a1", cik: 5, target_name: "T1" })); + await repo.save(row({ accession_number: "a2", cik: 5, target_name: "T2" })); + await repo.save(row({ accession_number: "b1", 
cik: 6, target_name: "T3" })); + const forCik = await repo.getByCik(5); + expect(forCik.map((r) => r.target_name).sort()).toEqual(["T1", "T2"]); + }); +}); diff --git a/src/storage/spac/SpacMergerExtractionRepo.ts b/src/storage/spac/SpacMergerExtractionRepo.ts new file mode 100644 index 00000000..b1ca3667 --- /dev/null +++ b/src/storage/spac/SpacMergerExtractionRepo.ts @@ -0,0 +1,34 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import { globalServiceRegistry } from "workglow"; +import { + SpacMergerExtraction, + SPAC_MERGER_EXTRACTION_REPOSITORY_TOKEN, + SpacMergerExtractionRepositoryStorage, +} from "./SpacMergerExtractionSchema"; + +/** Per-accession merger-proxy extraction rows. */ +export class SpacMergerExtractionRepo { + private readonly storage: SpacMergerExtractionRepositoryStorage; + + constructor(storage?: SpacMergerExtractionRepositoryStorage) { + this.storage = storage ?? globalServiceRegistry.get(SPAC_MERGER_EXTRACTION_REPOSITORY_TOKEN); + } + + async save(row: SpacMergerExtraction): Promise { + await this.storage.put(row); + } + + async getByAccession(accession_number: string): Promise { + return this.storage.get({ accession_number }); + } + + /** All extractions for a CIK (unordered). */ + async getByCik(cik: number): Promise { + return (await this.storage.query({ cik })) || []; + } +} diff --git a/src/storage/spac/SpacMergerExtractionSchema.ts b/src/storage/spac/SpacMergerExtractionSchema.ts new file mode 100644 index 00000000..39632152 --- /dev/null +++ b/src/storage/spac/SpacMergerExtractionSchema.ts @@ -0,0 +1,48 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import { Static, Type } from "typebox"; +import type { ITabularStorage } from "workglow"; +import { createServiceToken } from "workglow"; +import { TypeNullable } from "../../util/TypeBoxUtil"; + +/** + * One row per merger-proxy filing (DEFM14A/PREM14A, DEFM14C/PREM14C, DEFR14A/PRER14A). 
Current-state: a + * re-extraction overwrites by accession. `target_*` / `pipe_amount` are + * correlated onto the matching `spac_deal` by `deriveDeals`; `merger_consideration` + * stays here (report + provenance only). + */ +export const SpacMergerExtractionSchema = Type.Object({ + accession_number: Type.String({ maxLength: 25 }), + cik: Type.Integer({ minimum: 0, description: "SPAC origin CIK (filer)" }), + form: Type.String({ maxLength: 20 }), + filing_date: Type.String({ format: "date" }), + extractor_id: Type.String({ maxLength: 32 }), + extractor_version: Type.String({ maxLength: 32 }), + target_name: TypeNullable(Type.String({ maxLength: 300 })), + target_cik: TypeNullable(Type.Integer({ minimum: 0 })), + target_observation_id: TypeNullable(Type.Integer({ minimum: 0 })), + pipe_amount: TypeNullable(Type.Number()), + merger_consideration: TypeNullable(Type.String({ maxLength: 2000 })), + confidence: Type.Number(), + source_span: TypeNullable(Type.String({ maxLength: 2000 })), + model_id: TypeNullable(Type.String({ maxLength: 128 })), + created_at: Type.String({ format: "date-time" }), +}); + +export type SpacMergerExtraction = Static; + +export const SpacMergerExtractionPrimaryKeyNames = ["accession_number"] as const; +export type SpacMergerExtractionRepositoryStorage = ITabularStorage< + typeof SpacMergerExtractionSchema, + typeof SpacMergerExtractionPrimaryKeyNames, + SpacMergerExtraction +>; + +export const SPAC_MERGER_EXTRACTION_REPOSITORY_TOKEN = + createServiceToken( + "sec.storage.spacMergerExtractionRepository" + ); diff --git a/src/storage/spac/SpacRedemptionExtractionRepo.test.ts b/src/storage/spac/SpacRedemptionExtractionRepo.test.ts new file mode 100644 index 00000000..e21f716e --- /dev/null +++ b/src/storage/spac/SpacRedemptionExtractionRepo.test.ts @@ -0,0 +1,61 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import { beforeEach, describe, expect, it } from "bun:test"; +import { 
resetDependencyInjectionsForTesting } from "../../config/TestingDI"; +import { setupAllDatabases } from "../../config/setupAllDatabases"; +import { SpacRedemptionExtractionRepo } from "./SpacRedemptionExtractionRepo"; +import type { SpacRedemptionExtraction } from "./SpacRedemptionExtractionSchema"; + +function row( + p: Partial & Pick +): SpacRedemptionExtraction { + return { + form: "8-K", + filing_date: "2021-05-01", + extractor_id: "redemption", + extractor_version: "1.0.0", + redemption_shares: null, + redemption_amount: null, + price_per_share: null, + confidence: 0.9, + source_span: null, + model_id: null, + created_at: "2026-01-01T00:00:00.000Z", + ...p, + }; +} + +describe("SpacRedemptionExtractionRepo", () => { + let repo: SpacRedemptionExtractionRepo; + beforeEach(async () => { + resetDependencyInjectionsForTesting(); + await setupAllDatabases(); + repo = new SpacRedemptionExtractionRepo(); + }); + + it("round-trips a row and overwrites by accession", async () => { + await repo.save(row({ accession_number: "a1", cik: 5, redemption_amount: 100_000 })); + await repo.save(row({ accession_number: "a1", cik: 5, redemption_amount: 200_000 })); + expect((await repo.getByAccession("a1"))?.redemption_amount).toBe(200_000); + }); + + it("queries all extractions for a CIK", async () => { + await repo.save(row({ accession_number: "a1", cik: 5, redemption_shares: 1000 })); + await repo.save(row({ accession_number: "a2", cik: 5, redemption_shares: 2000 })); + await repo.save(row({ accession_number: "b1", cik: 6, redemption_shares: 3000 })); + expect((await repo.getByCik(5)).length).toBe(2); + expect(await repo.getByCik(99)).toEqual([]); + }); + + it("re-save same accession stays length 1 with updated field", async () => { + await repo.save(row({ accession_number: "a1", cik: 5, price_per_share: 10.0 })); + await repo.save(row({ accession_number: "a1", cik: 5, price_per_share: 10.12 })); + const rows = await repo.getByCik(5); + expect(rows.length).toBe(1); + 
expect(rows[0].price_per_share).toBe(10.12); + }); +}); diff --git a/src/storage/spac/SpacRedemptionExtractionRepo.ts b/src/storage/spac/SpacRedemptionExtractionRepo.ts new file mode 100644 index 00000000..0a613b52 --- /dev/null +++ b/src/storage/spac/SpacRedemptionExtractionRepo.ts @@ -0,0 +1,34 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import { globalServiceRegistry } from "workglow"; +import { + SpacRedemptionExtraction, + SPAC_REDEMPTION_EXTRACTION_REPOSITORY_TOKEN, + SpacRedemptionExtractionRepositoryStorage, +} from "./SpacRedemptionExtractionSchema"; + +/** Per-accession redemption-extraction rows. */ +export class SpacRedemptionExtractionRepo { + private readonly storage: SpacRedemptionExtractionRepositoryStorage; + + constructor(storage?: SpacRedemptionExtractionRepositoryStorage) { + this.storage = storage ?? globalServiceRegistry.get(SPAC_REDEMPTION_EXTRACTION_REPOSITORY_TOKEN); + } + + async save(row: SpacRedemptionExtraction): Promise { + await this.storage.put(row); + } + + async getByAccession(accession_number: string): Promise { + return this.storage.get({ accession_number }); + } + + /** All extractions for a CIK (unordered). */ + async getByCik(cik: number): Promise { + return (await this.storage.query({ cik })) || []; + } +} diff --git a/src/storage/spac/SpacRedemptionExtractionSchema.ts b/src/storage/spac/SpacRedemptionExtractionSchema.ts new file mode 100644 index 00000000..54d3c985 --- /dev/null +++ b/src/storage/spac/SpacRedemptionExtractionSchema.ts @@ -0,0 +1,41 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import { Static, Type } from "typebox"; +import type { ITabularStorage } from "workglow"; +import { createServiceToken } from "workglow"; +import { TypeNullable } from "../../util/TypeBoxUtil"; + +/** One row per redemption-extraction filing. Current-state: a re-extraction overwrites by accession. 
*/ +export const SpacRedemptionExtractionSchema = Type.Object({ + accession_number: Type.String({ maxLength: 25 }), + cik: Type.Integer({ minimum: 0, description: "SPAC origin CIK (filer)" }), + form: Type.String({ maxLength: 20 }), + filing_date: Type.String({ format: "date" }), + extractor_id: Type.String({ maxLength: 32 }), + extractor_version: Type.String({ maxLength: 32 }), + redemption_shares: TypeNullable(Type.Integer({ minimum: 0 })), + redemption_amount: TypeNullable(Type.Number()), + price_per_share: TypeNullable(Type.Number()), + confidence: Type.Number(), + source_span: TypeNullable(Type.String({ maxLength: 2000 })), + model_id: TypeNullable(Type.String({ maxLength: 128 })), + created_at: Type.String({ format: "date-time" }), +}); + +export type SpacRedemptionExtraction = Static; + +export const SpacRedemptionExtractionPrimaryKeyNames = ["accession_number"] as const; +export type SpacRedemptionExtractionRepositoryStorage = ITabularStorage< + typeof SpacRedemptionExtractionSchema, + typeof SpacRedemptionExtractionPrimaryKeyNames, + SpacRedemptionExtraction +>; + +export const SPAC_REDEMPTION_EXTRACTION_REPOSITORY_TOKEN = + createServiceToken( + "sec.storage.spacRedemptionExtractionRepository" + ); diff --git a/src/storage/spac/SpacRepo.ts b/src/storage/spac/SpacRepo.ts index ca16be50..5557c063 100644 --- a/src/storage/spac/SpacRepo.ts +++ b/src/storage/spac/SpacRepo.ts @@ -49,10 +49,19 @@ export class SpacRepo { return (await this.spacRepository.query({ status })) || []; } + /** Every spac row, regardless of status. */ + async getAllSpacs(): Promise { + return (await this.spacRepository.getAll()) || []; + } + async saveDeal(deal: SpacDeal): Promise { await this.dealRepository.put(deal); } + async deleteDeal(cik: number, deal_index: number): Promise { + await this.dealRepository.delete({ cik, deal_index }); + } + /** Deals for a CIK, ascending by deal_index. 
*/ async getDeals(cik: number): Promise { const rows = (await this.dealRepository.query({ cik })) || []; diff --git a/src/storage/spac/SpacReportWriter.test.ts b/src/storage/spac/SpacReportWriter.test.ts index 998fb37f..5714c994 100644 --- a/src/storage/spac/SpacReportWriter.test.ts +++ b/src/storage/spac/SpacReportWriter.test.ts @@ -10,6 +10,7 @@ import { resetDependencyInjectionsForTesting } from "../../config/TestingDI"; import { setupAllDatabases } from "../../config/setupAllDatabases"; import { SpacRepo } from "./SpacRepo"; import { SpacReportWriter } from "./SpacReportWriter"; +import { SpacMergerExtractionRepo } from "./SpacMergerExtractionRepo"; import { CHANGE_LOG_REPOSITORY_TOKEN } from "../change-tracking/ChangeLogSchema"; describe("SpacReportWriter", () => { @@ -179,4 +180,152 @@ describe("SpacReportWriter", () => { const row = await repo.getSpac(9); expect(JSON.parse(row!.spac_tickers!)).toEqual(["NEO.U", "NEO"]); }); + + it("rolls a registered SPAC forward through DA, vote, and completion", async () => { + await writer.recordRegistration({ + cik: 10, + accession_number: "0000-reg", + filing_date: "2020-12-01", + form: "S-1", + primary_document: "s1.htm", + spac_name: "Merge SPAC", + spac_sic: 6770, + }); + + await writer.recordDealMilestones({ + cik: 10, + accession_number: "0000-da", + filing_date: "2021-03-05", + form: "8-K", + primary_document: null, + events: [{ event_type: "definitive_agreement", event_date: "2021-03-01" }], + }); + let row = await repo.getSpac(10); + expect(row?.status).toBe("deal_announced"); + expect(row?.definitive_agreement_date).toBe("2021-03-01"); + + await writer.recordDealMilestones({ + cik: 10, + accession_number: "0000-vote", + filing_date: "2021-06-02", + form: "8-K", + primary_document: null, + events: [{ event_type: "vote", event_date: "2021-06-01" }], + }); + row = await repo.getSpac(10); + expect(row?.status).toBe("proxy"); + expect(row?.vote_date).toBe("2021-06-01"); + + await writer.recordDealMilestones({ + cik: 
10, + accession_number: "0000-close", + filing_date: "2021-06-16", + form: "8-K", + primary_document: null, + events: [{ event_type: "completed", event_date: "2021-06-15" }], + }); + row = await repo.getSpac(10); + expect(row?.status).toBe("completed"); + expect(row?.completed_date).toBe("2021-06-15"); + + const deals = await repo.getDeals(10); + expect(deals.length).toBe(1); + expect(deals[0].outcome).toBe("completed"); + expect(deals[0].target_name).toBeNull(); // not available from item codes + }); + + it("is idempotent when the same milestone 8-K is reprocessed", async () => { + const call = { + cik: 11, + accession_number: "0000-da", + filing_date: "2021-03-05", + form: "8-K", + primary_document: null, + events: [{ event_type: "definitive_agreement" as const, event_date: "2021-03-01" }], + }; + await writer.recordDealMilestones(call); + await writer.recordDealMilestones(call); + + const events = await repo.getEvents(11); + expect(events.filter((e) => e.event_type === "definitive_agreement").length).toBe(1); + const deals = await repo.getDeals(11); + expect(deals.length).toBe(1); + }); + + it("does nothing when given no events", async () => { + await writer.recordDealMilestones({ + cik: 12, + accession_number: "0000-none", + filing_date: "2021-03-05", + form: "8-K", + primary_document: null, + events: [], + }); + expect(await repo.getSpac(12)).toBeUndefined(); + expect(await repo.getEvents(12)).toEqual([]); + }); + + it("derives target/pipe + proxy from a recorded merger proxy and rolls up", async () => { + await writer.recordRegistration({ + cik: 20, accession_number: "20-reg", filing_date: "2020-12-01", form: "S-1", + primary_document: "s1.htm", spac_name: "Merge SPAC", spac_sic: 6770, + }); + await writer.recordDealMilestones({ + cik: 20, accession_number: "20-da", filing_date: "2021-03-05", form: "8-K", + primary_document: null, + events: [{ event_type: "definitive_agreement", event_date: "2021-03-01" }], + }); + + await new 
SpacMergerExtractionRepo().save({ + accession_number: "20-defm", cik: 20, form: "DEFM14A", filing_date: "2021-05-01", + extractor_id: "merger-proxy", extractor_version: "1.0.0", + target_name: "Acme Target Inc.", target_cik: 999, target_observation_id: 1, + pipe_amount: 150_000_000, merger_consideration: "$10.00 per share in stock", + confidence: 0.95, source_span: "merger with Acme Target Inc.", model_id: "claude-sonnet-4-6", + created_at: new Date().toISOString(), + }); + await writer.recordMergerProxy({ + cik: 20, accession_number: "20-defm", filing_date: "2021-05-01", + form: "DEFM14A", primary_document: "defm.htm", emitProxyEvent: true, + }); + + const row = await repo.getSpac(20); + expect(row?.status).toBe("proxy"); + expect(row?.target_name).toBe("Acme Target Inc."); + expect(row?.pipe_amount).toBe(150_000_000); + expect(row?.proxy_date).toBe("2021-05-01"); + + const deals = await repo.getDeals(20); + expect(deals[0].target_name).toBe("Acme Target Inc."); + expect(deals[0].target_cik).toBe(999); + }); + + it("does not emit a proxy event for a preliminary proxy (PREM14A)", async () => { + await writer.recordRegistration({ + cik: 21, accession_number: "21-reg", filing_date: "2020-12-01", form: "S-1", + primary_document: "s1.htm", spac_name: "Merge SPAC", spac_sic: 6770, + }); + await writer.recordDealMilestones({ + cik: 21, accession_number: "21-da", filing_date: "2021-03-05", form: "8-K", + primary_document: null, + events: [{ event_type: "definitive_agreement", event_date: "2021-03-01" }], + }); + await new SpacMergerExtractionRepo().save({ + accession_number: "21-prem", cik: 21, form: "PREM14A", filing_date: "2021-04-01", + extractor_id: "merger-proxy", extractor_version: "1.0.0", + target_name: "Acme Target Inc.", target_cik: null, target_observation_id: null, + pipe_amount: null, merger_consideration: null, confidence: 0.9, source_span: null, + model_id: null, created_at: new Date().toISOString(), + }); + await writer.recordMergerProxy({ + cik: 21, 
accession_number: "21-prem", filing_date: "2021-04-01", + form: "PREM14A", primary_document: "prem.htm", emitProxyEvent: false, + }); + + const events = await repo.getEvents(21); + expect(events.some((e) => e.event_type === "proxy")).toBe(false); + const row = await repo.getSpac(21); + expect(row?.target_name).toBe("Acme Target Inc."); // still correlated + expect(row?.status).toBe("deal_announced"); // no proxy event -> not "proxy" + }); }); diff --git a/src/storage/spac/SpacReportWriter.ts b/src/storage/spac/SpacReportWriter.ts index 00efef47..50ff32c6 100644 --- a/src/storage/spac/SpacReportWriter.ts +++ b/src/storage/spac/SpacReportWriter.ts @@ -7,8 +7,11 @@ import { globalServiceRegistry, uuid4 } from "workglow"; import { SpacRepo } from "./SpacRepo"; import { buildSpacRow, type SpacRowPatch } from "./spacRollup"; +import { deriveDeals } from "./spacDealGrouping"; +import { SpacMergerExtractionRepo } from "./SpacMergerExtractionRepo"; +import { SpacRedemptionExtractionRepo } from "./SpacRedemptionExtractionRepo"; import type { Spac } from "./SpacSchema"; -import type { SpacEvent } from "./SpacEventSchema"; +import type { SpacEvent, SpacEventType } from "./SpacEventSchema"; import type { SpacHistory } from "./SpacHistorySchema"; import { CHANGE_LOG_REPOSITORY_TOKEN } from "../change-tracking/ChangeLogSchema"; @@ -33,6 +36,26 @@ interface RecordIpoArgs { readonly spac_tickers: readonly string[] | null; } +interface RecordDealMilestonesArgs { + readonly cik: number; + readonly accession_number: string; + readonly filing_date: string; + readonly form: string; + readonly primary_document: string | null; + /** event_date is pre-resolved by the caller (report_date ?? filing_date). 
*/ + readonly events: readonly { event_type: SpacEventType; event_date: string }[]; +} + +interface RecordMergerProxyArgs { + readonly cik: number; + readonly accession_number: string; + readonly filing_date: string; + readonly form: string; + readonly primary_document: string | null; + /** true appends a `proxy` event (definitive DEFM14A/DEFM14C); false is extraction-only (preliminary or revised proxy). */ + readonly emitProxyEvent: boolean; +} + /** Fields compared for ChangeLog/history; everything except the volatile timestamp. */ const TRACKED_FIELDS: readonly (keyof Spac)[] = [ "current_cik", "status", "spac_name", "target_name", "surviving_name", "current_name", @@ -49,6 +72,8 @@ const TRACKED_FIELDS: readonly (keyof Spac)[] = [ */ export class SpacReportWriter { private readonly repo: SpacRepo; + private readonly mergerExtractions = new SpacMergerExtractionRepo(); + private readonly redemptionExtractions = new SpacRedemptionExtractionRepo(); constructor(repo: SpacRepo = new SpacRepo()) { this.repo = repo; @@ -89,6 +114,92 @@ export class SpacReportWriter { }); } + /** + * Record de-SPAC milestone events mapped from 8-K item codes: append each + * event (idempotent by PK), recompute the deal set from the full event + * stream and stored extractions (§4b-owned columns are re-derived), then rebuild the row. + */ + async recordDealMilestones(args: RecordDealMilestonesArgs): Promise { + if (args.events.length === 0) return; + for (const e of args.events) { + await this.appendEvent({ + cik: args.cik, + accession_number: args.accession_number, + event_type: e.event_type, + event_date: e.event_date, + form: args.form, + primary_document: args.primary_document, + }); + } + await this.recomputeAndSaveDeals(args.cik); + await this.rebuild(args.cik, args.filing_date, `${args.form}:${args.accession_number}`, {}); + } + + /** + * Record a merger proxy: emit a `proxy` event for the definitive proxy + * (DEFM14A/DEFM14C; gated by `emitProxyEvent`), recompute deals from the event stream + stored + * extractions (correlation derives target/pipe), then rebuild the row.
The + * extraction itself is persisted by the caller (`processMergerProxy`) before + * this runs. + */ + async recordMergerProxy(args: RecordMergerProxyArgs): Promise { + if (args.emitProxyEvent) { + await this.appendEvent({ + cik: args.cik, + accession_number: args.accession_number, + event_type: "proxy", + event_date: args.filing_date, + form: args.form, + primary_document: args.primary_document, + }); + } + await this.recomputeAndSaveDeals(args.cik); + await this.rebuild(args.cik, args.filing_date, `${args.form}:${args.accession_number}`, {}); + } + + /** + * Record a realized redemption: recompute deals from the event stream + + * stored redemption extractions (correlation derives redemption_amount / + * redemption_shares onto the matching deal), then rebuild the row. No event + * is appended — redemptions never advance the lifecycle and an extra event + * would double-count in the rollup. The extraction itself is persisted by the + * caller (`processRedemption8K`) before this runs. + */ + async recordRedemption(args: { + readonly cik: number; + readonly accession_number: string; + readonly filing_date: string; + readonly form: string; + }): Promise { + await this.recomputeAndSaveDeals(args.cik); + await this.rebuild(args.cik, args.filing_date, `${args.form}:${args.accession_number}`, {}); + } + + /** + * Rebuild the deal set from the CIK's full event stream + merger and redemption + * extractions (the single derivation path shared by the 8-K, merger-proxy, and redemption writers).
+ */ + private async recomputeAndSaveDeals(cik: number): Promise { + const [events, extractions, redemptions, existingDeals] = await Promise.all([ + this.repo.getEvents(cik), + this.mergerExtractions.getByCik(cik), + this.redemptionExtractions.getByCik(cik), + this.repo.getDeals(cik), + ]); + const deals = deriveDeals(cik, events, extractions, redemptions, existingDeals); + // Reconcile: if a prior derivation yielded more deals than this one (the + // event stream or derivation logic changed), delete the orphaned rows. + // saveDeal only upserts, so without this their stale columns — notably + // redemption_amount — would still be summed into the rolled-up totals. + const liveIndexes = new Set(deals.map((d) => d.deal_index)); + for (const existing of existingDeals) { + if (!liveIndexes.has(existing.deal_index)) { + await this.repo.deleteDeal(existing.cik, existing.deal_index); + } + } + for (const deal of deals) await this.repo.saveDeal(deal); + } + private async appendEvent( partial: Pick & Partial diff --git a/src/storage/spac/recomputeDeals.reconcile.test.ts b/src/storage/spac/recomputeDeals.reconcile.test.ts new file mode 100644 index 00000000..fd03ba89 --- /dev/null +++ b/src/storage/spac/recomputeDeals.reconcile.test.ts @@ -0,0 +1,86 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import { beforeEach, describe, expect, it } from "bun:test"; +import { resetDependencyInjectionsForTesting } from "../../config/TestingDI"; +import { setupAllDatabases } from "../../config/setupAllDatabases"; +import { SpacRepo } from "./SpacRepo"; +import { SpacReportWriter } from "./SpacReportWriter"; +import type { SpacDeal } from "./SpacDealSchema"; + +/** A stale orphan deal row carrying a redemption amount, as a prior larger derivation would leave. 
*/ +const orphanDeal = (cik: number, deal_index: number): SpacDeal => ({ + cik, + deal_index, + target_name: null, + target_cik: null, + announced_date: null, + definitive_agreement_date: null, + proxy_date: null, + vote_date: null, + pipe_amount: null, + redemption_amount: 999_999, + redemption_shares: 1, + outcome: "completed", + outcome_date: "2026-09-09", + source_accession: "orphan", + created_at: "2026-01-01T00:00:00.000Z", +}); + +describe("recomputeAndSaveDeals reconciliation", () => { + beforeEach(async () => { + resetDependencyInjectionsForTesting(); + await setupAllDatabases(); + }); + + it("deletes deal rows no longer in the derived set so the rollup drops their amounts", async () => { + const cik = 555; + const writer = new SpacReportWriter(); + await writer.recordRegistration({ + cik, + accession_number: `${cik}-reg`, + filing_date: "2025-12-01", + form: "S-1", + primary_document: "s1.htm", + spac_name: "Reconcile SPAC Inc.", + spac_sic: 6770, + }); + // A two-event stream derives exactly one deal (deal_index 0). + await writer.recordDealMilestones({ + cik, + accession_number: `${cik}-da`, + filing_date: "2026-01-10", + form: "8-K", + primary_document: null, + events: [ + { event_type: "definitive_agreement", event_date: "2026-01-10" }, + { event_type: "completed", event_date: "2026-03-20" }, + ], + }); + const repo = new SpacRepo(); + expect((await repo.getDeals(cik)).map((d) => d.deal_index)).toEqual([0]); + + // Inject a stale orphan (deal_index 1) as a prior, larger derivation would have left. + await repo.saveDeal(orphanDeal(cik, 1)); + expect((await repo.getDeals(cik)).length).toBe(2); + + // Any recompute must reconcile away the orphan. recordRedemption appends no + // event — it only recomputes deals + rebuilds the row. 
+ await writer.recordRedemption({ + cik, + accession_number: `${cik}-redemption`, + filing_date: "2026-03-21", + form: "8-K", + }); + + const after = await repo.getDeals(cik); + expect(after.map((d) => d.deal_index)).toEqual([0]); + // The orphan's 999,999 is no longer summed into the rollup. + const spac = await repo.getSpac(cik); + expect(spac).toBeDefined(); + expect(spac?.total_redemption_amount ?? null).toBeNull(); + }); +}); diff --git a/src/storage/spac/spacDealGrouping.redemption.test.ts b/src/storage/spac/spacDealGrouping.redemption.test.ts new file mode 100644 index 00000000..9ad5bac9 --- /dev/null +++ b/src/storage/spac/spacDealGrouping.redemption.test.ts @@ -0,0 +1,98 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ +import { describe, expect, it } from "bun:test"; +import { deriveDeals } from "./spacDealGrouping"; +import type { SpacEvent } from "./SpacEventSchema"; +import type { SpacRedemptionExtraction } from "./SpacRedemptionExtractionSchema"; + +const ev = (event_type: string, event_date: string, acc: string): SpacEvent => + ({ + cik: 1, + accession_number: acc, + event_type, + event_date, + form: "8-K", + primary_document: null, + source_document_url: null, + deal_index: null, + amount: null, + shares: null, + detail: null, + confidence: null, + created_at: "2026-01-01T00:00:00.000Z", + }) as unknown as SpacEvent; + +const red = ( + acc: string, + filing_date: string, + shares: number | null, + amount: number | null +): SpacRedemptionExtraction => ({ + accession_number: acc, + cik: 1, + form: "8-K", + filing_date, + extractor_id: "redemption", + extractor_version: "1.0.0", + redemption_shares: shares, + redemption_amount: amount, + price_per_share: null, + confidence: 0.9, + source_span: "x", + model_id: "fake", + created_at: "2026-01-01T00:00:00.000Z", +}); + +describe("deriveDeals redemption correlation", () => { + it("attaches a redemption filed at/after the deal's completion date", () => { + 
const events = [ + ev("definitive_agreement", "2026-01-10", "da-1"), + ev("completed", "2026-03-20", "close-1"), + ]; + const deals = deriveDeals(1, events, [], [red("r-1", "2026-03-20", 500000, 5_100_000)], []); + expect(deals).toHaveLength(1); + expect(deals[0].redemption_amount).toBe(5_100_000); + expect(deals[0].redemption_shares).toBe(500000); + }); + + it("buckets redemptions by announcement window across two deals", () => { + const events = [ + ev("definitive_agreement", "2026-01-10", "da-1"), + ev("terminated", "2026-02-15", "term-1"), + ev("definitive_agreement", "2026-04-01", "da-2"), + ev("completed", "2026-06-01", "close-2"), + ]; + const reds = [red("r-1", "2026-02-10", 100, 1000), red("r-2", "2026-06-01", 200, 2000)]; + const deals = deriveDeals(1, events, [], reds, []); + expect(deals[0].redemption_amount).toBe(1000); + expect(deals[1].redemption_amount).toBe(2000); + }); + + it("latest redemption filing wins; non-null survives a later null", () => { + const events = [ev("definitive_agreement", "2026-01-10", "da-1")]; + const reds = [red("r-1", "2026-03-01", 100, 1000), red("r-2", "2026-03-05", 150, null)]; + const deals = deriveDeals(1, events, [], reds, []); + expect(deals[0].redemption_shares).toBe(150); + expect(deals[0].redemption_amount).toBe(1000); + }); + + it("leaves redemptions unattached when there is no deal", () => { + const deals = deriveDeals(1, [], [], [red("r-1", "2026-03-01", 100, 1000)], []); + expect(deals).toEqual([]); + }); + + it("attaches a vote-results redemption filed before a completion-only deal's date", () => { + // A SPAC whose only ingested milestone is the completion 8-K (no 1.01 DA); + // the `vote` event opens no deal, so the deal is opened solely by `completed` + // and its only date is the later outcome_date. A redemption reported at the + // vote (filed before closing) must still attach to that single deal. 
+ const events = [ev("vote", "2026-03-19", "vote-1"), ev("completed", "2026-03-20", "close-1")]; + const deals = deriveDeals(1, events, [], [red("r-1", "2026-03-19", 400000, 4_000_000)], []); + expect(deals).toHaveLength(1); + expect(deals[0].redemption_amount).toBe(4_000_000); + expect(deals[0].redemption_shares).toBe(400000); + }); +}); diff --git a/src/storage/spac/spacDealGrouping.test.ts b/src/storage/spac/spacDealGrouping.test.ts new file mode 100644 index 00000000..dcc4ba1c --- /dev/null +++ b/src/storage/spac/spacDealGrouping.test.ts @@ -0,0 +1,234 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, expect, it } from "bun:test"; +import { deriveDeals } from "./spacDealGrouping"; +import type { SpacDeal } from "./SpacDealSchema"; +import type { SpacEvent, SpacEventType } from "./SpacEventSchema"; +import type { SpacMergerExtraction } from "./SpacMergerExtractionSchema"; + +function ev( + event_type: SpacEventType, + event_date: string, + accession_number = `${event_date}-${event_type}` +): SpacEvent { + return { + cik: 1, + accession_number, + event_type, + event_date, + form: "8-K", + primary_document: null, + source_document_url: null, + deal_index: null, + amount: null, + shares: null, + detail: null, + confidence: null, + created_at: "2026-01-01T00:00:00.000Z", + }; +} + +function ext( + accession_number: string, + filing_date: string, + p: Partial = {} +): SpacMergerExtraction { + return { + accession_number, + cik: 1, + form: "DEFM14A", + filing_date, + extractor_id: "merger-proxy", + extractor_version: "1.0.0", + target_name: null, + target_cik: null, + target_observation_id: null, + pipe_amount: null, + merger_consideration: null, + confidence: 0.9, + source_span: null, + model_id: null, + created_at: "2026-01-01T00:00:00.000Z", + ...p, + }; +} + +function deal(p: Pick & Partial): SpacDeal { + return { + cik: 1, + target_name: null, + target_cik: null, + announced_date: null, + 
definitive_agreement_date: null, + proxy_date: null, + vote_date: null, + pipe_amount: null, + redemption_amount: null, + redemption_shares: null, + outcome_date: null, + source_accession: null, + created_at: "2026-01-01T00:00:00.000Z", + ...p, + }; +} + +describe("deriveDeals", () => { + it("groups DA -> vote -> completion into one completed deal", () => { + const deals = deriveDeals( + 1, + [ + ev("definitive_agreement", "2021-03-01"), + ev("vote", "2021-06-01"), + ev("completed", "2021-06-15"), + ], + [], + [], + [] + ); + expect(deals.length).toBe(1); + expect(deals[0].deal_index).toBe(0); + expect(deals[0].outcome).toBe("completed"); + expect(deals[0].announced_date).toBe("2021-03-01"); + expect(deals[0].definitive_agreement_date).toBe("2021-03-01"); + expect(deals[0].vote_date).toBe("2021-06-01"); + expect(deals[0].outcome_date).toBe("2021-06-15"); + }); + + it("splits a terminated attempt and a later completed attempt into two deals", () => { + const deals = deriveDeals( + 1, + [ + ev("definitive_agreement", "2021-01-01"), + ev("terminated", "2021-02-01"), + ev("definitive_agreement", "2021-05-01"), + ev("completed", "2021-09-01"), + ], + [], + [], + [] + ); + expect(deals.map((d) => d.deal_index)).toEqual([0, 1]); + expect(deals[0].outcome).toBe("terminated"); + expect(deals[0].outcome_date).toBe("2021-02-01"); + expect(deals[1].outcome).toBe("completed"); + expect(deals[1].announced_date).toBe("2021-05-01"); + expect(deals[1].outcome_date).toBe("2021-09-01"); + }); + + it("ignores an extension vote with no open deal", () => { + const deals = deriveDeals(1, [ev("vote", "2021-04-01")], [], [], []); + expect(deals.length).toBe(0); + }); + + it("opens an already-completed deal when 2.01 has no preceding DA", () => { + const deals = deriveDeals(1, [ev("completed", "2021-09-01")], [], [], []); + expect(deals.length).toBe(1); + expect(deals[0].outcome).toBe("completed"); + expect(deals[0].announced_date).toBeNull(); + 
expect(deals[0].outcome_date).toBe("2021-09-01"); + }); + + it("assigns the same deal_index regardless of event insertion order", () => { + const ordered = deriveDeals( + 1, + [ + ev("definitive_agreement", "2021-01-01"), + ev("terminated", "2021-02-01"), + ev("definitive_agreement", "2021-05-01"), + ev("completed", "2021-09-01"), + ], + [], + [], + [] + ); + const shuffled = deriveDeals( + 1, + [ + ev("completed", "2021-09-01"), + ev("definitive_agreement", "2021-05-01"), + ev("definitive_agreement", "2021-01-01"), + ev("terminated", "2021-02-01"), + ], + [], + [], + [] + ); + // created_at is a wall-clock stamp for new rows; compare the derived fields. + const strip = (ds: typeof ordered) => ds.map(({ created_at, ...rest }) => rest); + expect(strip(shuffled)).toEqual(strip(ordered)); + }); + + it("preserves created_at from an existing deal row", () => { + const existing = [deal({ deal_index: 0, outcome: "pending", created_at: "2020-01-01T00:00:00.000Z" })]; + const deals = deriveDeals(1, [ev("definitive_agreement", "2021-03-01")], [], [], existing); + expect(deals[0].created_at).toBe("2020-01-01T00:00:00.000Z"); + }); + + it("derives target/pipe onto the deal whose window contains the proxy filing", () => { + const deals = deriveDeals( + 1, + [ev("definitive_agreement", "2021-03-01"), ev("completed", "2021-06-15")], + [ext("p1", "2021-05-01", { target_name: "Acme Target Inc.", pipe_amount: 150_000_000 })], + [], + [] + ); + expect(deals.length).toBe(1); + expect(deals[0].target_name).toBe("Acme Target Inc."); + expect(deals[0].pipe_amount).toBe(150_000_000); + }); + + it("lets a definitive proxy supersede an earlier preliminary one (latest non-null wins)", () => { + const deals = deriveDeals( + 1, + [ev("definitive_agreement", "2021-03-01"), ev("completed", "2021-06-15")], + [ + ext("prem", "2021-04-01", { form: "PREM14A", target_name: "Acme Target Inc.", pipe_amount: null }), + ext("defm", "2021-05-10", { form: "DEFM14A", target_name: "Acme Target, Inc.", 
pipe_amount: 200_000_000 }), + ], + [], + [] + ); + expect(deals[0].target_name).toBe("Acme Target, Inc."); // definitive wins + expect(deals[0].pipe_amount).toBe(200_000_000); + }); + + it("leaves an extraction with no matching open deal unattached", () => { + // proxy filed before any DA event -> no deal yet + const deals = deriveDeals(1, [], [ext("p1", "2021-05-01", { target_name: "Acme" })], [], []); + expect(deals.length).toBe(0); + }); + + it("routes two deals' proxies to the correct deal_index", () => { + const deals = deriveDeals( + 1, + [ + ev("definitive_agreement", "2021-01-01"), + ev("terminated", "2021-02-15"), + ev("definitive_agreement", "2021-05-01"), + ev("completed", "2021-09-01"), + ], + [ + ext("p0", "2021-01-20", { target_name: "First Target" }), + ext("p1", "2021-06-01", { target_name: "Second Target" }), + ], + [], + [] + ); + expect(deals.map((d) => d.target_name)).toEqual(["First Target", "Second Target"]); + }); + + it("sets proxy_date from a proxy event on the open deal", () => { + const deals = deriveDeals( + 1, + [ev("definitive_agreement", "2021-03-01"), ev("proxy", "2021-05-20")], + [], + [], + [] + ); + expect(deals[0].proxy_date).toBe("2021-05-20"); + }); +}); diff --git a/src/storage/spac/spacDealGrouping.ts b/src/storage/spac/spacDealGrouping.ts new file mode 100644 index 00000000..4a0c10f1 --- /dev/null +++ b/src/storage/spac/spacDealGrouping.ts @@ -0,0 +1,223 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { SpacDeal, SpacDealOutcome } from "./SpacDealSchema"; +import type { SpacEvent, SpacEventType } from "./SpacEventSchema"; +import type { SpacMergerExtraction } from "./SpacMergerExtractionSchema"; +import type { SpacRedemptionExtraction } from "./SpacRedemptionExtractionSchema"; + +/** Event types that shape a business-combination attempt. 
*/ +const DEAL_RELEVANT_EVENT_TYPES: readonly SpacEventType[] = [ + "definitive_agreement", + "terminated", + "completed", + "vote", + "proxy", +]; + +interface DealSkeleton { + deal_index: number; + announced_date: string | null; + definitive_agreement_date: string | null; + proxy_date: string | null; + vote_date: string | null; + outcome: SpacDealOutcome; + outcome_date: string | null; + source_accession: string | null; + // §4b columns, derived by correlating merger extractions (below). + target_name: string | null; + target_cik: number | null; + pipe_amount: number | null; + // Columns derived by correlating redemption extractions (below). + redemption_amount: number | null; + redemption_shares: number | null; +} + +/** + * Rebuild the full {@link SpacDeal} set for a CIK from its append-only events + * and its merger-proxy / redemption extractions. + * + * Deterministic + replay-safe: events are ordered by `(event_date, + * accession_number)` and walked with a single "open deal" cursor, so the same + * event set always yields the same `deal_index` assignments. `source_accession` + * reflects the latest event that shaped the deal. 8-K-owned columns + * (`announced_date`, `definitive_agreement_date`, `vote_date`, `outcome`, + * `outcome_date`) and `proxy_date` come from the event walk. + * + * §4b-owned columns (`target_name`, `target_cik`, `pipe_amount`) are **derived** + * by correlating each {@link SpacMergerExtraction} to the deal whose + * `[announced, closed)` window contains the proxy's `filing_date` (latest + * non-null wins, ordered by `(filing_date, accession_number)`). Redemption columns + * (`redemption_amount`, `redemption_shares`) are derived from + * {@link SpacRedemptionExtraction} rows (same ordering) by announcement window (upper bound is the + * next deal's announcement, not the current deal's outcome_date, so a redemption + * reported at or after closing still attaches to that deal). `created_at` is + * preserved from any existing row.
+ */ +export function deriveDeals( + cik: number, + events: readonly SpacEvent[], + mergerExtractions: readonly SpacMergerExtraction[], + redemptionExtractions: readonly SpacRedemptionExtraction[], + existingDeals: readonly SpacDeal[] +): SpacDeal[] { + const relevant = events + .filter((e) => DEAL_RELEVANT_EVENT_TYPES.includes(e.event_type as SpacEventType)) + .sort( + (a, b) => + a.event_date.localeCompare(b.event_date) || + a.accession_number.localeCompare(b.accession_number) + ); + + const skeletons: DealSkeleton[] = []; + let open: DealSkeleton | null = null; + let nextIndex = 0; + + const openNew = (e: SpacEvent): DealSkeleton => { + const d: DealSkeleton = { + deal_index: nextIndex++, + announced_date: null, + definitive_agreement_date: null, + proxy_date: null, + vote_date: null, + outcome: "pending", + outcome_date: null, + source_accession: e.accession_number, + target_name: null, + target_cik: null, + pipe_amount: null, + redemption_amount: null, + redemption_shares: null, + }; + skeletons.push(d); + return d; + }; + + for (const e of relevant) { + switch (e.event_type) { + case "definitive_agreement": { + if (!open) open = openNew(e); + if (open.announced_date == null) open.announced_date = e.event_date; + if ( + open.definitive_agreement_date == null || + e.event_date > open.definitive_agreement_date + ) { + open.definitive_agreement_date = e.event_date; + } + open.source_accession = e.accession_number; + break; + } + case "terminated": { + if (open) { + open.outcome = "terminated"; + open.outcome_date = e.event_date; + open.source_accession = e.accession_number; + open = null; + } + break; + } + case "completed": { + const d = open ??
openNew(e); + d.outcome = "completed"; + d.outcome_date = e.event_date; + d.source_accession = e.accession_number; + open = null; + break; + } + case "vote": { + if (open) { + if (open.vote_date == null || e.event_date > open.vote_date) { + open.vote_date = e.event_date; + } + open.source_accession = e.accession_number; + } + break; + } + case "proxy": { + if (open) { + if (open.proxy_date == null || e.event_date > open.proxy_date) { + open.proxy_date = e.event_date; + } + open.source_accession = e.accession_number; + } + // No open deal -> proxy with no announced deal: timeline-only. + break; + } + } + } + + // --- Correlate merger extractions onto deals by filing-date window --- + // A deal owns [lower, upper): lower = its announced/DA date, upper = its + // outcome_date else the next deal's announced date else open-ended. + for (let i = 0; i < skeletons.length; i++) { + const d = skeletons[i]; + const lower = d.announced_date ?? d.definitive_agreement_date ?? null; + const upper = d.outcome_date ?? skeletons[i + 1]?.announced_date ?? null; + const matched = mergerExtractions + .filter( + (m) => + (lower == null || m.filing_date >= lower) && (upper == null || m.filing_date < upper) + ) + .sort((a, b) => a.filing_date.localeCompare(b.filing_date) || a.accession_number.localeCompare(b.accession_number)); + // Latest non-null wins per field (same-day filings tie-break by accession_number so replays agree); earlier non-nulls survive when later is null. + for (const m of matched) { + if (m.target_name != null) d.target_name = m.target_name; + if (m.target_cik != null) d.target_cik = m.target_cik; + if (m.pipe_amount != null) d.pipe_amount = m.pipe_amount; + } + } + + // --- Correlate redemption extractions onto deals by announcement window --- + // The deals contiguously partition the timeline: deal i owns [B(i-1), B(i)), + // where B(k) is the boundary between deal k and deal k+1 = the next deal's + // earliest date (announced/DA/outcome). The first deal's lower bound is + // unbounded (B(-1) = null) so a redemption reported before the first recorded + // deal date — e.g.
a vote-results 8-K for a deal opened only by `completed` + // (no 1.01), whose only date is its later outcome_date — still attaches. + // Unlike the merger window this ignores outcome_date for the upper bound, so a + // redemption reported at/after closing still attaches to the deal being closed. + const dealLower = (d: DealSkeleton): string | null => + d.announced_date ?? d.definitive_agreement_date ?? d.outcome_date ?? null; + for (let i = 0; i < skeletons.length; i++) { + const d = skeletons[i]; + const lower = i === 0 ? null : dealLower(d); + const upper = skeletons[i + 1] ? dealLower(skeletons[i + 1]) : null; + const matched = redemptionExtractions + .filter( + (r) => + (lower == null || r.filing_date >= lower) && (upper == null || r.filing_date < upper) + ) + .sort((a, b) => a.filing_date.localeCompare(b.filing_date) || a.accession_number.localeCompare(b.accession_number)); + // Latest non-null wins per field (same-day filings tie-break by accession_number so replays agree); earlier non-nulls survive when a later filing omits them. + for (const r of matched) { + if (r.redemption_amount != null) d.redemption_amount = r.redemption_amount; + if (r.redemption_shares != null) d.redemption_shares = r.redemption_shares; + } + } + + const existingByIndex = new Map(existingDeals.map((d) => [d.deal_index, d])); + return skeletons.map((s) => ({ + cik, + deal_index: s.deal_index, + // §4b columns: derived from correlated extractions (no merge-preserve). + target_name: s.target_name, + target_cik: s.target_cik, + pipe_amount: s.pipe_amount, + // proxy_date: derived from the proxy event in the walk. + proxy_date: s.proxy_date, + // Columns derived from correlated redemption extractions. + redemption_amount: s.redemption_amount, + redemption_shares: s.redemption_shares, + // 8-K-owned columns: + announced_date: s.announced_date, + definitive_agreement_date: s.definitive_agreement_date, + vote_date: s.vote_date, + outcome: s.outcome, + outcome_date: s.outcome_date, + source_accession: s.source_accession, + created_at: existingByIndex.get(s.deal_index)?.created_at ??
new Date().toISOString(), + })); +} diff --git a/src/storage/versioning/componentRegistry.test.ts b/src/storage/versioning/componentRegistry.test.ts index 4116a8a8..a37b7c17 100644 --- a/src/storage/versioning/componentRegistry.test.ts +++ b/src/storage/versioning/componentRegistry.test.ts @@ -41,7 +41,7 @@ describe("componentRegistry", () => { }); it("listRegisteredComponents returns one entry per extractor and resolver", () => { - // 13 extractors + 4 resolvers (person, company, sponsor-family, underwriter-family). - expect(listRegisteredComponents()).toHaveLength(17); + // 15 extractors + 4 resolvers (person, company, sponsor-family, underwriter-family). + expect(listRegisteredComponents()).toHaveLength(19); }); }); diff --git a/src/storage/versioning/extractorIds.test.ts b/src/storage/versioning/extractorIds.test.ts index a1f8bf10..cbbcf025 100644 --- a/src/storage/versioning/extractorIds.test.ts +++ b/src/storage/versioning/extractorIds.test.ts @@ -25,9 +25,17 @@ describe("extractorIds", () => { "CFPORTAL", "D", "S-1", + "merger-proxy", + "redemption", ]); }); + it("maps the merger proxies to extractor id 'merger-proxy'", () => { + for (const form of ["DEFM14A", "PREM14A", "DEFM14C", "PREM14C", "DEFR14A", "PRER14A"]) { + expect(formToExtractorId(form)).toBe("merger-proxy"); + } + }); + it("maps the 424 prospectus variants to extractor id '424'", () => { for (const form of ["424A", "424B1", "424B2", "424B3", "424B4", "424B5", "424B7"]) { expect(formToExtractorId(form)).toBe("424"); diff --git a/src/storage/versioning/extractorIds.ts b/src/storage/versioning/extractorIds.ts index 28c10721..a20f2c67 100644 --- a/src/storage/versioning/extractorIds.ts +++ b/src/storage/versioning/extractorIds.ts @@ -18,6 +18,8 @@ export const EXTRACTOR_IDS = [ "S-1", "424", "8-K", + "merger-proxy", + "redemption", ] as const; export type ExtractorId = (typeof EXTRACTOR_IDS)[number]; @@ -77,6 +79,12 @@ export const FORM_TO_EXTRACTOR_ID: Readonly> = { "424B7": "424", "8-K": "8-K",
/**
 * @license
 * Copyright 2026 Steven Roussey
 * SPDX-License-Identifier: Apache-2.0
 */

import { beforeEach, describe, expect, it } from "bun:test";
import type { IExecuteContext } from "workglow";
import { globalServiceRegistry } from "workglow";
import { resetDependencyInjectionsForTesting } from "../../config/TestingDI";
import { setupAllDatabases } from "../../config/setupAllDatabases";
import { FILING_REPOSITORY_TOKEN } from "../../storage/filing/FilingSchema";
import { SpacReportWriter } from "../../storage/spac/SpacReportWriter";
import { ProcessAccessionDocFormTask } from "./ProcessAccessionDocFormTask";

/**
 * Task double that records which file name each fetch asks for and returns an
 * empty body instead of fetching anything.
 */
class CapturingTask extends ProcessAccessionDocFormTask {
  public readonly fetched: string[] = [];

  protected override async runFetch(
    _cik: number,
    _accessionNumber: string,
    fileName: string,
    _context: IExecuteContext
  ): Promise<string> {
    this.fetched.push(fileName);
    return "";
  }
}

/** Registers `cik` as a known SPAC by recording an S-1 registration for it. */
async function seedSpac(cik: number): Promise<void> {
  const writer = new SpacReportWriter();
  await writer.recordRegistration({
    cik,
    accession_number: `${cik}-reg`,
    filing_date: "2025-12-01",
    form: "S-1",
    primary_document: "s1.htm",
    spac_name: "Redeem SPAC Inc.",
    spac_sic: 6770,
  });
}

/** Fields a test controls when seeding a filing row; the rest are fixed. */
interface SeedFilingOptions {
  readonly cik: number;
  readonly accession_number: string;
  readonly form: string;
  readonly primary_doc: string;
  readonly items: string;
}

/** Stores one filing-metadata row with fixed dates and null optional columns. */
async function seedFiling(opts: SeedFilingOptions): Promise<void> {
  const { cik, accession_number, form, primary_doc, items } = opts;
  const filingRepo = globalServiceRegistry.get(FILING_REPOSITORY_TOKEN);
  await filingRepo.put({
    cik,
    accession_number,
    form,
    primary_doc,
    file_number: "",
    filing_date: "2026-03-20",
    acceptance_date: "2026-03-20T00:00:00.000Z",
    report_date: "2026-03-19",
    film_number: null,
    primary_doc_description: null,
    size: null,
    is_xbrl: null,
    is_inline_xbrl: null,
    items,
    act: null,
  } as never);
}

describe("ProcessAccessionDocFormTask redemption fetch escalation", () => {
  beforeEach(async () => {
    resetDependencyInjectionsForTesting();
    await setupAllDatabases();
  });

  /**
   * Seeds one 8-K (primary document `primary.htm`) for `cik`, runs the task on
   * it, and returns the file names the task tried to fetch.
   */
  const fetchedFor = async (
    cik: number,
    accession: string,
    items: string
  ): Promise<string[]> => {
    await seedFiling({
      cik,
      accession_number: accession,
      form: "8-K",
      primary_doc: "primary.htm",
      items,
    });
    const task = new CapturingTask();
    await task.run({ accessionNumber: accession });
    return task.fetched;
  };

  it("fetches the full .txt for a known-SPAC trigger-item 8-K", async () => {
    const accession = "0000000000-26-000007";
    await seedSpac(7);
    const fetched = await fetchedFor(7, accession, "5.07,9.01");
    expect(fetched).toContain(`${accession}.txt`);
  });

  it("keeps the primary-doc fetch for a non-trigger item", async () => {
    const accession = "0000000000-26-000008";
    await seedSpac(7);
    const fetched = await fetchedFor(7, accession, "2.02");
    expect(fetched).toContain("primary.htm");
    expect(fetched).not.toContain(`${accession}.txt`);
  });

  it("keeps the primary-doc fetch for a non-SPAC CIK", async () => {
    // No seedSpac call: CIK 99 has no spac row.
    const accession = "0000000000-26-000010";
    const fetched = await fetchedFor(99, accession, "5.07");
    expect(fetched).toContain("primary.htm");
    expect(fetched).not.toContain(`${accession}.txt`);
  });
});
--git a/src/task/forms/ProcessAccessionDocFormTask.ts b/src/task/forms/ProcessAccessionDocFormTask.ts index 750baac5..1d02908a 100644 --- a/src/task/forms/ProcessAccessionDocFormTask.ts +++ b/src/task/forms/ProcessAccessionDocFormTask.ts @@ -25,7 +25,10 @@ import { processForm144 } from "../../sec/forms/insider-trading/Form_144.storage import { processFormS1 } from "../../sec/forms/registration-statements/Form_S_1.storage"; import { processForm424 } from "../../sec/forms/registration-statements/Form_424.storage"; import { processForm8K } from "../../sec/forms/miscellaneous-filings/Form_8_K.storage"; +import { processMergerProxy } from "../../sec/forms/proxies-information-statements/Form_DEFM14A.storage"; +import { hasRedemptionTriggerItem } from "../../sec/forms/miscellaneous-filings/spac8kRedemptionTriggers"; import { TypeSecCik } from "../../sec/submissions/EnititySubmissionSchema"; +import { SpacRepo } from "../../storage/spac/SpacRepo"; import { ExtractionDeadLetterRepo } from "../../storage/dead-letter/ExtractionDeadLetterRepo"; import { FILING_REPOSITORY_TOKEN } from "../../storage/filing/FilingSchema"; import { COMPONENT_VERSION_REPOSITORY_TOKEN } from "../../storage/versioning/ComponentVersionSchema"; @@ -192,6 +195,21 @@ export class ProcessAccessionDocFormTask extends Task< fileName = fullSubmissionFileName(accessionNumber); } + // Known-SPAC 8-Ks carrying a redemption-trigger item are fetched as the full + // submission .txt so the redemption pass can read the EX-99 vote-results + // exhibit, not just the primary document. Other 8-Ks keep their primary-doc + // fetch. 
/**
 * @license
 * Copyright 2026 Steven Roussey
 * SPDX-License-Identifier: Apache-2.0
 */
import { beforeEach, describe, expect, it } from "bun:test";
import { globalServiceRegistry } from "workglow";
import { resetDependencyInjectionsForTesting } from "../../config/TestingDI";
import { setupAllDatabases } from "../../config/setupAllDatabases";
import { FILING_REPOSITORY_TOKEN } from "../../storage/filing/FilingSchema";
import { SpacReportWriter } from "../../storage/spac/SpacReportWriter";
import {
  BackfillRedemptionsTask,
  selectRedemptionBackfillAccessions,
} from "./BackfillRedemptionsTask";

/** Registers `cik` as a known SPAC by recording an S-1 registration for it. */
async function seedSpac(cik: number): Promise<void> {
  const writer = new SpacReportWriter();
  await writer.recordRegistration({
    cik,
    accession_number: `${cik}-reg`,
    filing_date: "2025-12-01",
    form: "S-1",
    primary_document: "s1.htm",
    spac_name: "Backfill SPAC Inc.",
    spac_sic: 6770,
  });
}

/** Fields a test controls when seeding a filing row; the rest are fixed. */
interface SeedFilingOptions {
  readonly cik: number;
  readonly accession_number: string;
  readonly form: string;
  readonly items: string;
}

/** Stores one filing-metadata row with fixed dates and null optional columns. */
async function seedFiling(opts: SeedFilingOptions): Promise<void> {
  const { cik, accession_number, form, items } = opts;
  const filingRepo = globalServiceRegistry.get(FILING_REPOSITORY_TOKEN);
  await filingRepo.put({
    cik,
    accession_number,
    form,
    primary_doc: "primary.htm",
    file_number: "",
    filing_date: "2026-03-20",
    acceptance_date: "2026-03-20T00:00:00.000Z",
    report_date: "2026-03-19",
    film_number: null,
    primary_doc_description: null,
    size: null,
    is_xbrl: null,
    is_inline_xbrl: null,
    items,
    act: null,
  } as never);
}

describe("selectRedemptionBackfillAccessions", () => {
  beforeEach(async () => {
    resetDependencyInjectionsForTesting();
    await setupAllDatabases();
  });

  // One known SPAC (cik 5) with two qualifying 8-Ks and two non-qualifying
  // filings, plus a trigger-item 8-K under cik 6, which has no spac row.
  const FIXTURE_FILINGS: readonly SeedFilingOptions[] = [
    { cik: 5, accession_number: "acc-trigger", form: "8-K", items: "5.07" },
    { cik: 5, accession_number: "acc-trigger-amend", form: "8-K/A", items: "2.01" },
    { cik: 5, accession_number: "acc-2.02", form: "8-K", items: "2.02" },
    { cik: 5, accession_number: "acc-10k", form: "10-K", items: "5.07" },
    { cik: 6, accession_number: "acc-nonspac", form: "8-K", items: "5.07" },
  ];

  async function seedFixture(): Promise<void> {
    await seedSpac(5);
    for (const filing of FIXTURE_FILINGS) {
      await seedFiling(filing);
    }
  }

  it("selects known-SPAC trigger-item 8-Ks (incl. 8-K/A) only", async () => {
    await seedFixture();

    const selected = await selectRedemptionBackfillAccessions();
    for (const expected of ["acc-trigger", "acc-trigger-amend"]) {
      expect(selected).toContain(expected);
    }
    for (const excluded of ["acc-2.02", "acc-10k", "acc-nonspac"]) {
      expect(selected).not.toContain(excluded);
    }
  });

  it("dry-run reports the selected count without reprocessing", async () => {
    await seedFixture();

    const result = await new BackfillRedemptionsTask().run({ dryRun: true } as any);
    expect(result.selected).toBe(2);
    expect(result.processed).toBe(0);
  });
});
/**
 * Lists the accession numbers of every 8-K / 8-K/A that belongs to a known
 * SPAC and carries a redemption-trigger item, read from the stored filing
 * metadata.
 *
 * Each accession number appears once, in first-seen order (SPAC order, then
 * `8-K` before `8-K/A`). The backfill reprocesses filings by accession number
 * alone, so a filing stored under more than one known-SPAC CIK must not be
 * returned, and therefore reprocessed, twice.
 *
 * @returns De-duplicated accession numbers to reprocess.
 */
export async function selectRedemptionBackfillAccessions(): Promise<string[]> {
  const filingRepo = globalServiceRegistry.get(FILING_REPOSITORY_TOKEN);
  const spacs = await new SpacRepo().getAllSpacs();
  // A Set iterates in insertion order, so de-duplication keeps first-seen order.
  const accessions = new Set<string>();
  for (const spac of spacs) {
    // Query by (form, cik) — the filings storage is indexed on ["form", "cik"],
    // so this loads only the SPAC's 8-Ks instead of scanning all its filings.
    for (const form of ["8-K", "8-K/A"]) {
      const filings = (await filingRepo.query({ form, cik: spac.cik })) ?? [];
      for (const filing of filings) {
        if (hasRedemptionTriggerItem(filing.items)) {
          accessions.add(filing.accession_number);
        }
      }
    }
  }
  return [...accessions];
}

/** Input schema: `dryRun` (optional, default false). */
const InputSchema = () =>
  Type.Object({
    dryRun: Type.Optional(Type.Boolean({ default: false })),
  });
export type BackfillRedemptionsTaskInput = Static<ReturnType<typeof InputSchema>>;

/** Output schema: how many accessions were selected and how many were processed. */
const OutputSchema = () =>
  Type.Object({
    selected: Type.Number(),
    processed: Type.Number(),
  });
type BackfillRedemptionsTaskOutput = Static<ReturnType<typeof OutputSchema>>;
/**
 * Re-runs {@link ProcessAccessionDocFormTask} over every accession returned by
 * {@link selectRedemptionBackfillAccessions}, so filings processed before the
 * redemption extractor existed are run through it.
 *
 * With `dryRun` set, only the selection is performed and `processed` is 0.
 * Otherwise accessions are reprocessed one at a time; a failure is logged and
 * skipped, and `processed` counts only the runs that finished without throwing.
 */
export class BackfillRedemptionsTask extends Task<
  BackfillRedemptionsTaskInput,
  BackfillRedemptionsTaskOutput
> {
  static readonly type = "BackfillRedemptionsTask";
  static readonly category = "SEC";
  static readonly cacheable = false;

  static inputSchema() {
    return InputSchema();
  }

  static outputSchema() {
    return OutputSchema();
  }

  async execute(
    input: BackfillRedemptionsTaskInput,
    context: IExecuteContext
  ): Promise<BackfillRedemptionsTaskOutput> {
    const accessions = await selectRedemptionBackfillAccessions();
    const result: BackfillRedemptionsTaskOutput = {
      selected: accessions.length,
      processed: 0,
    };
    if (input.dryRun) {
      return result;
    }

    for (const accessionNumber of accessions) {
      // Each filing gets its own workflow and its own try/catch: one bad 8-K
      // (fetch error, malformed body) must not abort the rest of the sweep.
      try {
        const workflow = context.own(new Workflow());
        workflow.pipe(new ProcessAccessionDocFormTask());
        await workflow.run({ accessionNumber });
        result.processed += 1;
      } catch (err) {
        console.error(`backfill-redemptions: failed to reprocess ${accessionNumber}:`, err);
      }
    }
    return result;
  }
}