Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
126 changes: 126 additions & 0 deletions src/sec/forms/miscellaneous-filings/redemption8k.injection.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
/**
* @license
* Copyright 2026 Steven Roussey <sroussey@gmail.com>
* SPDX-License-Identifier: Apache-2.0
*/

import { afterEach, beforeEach, describe, expect, it } from "bun:test";
import { resetDependencyInjectionsForTesting } from "../../../config/TestingDI";
import { setupAllDatabases } from "../../../config/setupAllDatabases";
import { SpacReportWriter } from "../../../storage/spac/SpacReportWriter";
import { SpacRedemptionExtractionRepo } from "../../../storage/spac/SpacRedemptionExtractionRepo";
import { ExtractionDeadLetterRepo } from "../../../storage/dead-letter/ExtractionDeadLetterRepo";
import { MAX_STORED_SPAN_CHARS } from "../registration-statements/s1/verifySourceSpan";
import {
fakeS1Model,
registerFakeStructuredProvider,
} from "../registration-statements/s1/testing/fakeStructuredProvider";
import { processRedemption8K } from "./redemption8k";

/**
 * Minimal EDGAR full-submission text shared by the tests below: an SEC header,
 * an 8-K primary document, and an EX-99.1 exhibit whose single paragraph
 * states the redemption figures. Every line, including the last, ends in "\n".
 */
const FULL_TXT = [
  "<SEC-HEADER>",
  "ACCESSION NUMBER: 0000000000-26-injection",
  "</SEC-HEADER>",
  "<DOCUMENT>",
  "<TYPE>8-K",
  "<SEQUENCE>1",
  "<TEXT>",
  "<p>Vote results.</p>",
  "</TEXT>",
  "</DOCUMENT>",
  "<DOCUMENT>",
  "<TYPE>EX-99.1",
  "<SEQUENCE>2",
  "<TEXT>",
  "<p>Holders of 1,234,567 shares elected to redeem for $12,400,000.</p>",
  "</TEXT>",
  "</DOCUMENT>",
  "", // trailing empty entry yields the final newline
].join("\n");

/**
 * Seeds the database for `cik` through `SpacReportWriter`: first an S-1
 * registration record, then a deal-milestones record that carries a single
 * `definitive_agreement` event.
 *
 * @param cik - CIK used for both records and as the prefix of their accession numbers.
 */
async function seedSpacWithDeal(cik: number): Promise<void> {
  const registrationAccession = `${cik}-reg`;
  const dealAccession = `${cik}-da`;
  // The 8-K filing date and the definitive-agreement event share one date.
  const agreementDate = "2026-01-10";

  const reportWriter = new SpacReportWriter();

  await reportWriter.recordRegistration({
    cik,
    accession_number: registrationAccession,
    filing_date: "2025-12-01",
    form: "S-1",
    primary_document: "s1.htm",
    spac_name: "Redeem SPAC Inc.",
    spac_sic: 6770,
  });

  await reportWriter.recordDealMilestones({
    cik,
    accession_number: dealAccession,
    filing_date: agreementDate,
    form: "8-K",
    primary_document: null,
    events: [{ event_type: "definitive_agreement", event_date: agreementDate }],
  });
}

/**
 * Prompt-injection seal tests for `processRedemption8K`.
 *
 * Each test registers a fake structured provider that returns a canned
 * redemption row, runs the processor over an in-memory submission, and then
 * inspects the extraction repo and the dead-letter repo.
 */
describe("processRedemption8K prompt-injection seal", () => {
  // Unregister callback of the fake provider installed by the running test.
  let cleanup: (() => void) | undefined;

  beforeEach(async () => {
    resetDependencyInjectionsForTesting();
    await setupAllDatabases();
  });
  afterEach(() => {
    cleanup?.();
    cleanup = undefined;
  });

  it("verifyRowSpan rejects a 1001-char source_span at the gate and dead-letters UNVERIFIED_SOURCE_SPAN", async () => {
    await seedSpacWithDeal(800);
    const oversizedSpan = "X".repeat(MAX_STORED_SPAN_CHARS + 1);
    expect(oversizedSpan.length).toBe(1001);

    // Put the oversized span into the EX-99.1 exhibit itself. Without this the
    // span is simply absent from the filing, so a presence-only check would
    // reject it as well and the test could not tell the length gate apart from
    // the old behaviour. With the span embedded, a rejection is expected to
    // come from the length cap rather than from the span being missing.
    const exhibitParagraphStart = "<p>Holders of";
    expect(FULL_TXT.includes(exhibitParagraphStart)).toBe(true);
    // Function replacer: the inserted text is taken literally (no `$` patterns).
    const submissionText = FULL_TXT.replace(
      exhibitParagraphStart,
      () => `<p>${oversizedSpan}</p>\n${exhibitParagraphStart}`
    );
    expect(submissionText.includes(oversizedSpan)).toBe(true);

    const { unregister } = registerFakeStructuredProvider([
      {
        redemption_shares: 999,
        redemption_amount: 999,
        price_per_share: 1,
        confidence: 0.99,
        source_span: oversizedSpan,
      },
    ]);
    cleanup = unregister;

    await processRedemption8K({
      cik: 800,
      accession_number: "0000000000-26-injection",
      filing_date: "2026-03-20",
      form: "8-K",
      itemCodes: ["5.07"],
      fullSubmissionText: submissionText,
      model: fakeS1Model(),
    });

    // Nothing may be persisted for the rejected row ...
    expect(
      await new SpacRedemptionExtractionRepo().getByAccession("0000000000-26-injection")
    ).toBeUndefined();
    // ... and the rejection must be recorded as a pending dead letter.
    const dl = await new ExtractionDeadLetterRepo().listPending("redemption");
    const red = dl.find((d) => d.section_name === "redemption");
    expect(red?.reason_code).toBe("UNVERIFIED_SOURCE_SPAN");
  });

  it("persist site caps the stored source_span via boundSourceSpan at MAX_STORED_SPAN_CHARS", async () => {
    await seedSpacWithDeal(801);
    // A verbatim span from the EX-99.1 narrative persists unchanged. Note that
    // this exercises only the at-or-below-cap path: the span is far shorter
    // than the cap, so no truncation is observed here.
    const verbatim = "1,234,567 shares elected to redeem for $12,400,000";
    expect(verbatim.length).toBeLessThanOrEqual(MAX_STORED_SPAN_CHARS);
    const { unregister } = registerFakeStructuredProvider([
      {
        redemption_shares: 1234567,
        redemption_amount: 12400000,
        price_per_share: 10.05,
        confidence: 0.95,
        source_span: verbatim,
      },
    ]);
    cleanup = unregister;

    await processRedemption8K({
      cik: 801,
      accession_number: "0000000000-26-injection-2",
      filing_date: "2026-03-20",
      form: "8-K",
      itemCodes: ["5.07"],
      fullSubmissionText: FULL_TXT,
      model: fakeS1Model(),
    });

    const ext = await new SpacRedemptionExtractionRepo().getByAccession(
      "0000000000-26-injection-2"
    );
    expect(ext).toBeDefined();
    expect((ext?.source_span ?? "").length).toBeLessThanOrEqual(MAX_STORED_SPAN_CHARS);
    expect(ext?.source_span).toBe(verbatim);
  });
});
6 changes: 3 additions & 3 deletions src/sec/forms/miscellaneous-filings/redemption8k.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import { globalServiceRegistry, renderMarkdown } from "workglow";
import { parseEdgarHtml } from "../../html/parseEdgarHtml";
import { parseEightKSubmission } from "../registration-statements/s1/parseSubmission";
import { makeRunSection } from "../registration-statements/s1/sectionRunner";
import { spanAppearsIn } from "../registration-statements/s1/verifySourceSpan";
import { boundSourceSpan, verifyRowSpan } from "../registration-statements/s1/verifySourceSpan";
import { extractRedemption } from "../registration-statements/s1/sectionExtractors";
import type { RedemptionRow } from "../registration-statements/s1/redemptionSchema";
import {
Expand Down Expand Up @@ -113,7 +113,7 @@ export async function processRedemption8K(args: ProcessRedemption8KArgs): Promis
notFoundDetail: "no primary/EX-99 narrative text",
emptyDetail: "no redemption returned",
lowConfidenceDetail: "below confidence floor",
verifyRow: (t, r) => spanAppearsIn(t, r.source_span),
verifyRow: (t, r) => verifyRowSpan(t, r.source_span),
unverifiedAllDetail: "redemption source_span not present in narrative text",
extract: async (t) => {
const row = await extractRedemption(t, model);
Expand All @@ -132,7 +132,7 @@ export async function processRedemption8K(args: ProcessRedemption8KArgs): Promis
redemption_amount: row.redemption_amount,
price_per_share: row.price_per_share,
confidence: row.confidence,
source_span: row.source_span,
source_span: boundSourceSpan(row.source_span),
model_id,
created_at: new Date().toISOString(),
});
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
/**
* @license
* Copyright 2026 Steven Roussey <sroussey@gmail.com>
* SPDX-License-Identifier: Apache-2.0
*/

import { afterEach, beforeEach, describe, expect, it } from "bun:test";
import { resetDependencyInjectionsForTesting } from "../../../config/TestingDI";
import { setupAllDatabases } from "../../../config/setupAllDatabases";
import { SpacRepo } from "../../../storage/spac/SpacRepo";
import { SpacReportWriter } from "../../../storage/spac/SpacReportWriter";
import { SpacMergerExtractionRepo } from "../../../storage/spac/SpacMergerExtractionRepo";
import { ExtractionDeadLetterRepo } from "../../../storage/dead-letter/ExtractionDeadLetterRepo";
import { MAX_STORED_SPAN_CHARS } from "../registration-statements/s1/verifySourceSpan";
import {
fakeS1Model,
registerFakeStructuredProvider,
} from "../registration-statements/s1/testing/fakeStructuredProvider";
import { Form_DEFM14A } from "./Form_DEFM14A";
import { processMergerProxy } from "./Form_DEFM14A.storage";

/** Path of the sample DEFM14A submission, built from Bun's `import.meta.dir` for this module. */
const FIXTURE = import.meta.dir + "/mock_data/merger-proxy/defm14a_sample.txt";

/**
 * Seeds the database for `cik` through `SpacReportWriter`: an S-1 registration
 * record followed by a deal-milestones record with one `definitive_agreement`
 * event.
 *
 * @param cik - CIK used for both records and as the prefix of their accession numbers.
 */
async function seedSpac(cik: number): Promise<void> {
  const registrationAccession = `${cik}-reg`;
  const dealAccession = `${cik}-da`;

  const reportWriter = new SpacReportWriter();

  await reportWriter.recordRegistration({
    cik,
    accession_number: registrationAccession,
    filing_date: "2020-12-01",
    form: "S-1",
    primary_document: "s1.htm",
    spac_name: "Merge SPAC Inc.",
    spac_sic: 6770,
  });

  // The 8-K is filed on 2021-03-05 but reports an agreement dated 2021-03-01.
  await reportWriter.recordDealMilestones({
    cik,
    accession_number: dealAccession,
    filing_date: "2021-03-05",
    form: "8-K",
    primary_document: null,
    events: [{ event_type: "definitive_agreement", event_date: "2021-03-01" }],
  });
}

/**
 * Reads the DEFM14A fixture file, parses it with `Form_DEFM14A.parse`, and
 * passes the parsed form to `processMergerProxy` together with the model
 * returned by `fakeS1Model()`.
 *
 * @param cik - CIK forwarded to `processMergerProxy`.
 * @param accession_number - Accession number forwarded to `processMergerProxy`.
 */
async function runProxy(cik: number, accession_number: string): Promise<void> {
  const fixtureText = await Bun.file(FIXTURE).text();
  const formMergerProxy = await Form_DEFM14A.parse("DEFM14A", fixtureText);

  await processMergerProxy({
    cik,
    file_number: "",
    accession_number,
    filing_date: "2021-05-01",
    primary_doc: "proxy.htm",
    form: "DEFM14A",
    formMergerProxy,
    model: fakeS1Model(),
  });
}

/**
 * Prompt-injection seal tests for `processMergerProxy`.
 *
 * Each test registers a fake structured provider that returns a canned merger
 * row, runs the proxy pipeline over the DEFM14A fixture via `runProxy`, and
 * then inspects the merger extraction repo, the dead-letter repo, or the SPAC
 * row.
 */
describe("processMergerProxy prompt-injection seal", () => {
  // Unregister callback of the fake provider installed by the running test.
  let cleanup: (() => void) | undefined;

  beforeEach(async () => {
    resetDependencyInjectionsForTesting();
    await setupAllDatabases();
  });
  afterEach(() => {
    cleanup?.();
    cleanup = undefined;
  });

  it("verifyRowSpan rejects a 1001-char source_span at the gate, dead-letters UNVERIFIED_SOURCE_SPAN, persists nothing", async () => {
    await seedSpac(700);
    // The raw source_span exceeds the storage cap. Even though it would
    // appear verbatim in a synthetically-large section text, verifyRowSpan
    // rejects it BEFORE normalization, mirroring the S-1 storage-side cap.
    const oversizedSpan = "X".repeat(MAX_STORED_SPAN_CHARS + 1);
    expect(oversizedSpan.length).toBe(1001);
    const { unregister } = registerFakeStructuredProvider([
      {
        target_name: "Mallory Inc.",
        pipe_amount: 999_999,
        merger_consideration: "fabricated",
        confidence: 0.99,
        source_span: oversizedSpan,
      },
    ]);
    cleanup = unregister;

    await runProxy(700, "700-defm");

    expect(await new SpacMergerExtractionRepo().getByAccession("700-defm")).toBeUndefined();
    const dl = await new ExtractionDeadLetterRepo().listPending("merger-proxy");
    const merger = dl.find((d) => d.section_name === "merger");
    expect(merger?.reason_code).toBe("UNVERIFIED_SOURCE_SPAN");
  });

  it("persist site caps the stored source_span via boundSourceSpan at MAX_STORED_SPAN_CHARS", async () => {
    await seedSpac(701);
    // A row whose source_span verifies (short, present in fixture) persists
    // unchanged: boundSourceSpan returns the span as-is at-or-below the cap.
    const verbatim = "business combination with Acme Target Inc.";
    expect(verbatim.length).toBeLessThanOrEqual(MAX_STORED_SPAN_CHARS);
    const { unregister } = registerFakeStructuredProvider([
      {
        target_name: "Acme Target Inc.",
        pipe_amount: 150_000_000,
        merger_consideration: "$10 per share",
        confidence: 0.95,
        source_span: verbatim,
      },
    ]);
    cleanup = unregister;

    await runProxy(701, "701-defm");

    const ext = await new SpacMergerExtractionRepo().getByAccession("701-defm");
    expect(ext).toBeDefined();
    // Persisted span is bounded at the storage cap (here unchanged, since the
    // raw span is well below the cap). The contract under test is that the
    // call site flows through boundSourceSpan rather than persisting the
    // model output verbatim.
    expect((ext?.source_span ?? "").length).toBeLessThanOrEqual(MAX_STORED_SPAN_CHARS);
    expect(ext?.source_span).toBe(verbatim);
  });

  it("rolling-up after a verifier reject does not surface the rejected target onto the spac row", async () => {
    await seedSpac(702);
    const oversizedSpan = "Y".repeat(MAX_STORED_SPAN_CHARS + 1);
    const { unregister } = registerFakeStructuredProvider([
      {
        target_name: "Mallory Inc.",
        pipe_amount: 1,
        merger_consideration: "fabricated",
        confidence: 0.99,
        source_span: oversizedSpan,
      },
    ]);
    cleanup = unregister;

    await runProxy(702, "702-defm");

    const row = await new SpacRepo().getSpac(702);
    // The field checks below coalesce a missing row to null, so on their own
    // they would also pass if the SPAC row did not exist at all. Require the
    // seeded row first (== null covers both null and undefined) so the test
    // really inspects a row.
    expect(row == null).toBe(false);
    expect(row?.target_name ?? null).toBeNull();
    expect(row?.pipe_amount ?? null).toBeNull();
  });
});
Loading