-
Notifications
You must be signed in to change notification settings - Fork 0
Add Form 8-K parsing and event storage infrastructure #68
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -59,6 +59,7 @@ describe("sec version CLI", () => { | |
| "4", | ||
| "424", | ||
| "5", | ||
| "8-K", | ||
| "C", | ||
| "CFPORTAL", | ||
| "D", | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||||
|---|---|---|---|---|---|---|---|---|
|
|
@@ -233,6 +233,11 @@ import { | |||||||
| XbrlFactPrimaryKeyNames, | ||||||||
| XbrlFactRowSchema, | ||||||||
| } from "../storage/xbrl/XbrlFactSchema"; | ||||||||
| import { | ||||||||
| FORM_8K_EVENT_REPOSITORY_TOKEN, | ||||||||
| Form8KEventPrimaryKeyNames, | ||||||||
| Form8KEventSchema, | ||||||||
| } from "../storage/form-8k-event/Form8KEventSchema"; | ||||||||
| import { | ||||||||
| CANONICAL_COMPANY_REPOSITORY_TOKEN, | ||||||||
| CanonicalCompanyPrimaryKeyNames, | ||||||||
|
|
@@ -781,4 +786,14 @@ export function resetDependencyInjectionsForTesting() { | |||||||
| XBRL_FACT_REPOSITORY_TOKEN, | ||||||||
| new InMemoryTabularStorage(XbrlFactRowSchema, XbrlFactPrimaryKeyNames, [["cik"], ["concept"]]) | ||||||||
| ); | ||||||||
|
|
||||||||
| // Form 8-K Events | ||||||||
| globalServiceRegistry.registerInstance( | ||||||||
| FORM_8K_EVENT_REPOSITORY_TOKEN, | ||||||||
| new InMemoryTabularStorage(Form8KEventSchema, Form8KEventPrimaryKeyNames, [ | ||||||||
| ["cik", "filing_date"], | ||||||||
| ["item_code"], | ||||||||
|
||||||||
| ["item_code"], | |
| ["item_code"], | |
| ["cik", "accession_number"], |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,61 @@ | ||
| /** | ||
| * @license | ||
| * Copyright 2025 Steven Roussey <sroussey@gmail.com> | ||
| * SPDX-License-Identifier: Apache-2.0 | ||
| */ | ||
|
|
||
| import { Type, Static } from "typebox"; | ||
| import { SCHEMA_VERSION_TYPE, CIK_TYPE } from "../FormSchemaUtil"; | ||
|
|
||
| /** | ||
| * Union of the two submission type literals this schema accepts: "8-K" and "8-K/A". | ||
| */ export const SubTypeList = Type.Union([Type.Literal("8-K"), Type.Literal("8-K/A")], { | ||
| description: "Submission Type Form", | ||
| }); | ||
|
|
||
| /** | ||
| * Schema for a single signature entry: a required name (1 to 150 characters), an optional | ||
| * title (at most 150 characters), and an optional date kept as an unconstrained string. | ||
| */ const SIGNATURE_TYPE = Type.Object({ | ||
| signatureName: Type.String({ minLength: 1, maxLength: 150 }), | ||
| signatureTitle: Type.Optional(Type.String({ maxLength: 150 })), | ||
| signatureDate: Type.Optional(Type.String()), | ||
| }); | ||
|
|
||
| /** | ||
| * Static TypeScript type derived from `SIGNATURE_TYPE`. | ||
| */ export type Form8KSignature = Static<typeof SIGNATURE_TYPE>; | ||
|
|
||
| /** | ||
| * Signature block whose `signature` field holds either one signature object or an array | ||
| * of signature objects. | ||
| */ const SIGNATURE_BLOCK_TYPE = Type.Object({ | ||
| signature: Type.Union([SIGNATURE_TYPE, Type.Array(SIGNATURE_TYPE)]), | ||
| }); | ||
|
|
||
| /** | ||
| * Filer details: an optional `filerCik` (validated by `CIK_TYPE`) and an optional | ||
| * `filerCcc` string of at most 8 characters. | ||
| */ const FILER_INFO_TYPE = Type.Object({ | ||
| filerCik: Type.Optional(CIK_TYPE), | ||
| filerCcc: Type.Optional(Type.String({ maxLength: 8 })), | ||
| }); | ||
|
|
||
| /** | ||
| * Header section of the submission; only the optional `filerInfo` object is modeled here. | ||
| */ const HEADER_DATA_TYPE = Type.Object({ | ||
| filerInfo: Type.Optional(FILER_INFO_TYPE), | ||
| }); | ||
|
|
||
| /** | ||
| * Form body. All fields are optional: | ||
| * - `items`: wrapper object whose `item` is a single string or an array of strings. | ||
| * - `periodOfReport`: unconstrained string. | ||
| * - `signatureBlock`: one or more signatures (see `SIGNATURE_BLOCK_TYPE`). | ||
| */ const FORM_DATA_TYPE = Type.Object({ | ||
| items: Type.Optional( | ||
| Type.Object({ | ||
| item: Type.Union([Type.String(), Type.Array(Type.String())]), | ||
| }) | ||
| ), | ||
| periodOfReport: Type.Optional(Type.String()), | ||
| signatureBlock: Type.Optional(SIGNATURE_BLOCK_TYPE), | ||
| }); | ||
|
|
||
| /** | ||
| * Schema for 8-K filings submitted as structured XML through EDGAR. | ||
| * | ||
| * Every top-level field (`schemaVersion`, `submissionType`, `headerData`, `formData`) is | ||
| * optional, so an empty object also satisfies this schema. | ||
| */ | ||
| export const Form8KSchema = Type.Object({ | ||
| schemaVersion: Type.Optional(SCHEMA_VERSION_TYPE), | ||
| submissionType: Type.Optional(SubTypeList), | ||
| headerData: Type.Optional(HEADER_DATA_TYPE), | ||
| formData: Type.Optional(FORM_DATA_TYPE), | ||
| }); | ||
|
|
||
| /** | ||
| * Static TypeScript type derived from `Form8KSchema`. | ||
| */ export type Form8K = Static<typeof Form8KSchema>; | ||
|
|
||
| /** | ||
| * Wrapper schema for a whole submission: the 8-K document is required under the | ||
| * `edgarSubmission` key. | ||
| */ export const Form8KSubmissionSchema = Type.Object({ | ||
| edgarSubmission: Form8KSchema, | ||
| }); | ||
|
|
||
| /** | ||
| * Static TypeScript type derived from `Form8KSubmissionSchema`. | ||
| */ export type Form8KSubmission = Static<typeof Form8KSubmissionSchema>; |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,79 @@ | ||
| /** | ||
| * @license | ||
| * Copyright 2025 Steven Roussey <sroussey@gmail.com> | ||
| * SPDX-License-Identifier: Apache-2.0 | ||
| */ | ||
|
|
||
| import { Form8KEventRepo } from "../../../storage/form-8k-event/Form8KEventRepo"; | ||
| import type { Form8KEvent } from "../../../storage/form-8k-event/Form8KEventSchema"; | ||
| import type { Form8K } from "./Form_8_K.schema"; | ||
| import { Form_8_K_ITEMS } from "./Form_8_K"; | ||
|
|
||
| /** | ||
| * Builds the de-duplicated, sorted list of item codes for one filing. | ||
| * | ||
| * Codes are merged into a single set from two sources: | ||
| * - `filingItems`: the filing metadata `items` string, split on commas or semicolons | ||
| *   (e.g., "2.02,9.01"); each piece is trimmed and empty pieces are dropped. | ||
| * - `form8K.formData.items.item`: a single string or an array of strings from the parsed | ||
| *   XML form data; each entry is trimmed and empty entries are dropped. | ||
| * | ||
| * @param filingItems - Delimited item codes from the filing metadata; may be null, | ||
| *   undefined, or empty, in which case only the form data contributes codes. | ||
| * @param form8K - Parsed 8-K form data; a missing `formData`, `items`, or `item` is tolerated. | ||
| * @returns The unique item codes in default string sort order (empty when neither source | ||
| *   yields a code). | ||
| */ | ||
| function extractItemCodes(filingItems: string | undefined | null, form8K: Form8K): string[] { | ||
| const itemSet = new Set<string>(); | ||
|
|
||
| if (filingItems) { | ||
| for (const raw of filingItems.split(/[,;]/)) { | ||
| const item = raw.trim(); | ||
| if (item) { | ||
| itemSet.add(item); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| if (form8K.formData?.items?.item) { | ||
| const xmlItems = form8K.formData.items.item; | ||
| const itemArray = Array.isArray(xmlItems) ? xmlItems : [xmlItems]; | ||
| for (const item of itemArray) { | ||
| const trimmed = item.trim(); | ||
| if (trimmed) { | ||
| itemSet.add(trimmed); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| return [...itemSet].sort(); | ||
| } | ||
|
|
||
| /** | ||
| * Saves one Form 8-K event per item code reported for a filing. | ||
| * | ||
| * Item codes are gathered by `extractItemCodes` from both the filing metadata `items` | ||
| * string and the parsed form data. Each event is saved through a newly constructed | ||
| * `Form8KEventRepo`, one at a time, in the sorted order of the item codes. When no item | ||
| * codes are found, nothing is saved. | ||
| * | ||
| * Fields of the single options argument: | ||
| * - `cik`, `accession_number`, `filing_date`: copied onto every event unchanged. | ||
| * - `form`: events are flagged as amendments only when this is exactly "8-K/A". | ||
| * - `items`: delimited item codes from the filing metadata; may be null or undefined. | ||
| * - `report_date`: fallback report date, used when `form8K.formData.periodOfReport` is | ||
| *   missing or empty; the stored report date is `null` when both are missing or empty. | ||
| * - `form8K`: parsed 8-K form data. | ||
| * | ||
| * Each event's `item_description` is looked up in `Form_8_K_ITEMS` and is `null` when the | ||
| * code has no entry there. | ||
| * | ||
| * @returns A promise that resolves once every event has been passed to `saveEvent`. | ||
| */ export async function processForm8K({ | ||
| cik, | ||
| accession_number, | ||
| filing_date, | ||
| form, | ||
| items, | ||
| report_date, | ||
| form8K, | ||
| }: { | ||
| readonly cik: number; | ||
| readonly accession_number: string; | ||
| readonly filing_date: string; | ||
| readonly form: string; | ||
| readonly items: string | undefined | null; | ||
| readonly report_date: string | undefined | null; | ||
| readonly form8K: Form8K; | ||
| }): Promise<void> { | ||
| const eventRepo = new Form8KEventRepo(); | ||
| const isAmendment = form === "8-K/A"; | ||
|
|
||
| const effectiveReportDate = form8K.formData?.periodOfReport || report_date || null; | ||
|
|
||
| const itemCodes = extractItemCodes(items, form8K); | ||
|
|
||
| for (const itemCode of itemCodes) { | ||
| const event: Form8KEvent = { | ||
| cik, | ||
| accession_number, | ||
| item_code: itemCode, | ||
| item_description: Form_8_K_ITEMS[itemCode] ?? null, | ||
| filing_date, | ||
| report_date: effectiveReportDate, | ||
| is_amendment: isAmendment, | ||
| }; | ||
| await eventRepo.saveEvent(event); | ||
| } | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The `form_8k_events` storage is indexed on `["cik","filing_date"]` and `["item_code"]`, but `Form8KEventRepo.getEventsByAccession()` queries by `{ cik, accession_number }`. Without an index that includes `accession_number`, this query will likely degrade to a full scan. Add an index such as `["cik","accession_number"]` (and optionally `["accession_number"]` / `["cik"]`, depending on expected query patterns).