Skip to content
2 changes: 1 addition & 1 deletion app/config/config.js
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ function loadConfig() {
withAttackDataModel: {
doc: 'Enable validation of POST and PUT request bodies using the ATT&CK Data Model',
format: Boolean,
default: false,
default: true,
env: 'VALIDATE_WITH_ADM_SCHEMAS',
},
withOpenApi: {
Expand Down
86 changes: 86 additions & 0 deletions app/lib/import-safety.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
'use strict';

/**
* Import-safety primitives.
*
* STIX-bundle import has a strict invariant: when a bundle is imported, the
* persisted objects must be byte-faithful to the bundle's `stix` content. The
* import path is allowed to populate Workbench-private metadata (everything
* under `workspace`), but it must NEVER alter the imported `stix` fields,
* because the bundle is the source of truth and round-trip fidelity matters
* for re-imports and downstream consumers.
*
* However, the lifecycle hooks and event listeners that fire during a normal
* create/update — `beforeCreate`, `afterCreate`, and the cross-service
* listeners on domain events — were not originally written with import
* fidelity in mind. Several of them mutate `stix.*` as part of their normal
* work (e.g. AnalyticsService.beforeCreate stamps `stix.name` from the
* ATT&CK ID; AnalyticsService.handleAnalyticsReferenced rewrites
* `stix.external_references` to embed a URL to the parent detection
* strategy). Those mutations are correct for user-driven POST/PUT flows but
* are incorrect for an import.
*
* Rather than rely on convention ("remember to gate every stix write behind
* `if (!options.import) { ... }`"), we enforce the contract structurally:
* before invoking any hook or listener in import mode, the framework calls
* `deepFreezeStix(doc)`. In Node strict mode (`'use strict'` at the top of
* every service file), an attempted assignment to a frozen property throws
* a `TypeError`. That makes a missing import gate fail loudly at the
* violating line on the first import test, instead of silently corrupting
* bundle content.
*
* The local rule for hook/listener authors becomes:
*
* 1. Workspace mutations are always allowed.
* 2. If you need to mutate `stix.*`, wrap the block in
* `if (!options.import) { ... }` (or `if (!payload.options?.import)`
* inside a listener). The framework guarantees that `options.import`
* is the only state where stix is frozen, so a missing gate produces
* an immediate TypeError pointing at the offending line.
*
* Read freely from frozen stix — only writes are blocked.
*/

// Roots whose whole reachable subtree has already been frozen by
// `deepFreezeStix`. Frozen plain data cannot change afterwards, so a repeat
// call on the same `stix` object can return immediately instead of re-walking.
const deepFrozenStixRoots = new WeakSet();

/**
 * True for arrays and for "plain" objects (object literals, parsed JSON,
 * `Object.create(null)`); false for class instances. Only plain containers
 * are traversed, so the walk never descends into library internals hanging
 * off a class instance.
 *
 * @param {Object} value - A non-null object.
 * @returns {boolean}
 */
function isPlainStixContainer(value) {
  if (Array.isArray(value)) return true;
  const proto = Object.getPrototypeOf(value);
  return proto === Object.prototype || proto === null;
}

/**
 * Deep-freezes the `stix` field of a document so any attempt to write to
 * `doc.stix.*`, including writes through nested arrays/objects at any depth
 * (e.g. `doc.stix.external_references.unshift(...)` or
 * `doc.stix.external_references[i].hashes.md5 = ...`), throws a `TypeError`
 * in strict-mode code.
 *
 * `Object.freeze` is shallow on its own, so the whole subtree is walked:
 *   - every object reached is frozen;
 *   - traversal continues through the `stix` root itself, arrays, and plain
 *     objects; other objects (class instances) are frozen but not entered;
 *   - a visited set makes the walk safe on cyclic structures.
 *
 * Already-frozen nodes are still traversed, because a node frozen shallowly
 * elsewhere may have mutable children. Repeat calls on the same `stix`
 * object are cheap: fully processed roots are remembered and skipped.
 *
 * NOTE(review): this function was originally documented as safe to call on
 * Mongoose documents. That claim has not been re-verified for the deeper
 * traversal — confirm against the import path before relying on it.
 *
 * @param {Object} doc - A document of the shape `{ stix, workspace }`. No-op
 *   if `doc` or `doc.stix` is missing or `doc.stix` is not an object. Only
 *   the `stix` subtree is frozen; `doc` itself and `doc.workspace` stay
 *   mutable.
 */
function deepFreezeStix(doc) {
  const stix = doc?.stix;
  if (!stix || typeof stix !== 'object' || deepFrozenStixRoots.has(stix)) return;

  const visited = new WeakSet();
  const pending = [stix];

  while (pending.length > 0) {
    const node = pending.pop();
    if (visited.has(node)) continue;
    visited.add(node);

    Object.freeze(node);

    // The root is always enumerated, whatever its prototype; below it only
    // arrays and plain objects are entered.
    if (node !== stix && !isPlainStixContainer(node)) continue;

    const children = Array.isArray(node) ? node : Object.values(node);
    for (const child of children) {
      if (child && typeof child === 'object') {
        pending.push(child);
      }
    }
  }

  deepFrozenStixRoots.add(stix);
}

module.exports = {
deepFreezeStix,
};
17 changes: 15 additions & 2 deletions app/lib/validation-schemas.js
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,12 @@ const STIX_SCHEMAS = {
'x-mitre-collection': collectionSchema,
};

// Cache for locally-derived partial schemas. ADM does not export prebuilt
// partials for every STIX type; for those types we call `.partial()` ourselves.
// That call is expensive enough to show up in bulk-import profiles, so we
// memoize the result per STIX type.
// Keyed by the STIX `type` string; each value is the schema returned by
// `.partial()` for that type. Entries are filled lazily by `getSchema`.
const derivedPartialCache = new Map();

/**
* Get the schema to use for validating a STIX object.
*
Expand All @@ -102,7 +108,7 @@ const STIX_SCHEMAS = {
* - `work-in-progress` uses partial validation so drafts can omit required fields
* - every other workflow state uses full validation
* - if ADM exports a dedicated partial schema, use it directly
* - otherwise, derive a partial schema locally with `.partial()`
* - otherwise, derive a partial schema locally with `.partial()` (memoized)
*
* @param {string} stixType - The STIX `type` being validated (e.g. "attack-pattern")
* @param {string} status - The workflow state (e.g. "work-in-progress", "awaiting-review", "reviewed")
Expand All @@ -120,7 +126,14 @@ function getSchema(stixType, status) {
return isWip ? admSchemaRef.partial : admSchemaRef.full;
}

return isWip ? admSchemaRef.partial() : admSchemaRef;
if (!isWip) return admSchemaRef;

let derived = derivedPartialCache.get(stixType);
if (!derived) {
derived = admSchemaRef.partial();
derivedPartialCache.set(stixType, derived);
}
return derived;
}

module.exports = {
Expand Down
118 changes: 118 additions & 0 deletions app/repository/_base.repository.js
Original file line number Diff line number Diff line change
Expand Up @@ -399,6 +399,124 @@ class BaseRepository extends AbstractRepository {
}
}

/**
* Bulk insert. Used by the STIX bundle import path to avoid one round-trip
* per object.
*
* `ordered: false` keeps MongoDB inserting the remaining docs after an
* individual failure. `throwOnValidationError: true` is critical: without
* it, Mongoose's `insertMany` silently drops documents that fail schema
* validation (e.g. a required field is missing) and reports success for
* the remaining valid docs — leaving the caller unable to record per-object
* import errors. With the flag, Mongoose throws a `MongooseBulkWriteError`
* after attempting the valid docs, carrying both the validation errors and
* the `results` array we use to map each failure back to its source index.
*
* Discriminator-aware: each child model's `insertMany` sets the correct
* `__t` discriminator key automatically, so callers should invoke this on
* the type-specific repository (not the AttackObject parent).
*
* @param {Array<Object>} dataArr - Array of plain objects to insert
* @param {Object} [options]
* @param {boolean} [options.ordered=false] - Stop on first error if true
* @returns {Promise<{ inserted: Array, errors: Array<{ index, message, code }> }>}
* `errors[].index` is the index into the input `dataArr`; the caller can
* use it to recover the original document for error reporting.
*/
async saveMany(dataArr, { ordered = false } = {}) {
if (!Array.isArray(dataArr) || dataArr.length === 0) {
return { inserted: [], errors: [] };
}
try {
const inserted = await this.model.insertMany(dataArr, {
ordered,
throwOnValidationError: true,
});
return { inserted, errors: [] };
} catch (err) {
// MongooseBulkWriteError: one or more docs failed Mongoose schema
// validation. `err.results` mirrors the input order — successfully
// inserted entries are Mongoose documents (identifiable by `_id`),
// while failures are the original input objects (no `_id`). Walking
// the results in order, the k-th failure corresponds to
// `err.validationErrors[k]` (Mongoose pre-sorts validationErrors by
// source index).
if (err?.name === 'MongooseBulkWriteError') {
const errors = [];
const inserted = [];
const validationErrors = err.validationErrors || [];
const results = err.results || [];
let veIdx = 0;
for (let i = 0; i < results.length; i++) {
const r = results[i];
if (r && r._id) {
inserted.push(r);
} else {
const ve = validationErrors[veIdx++];
errors.push({
index: i,
message: ve?.message ?? 'Mongoose validation error',
code: ve?.name || 'ValidationError',
});
}
}
return { inserted, errors };
}
// MongoDB driver-side failure (e.g., duplicate-key race). Per-doc
// errors are on `err.writeErrors`; successful inserts on
// `err.insertedDocs`.
if (err?.name === 'MongoBulkWriteError' || err?.writeErrors) {
const errors = (err.writeErrors || []).map((we) => ({
index: we.index ?? we.err?.index,
message: we.errmsg || we.err?.errmsg || we.message,
code: we.code || we.err?.code,
}));
return { inserted: err.insertedDocs || [], errors };
}
throw new DatabaseError(err);
}
}

/**
* Retrieve every version of every document whose `stix.id` is in `stixIds`.
* Returns a Map keyed by stixId, value is an array of versions sorted
* newest-first (matching `retrieveAllById`'s ordering).
*
* Used by the bundle-import path to pre-fetch all existing versions in one
* query instead of N queries (one per imported object).
*
* @param {Array<string>} stixIds - List of STIX IDs to look up
* @returns {Promise<Map<string, Array<Object>>>}
*/
async retrieveAllByStixIds(stixIds) {
if (!Array.isArray(stixIds) || stixIds.length === 0) {
return new Map();
}

try {
const documents = await this.model
.find({ 'stix.id': { $in: stixIds } })
.sort('-stix.modified')
.select('-_id -__v -__t')
.lean()
.exec();

const byStixId = new Map();
for (const doc of documents) {
const id = doc.stix.id;
let arr = byStixId.get(id);
if (!arr) {
arr = [];
byStixId.set(id, arr);
}
arr.push(doc);
}
return byStixId;
} catch (err) {
throw new DatabaseError(err);
}
}

async updateAndSave(document, data) {
try {
// TODO validate that document is valid mongoose object first
Expand Down
Loading
Loading