From 393a460193c6fab0fca0af8262f6bf1156c4c513 Mon Sep 17 00:00:00 2001 From: Tofik Hasanov Date: Wed, 17 Jun 2026 14:54:11 -0400 Subject: [PATCH 1/6] feat(integrations): add GCP Separation of Environments check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Separation of Environments evidence task had no check for ANY provider, so its integration picker was empty ("can't connect to any cloud provider"). Add a heuristic GCP check (→ separationOfEnvironments): it lists all accessible projects and classifies each into an environment (by `environment`/`env` label, else a name/id token matched with word boundaries so "product"/"developer" do NOT false-match "prod"/"dev"), and passes when ≥2 distinct environments are detected. Separation is an architectural property with no single API field, so the check is evidence-first: when it cannot confirm separation (single env, unclassified, or no projects) it emits actionable guidance (label projects, or upload a diagram) rather than a silent pass. It evaluates the whole project footprint, ignoring the project_ids scoping variable. Tagged service 'iam' (governance) to satisfy the manifest-service-tags invariant. Runs on both the manual and scheduled paths like the other GCP checks. Adds 11 tests (classifier word-boundary precision + check behavior). GCP coverage: 6 → 7 task templates (8 → 9 checks). Co-Authored-By: Claude Opus 4.8 (1M context) --- .../gcp/checks/__tests__/gcp-checks.test.ts | 119 +++++++++++ .../gcp/checks/environment-separation.ts | 188 ++++++++++++++++++ .../src/manifests/gcp/checks/index.ts | 1 + .../src/manifests/gcp/index.ts | 2 + 4 files changed, 310 insertions(+) create mode 100644 packages/integration-platform/src/manifests/gcp/checks/environment-separation.ts diff --git a/packages/integration-platform/src/manifests/gcp/checks/__tests__/gcp-checks.test.ts b/packages/integration-platform/src/manifests/gcp/checks/__tests__/gcp-checks.test.ts index b2552eb7a..b825bf7c6 100644 --- a/packages/integration-platform/src/manifests/gcp/checks/__tests__/gcp-checks.test.ts +++ b/packages/integration-platform/src/manifests/gcp/checks/__tests__/gcp-checks.test.ts @@ -8,6 +8,10 @@ import { cloudMonitoringAlertingCheck } from '../cloud-monitoring-alerting'; import { cloudSqlBackupsCheck } from '../cloud-sql-backups'; import { cloudSqlEncryptionCheck } from '../cloud-sql-encryption'; import { cloudSqlSslCheck } from '../cloud-sql-ssl'; +import { + classifyProjectEnv, + environmentSeparationCheck, +} from '../environment-separation'; import { iamPrimitiveRolesCheck } from '../iam-primitive-roles'; import { storageEncryptionCheck } from '../storage-encryption'; import { storagePublicAccessCheck } from '../storage-public-access'; @@ -883,3 +887,118 @@ describe('GCP Cloud SQL encryption check', () => { expect(out.failed[0]!.title).toMatch(/Could not verify Cloud SQL encryption/); }); }); + +describe('classifyProjectEnv — token matching', () => { + it('classifies by name token', () => { + expect(classifyProjectEnv({ projectId: 'myapp-prod' })).toBe('production'); + expect(classifyProjectEnv({ projectId: 'myapp-dev-123' })).toBe('development'); + expect(classifyProjectEnv({ projectId: 'web-staging' })).toBe('staging'); + }); + + it('prefers an explicit environment label over the name', () => { + expect( + classifyProjectEnv({ projectId: 'proj-001', labels: { environment: 'production' } }), + ).toBe('production'); + expect( + classifyProjectEnv({ projectId: 'proj-002', labels: { env: 'qa' } }), + ).toBe('test'); + }); + + it('does NOT false-match substrings like product/developer', () => { + expect(classifyProjectEnv({ projectId: 'product-catalog' })).toBeNull(); + expect(classifyProjectEnv({ projectId: 'developer-portal' })).toBeNull(); + expect(classifyProjectEnv({ projectId: 'data-warehouse' })).toBeNull(); + }); + + it('treats preprod as staging, not production', () => { + expect(classifyProjectEnv({ projectId: 'app-preprod' })).toBe('staging'); + }); +}); + +describe('GCP environment-separation check', () => { + const status = (err: Error, code: number) => { + (err as Error & { status: number }).status = code; + return err; + }; + + it('passes when ≥2 distinct environments are detected by name', async () => { + const out = await runCheck(environmentSeparationCheck, { + fetch: () => ({ + projects: [{ projectId: 'myapp-prod' }, { projectId: 'myapp-dev' }], + }), + }); + expect(out.failed).toHaveLength(0); + expect(out.passed).toHaveLength(1); + expect(out.passed[0]!.title).toMatch(/Environments separated/); + expect(out.passed[0]!.evidence).toMatchObject({ + detectedEnvironments: expect.arrayContaining(['production', 'development']), + }); + }); + + it('passes when environments are distinguished by labels', async () => { + const out = await runCheck(environmentSeparationCheck, { + fetch: () => ({ + projects: [ + { projectId: 'a', labels: { environment: 'production' } }, + { projectId: 'b', labels: { environment: 'staging' } }, + ], + }), + }); + expect(out.failed).toHaveLength(0); + expect(out.passed).toHaveLength(1); + }); + + it('evaluates projects across multiple pages', async () => { + const out = await runCheck(environmentSeparationCheck, { + fetch: (url) => { + if (url.includes('pageToken=tok2')) { + return { projects: [{ projectId: 'app-dev' }] }; + } + return { projects: [{ projectId: 'app-prod' }], nextPageToken: 'tok2' }; + }, + }); + expect(out.failed).toHaveLength(0); + expect(out.passed).toHaveLength(1); + }); + + it('fails with guidance when only one environment is present', async () => { + const out = await runCheck(environmentSeparationCheck, { + fetch: () => ({ projects: [{ projectId: 'myapp-prod' }] }), + }); + expect(out.passed).toHaveLength(0); + expect(out.failed).toHaveLength(1); + expect(out.failed[0]!.title).toMatch(/Could not confirm environment separation/); + expect(out.failed[0]!.remediation).toMatch(/distinct GCP projects/); + }); + + it('fails with guidance when no project can be classified', async () => { + const out = await runCheck(environmentSeparationCheck, { + fetch: () => ({ + projects: [{ projectId: 'product-catalog' }, { projectId: 'backend' }], + }), + }); + expect(out.passed).toHaveLength(0); + expect(out.failed).toHaveLength(1); + expect(out.failed[0]!.title).toMatch(/Could not confirm environment separation/); + }); + + it('fails when no projects are accessible', async () => { + const out = await runCheck(environmentSeparationCheck, { + fetch: () => ({ projects: [] }), + }); + expect(out.passed).toHaveLength(0); + expect(out.failed).toHaveLength(1); + expect(out.failed[0]!.title).toMatch(/No GCP projects detected/); + }); + + it('fails "could not verify" when the project list read fails', async () => { + const out = await runCheck(environmentSeparationCheck, { + fetch: () => { + throw status(new Error('HTTP 403: Forbidden'), 403); + }, + }); + expect(out.passed).toHaveLength(0); + expect(out.failed).toHaveLength(1); + expect(out.failed[0]!.title).toMatch(/Could not verify environment separation/); + }); +}); diff --git a/packages/integration-platform/src/manifests/gcp/checks/environment-separation.ts b/packages/integration-platform/src/manifests/gcp/checks/environment-separation.ts new file mode 100644 index 000000000..499cc08cd --- /dev/null +++ b/packages/integration-platform/src/manifests/gcp/checks/environment-separation.ts @@ -0,0 +1,188 @@ +import { TASK_TEMPLATES } from '../../../task-mappings'; +import type { CheckContext, IntegrationCheck } from '../../../types'; +import { + remediationForReadFailure, + toHttpReadFailure, +} from '../../http-read-failure'; + +interface GcpProject { + projectId: string; + name?: string; + labels?: Record; +} + +/** + * Environment classification by token. Production is listed first so it wins + * when a string could match more than one bucket. Tokens are matched with word + * boundaries (so "product"/"developer" do NOT match "prod"/"dev"). + */ +const ENV_PATTERNS: ReadonlyArray<{ env: string; re: RegExp }> = [ + { env: 'production', re: /\b(prod|production|prd|live)\b/i }, + { env: 'staging', re: /\b(staging|stage|stg|preprod|uat)\b/i }, + { env: 'development', re: /\b(dev|develop|development)\b/i }, + { env: 'test', re: /\b(test|testing|qa)\b/i }, + { env: 'sandbox', re: /\b(sandbox|sbx|demo)\b/i }, +]; + +// Label keys that conventionally carry the environment, checked before falling +// back to the project name/id — an explicit label is more authoritative. +const ENV_LABEL_KEYS = ['environment', 'env', 'stage', 'tier'] as const; + +/** Classify a project into an environment bucket, or null if undetermined. */ +export function classifyProjectEnv(project: GcpProject): string | null { + const labelValues = ENV_LABEL_KEYS.map((k) => project.labels?.[k]).filter( + (v): v is string => typeof v === 'string' && v.length > 0, + ); + // Explicit env labels first, then the project id / display name. + const haystacks = [...labelValues, project.projectId, project.name ?? '']; + for (const haystack of haystacks) { + for (const { env, re } of ENV_PATTERNS) { + if (re.test(haystack)) return env; + } + } + return null; +} + +/** List every active project the connection can see (bounded pagination). */ +async function listActiveProjects(ctx: CheckContext): Promise { + const projects: GcpProject[] = []; + let pageToken: string | undefined; + let pages = 0; + do { + const tokenParam = pageToken + ? `&pageToken=${encodeURIComponent(pageToken)}` + : ''; + const data = await ctx.fetch<{ + projects?: GcpProject[]; + nextPageToken?: string; + }>( + `/v1/projects?filter=${encodeURIComponent('lifecycleState:ACTIVE')}&pageSize=100${tokenParam}`, + ); + for (const p of data.projects ?? []) projects.push(p); + pageToken = + typeof data.nextPageToken === 'string' ? data.nextPageToken : undefined; + pages++; + } while (pageToken && pages < 20); + if (pageToken) { + ctx.warn( + 'GCP project discovery hit the page cap; some projects may not be evaluated for environment separation', + { pages, discovered: projects.length }, + ); + } + return projects; +} + +/** + * Separation of Environments check (heuristic). GCP's recommended pattern is a + * separate project per environment, so this infers separation from the project + * footprint: it classifies each accessible project into an environment (by + * `environment`/`env` label, else name/id token) and passes when TWO OR MORE + * distinct environments are present. + * + * It is intentionally evidence-first, not a hard gate: separation is an + * architectural property with no single API field, so when it cannot confirm + * separation it emits actionable guidance (label projects, or upload a diagram) + * rather than a silent pass. It evaluates ALL accessible projects regardless of + * the `project_ids` scoping variable, since separation is about the whole + * footprint. + */ +export const environmentSeparationCheck: IntegrationCheck = { + id: 'gcp-environment-separation', + name: 'Separation of environments — production isolated from non-production', + description: + 'Verify production and non-production workloads are separated across distinct GCP projects.', + service: 'iam', + taskMapping: TASK_TEMPLATES.separationOfEnvironments, + + run: async (ctx: CheckContext) => { + let projects: GcpProject[]; + try { + projects = await listActiveProjects(ctx); + } catch (err) { + const failure = toHttpReadFailure(err); + ctx.fail({ + title: 'Could not verify environment separation', + description: `GCP projects could not be listed (${failure.error}), so environment separation could not be evaluated.`, + resourceType: 'gcp-environment-separation', + resourceId: 'projects', + severity: 'medium', + remediation: remediationForReadFailure( + failure, + 'Grant resourcemanager.projects.list (e.g. roles/viewer) to the connection, then re-run the check.', + ), + evidence: { readError: failure.error }, + }); + return; + } + + if (projects.length === 0) { + ctx.fail({ + title: 'No GCP projects detected', + description: + 'No active GCP projects were accessible, so environment separation could not be evaluated.', + resourceType: 'gcp-environment-separation', + resourceId: 'projects', + severity: 'medium', + remediation: + 'Grant resourcemanager.projects.list to the connection (or select projects in the integration settings), then re-run — or upload a console screenshot / architecture diagram as evidence of environment separation.', + evidence: { projectCount: 0 }, + }); + return; + } + + const classified = projects.map((p) => ({ + projectId: p.projectId, + environment: classifyProjectEnv(p), + })); + const detected = [ + ...new Set( + classified + .map((c) => c.environment) + .filter((e): e is string => e !== null), + ), + ]; + + if (detected.length >= 2) { + ctx.pass({ + title: 'Environments separated across projects', + description: `Detected ${detected.length} distinct environments across ${projects.length} GCP project(s): ${detected.join(', ')}.`, + resourceType: 'gcp-environment-separation', + resourceId: 'projects', + evidence: { + detectedEnvironments: detected, + projectCount: projects.length, + projects: classified + .slice(0, 50) + .map((c) => ({ + projectId: c.projectId, + environment: c.environment ?? 'unclassified', + })), + }, + }); + return; + } + + ctx.fail({ + title: 'Could not confirm environment separation', + description: + detected.length === 1 + ? `Only one environment ("${detected[0]}") was detected across ${projects.length} project(s); production and non-production do not appear to be separated into distinct projects.` + : `No GCP project could be classified by environment across ${projects.length} project(s), so environment separation could not be confirmed.`, + resourceType: 'gcp-environment-separation', + resourceId: 'projects', + severity: 'medium', + remediation: + 'Separate production and non-production workloads into distinct GCP projects and label each with an `environment` label (e.g. environment=production, environment=staging). If you separate environments another way (e.g. VPCs or folders), upload a console screenshot or architecture diagram as evidence.', + evidence: { + detectedEnvironments: detected, + projectCount: projects.length, + projects: classified + .slice(0, 50) + .map((c) => ({ + projectId: c.projectId, + environment: c.environment ?? 'unclassified', + })), + }, + }); + }, +}; diff --git a/packages/integration-platform/src/manifests/gcp/checks/index.ts b/packages/integration-platform/src/manifests/gcp/checks/index.ts index cce40f838..efad32512 100644 --- a/packages/integration-platform/src/manifests/gcp/checks/index.ts +++ b/packages/integration-platform/src/manifests/gcp/checks/index.ts @@ -6,3 +6,4 @@ export { cloudSqlBackupsCheck } from './cloud-sql-backups'; export { cloudMonitoringAlertingCheck } from './cloud-monitoring-alerting'; export { storageEncryptionCheck } from './storage-encryption'; export { cloudSqlEncryptionCheck } from './cloud-sql-encryption'; +export { environmentSeparationCheck } from './environment-separation'; diff --git a/packages/integration-platform/src/manifests/gcp/index.ts b/packages/integration-platform/src/manifests/gcp/index.ts index 2cd7394c0..d9c0b84f0 100644 --- a/packages/integration-platform/src/manifests/gcp/index.ts +++ b/packages/integration-platform/src/manifests/gcp/index.ts @@ -4,6 +4,7 @@ import { cloudSqlBackupsCheck, cloudSqlEncryptionCheck, cloudSqlSslCheck, + environmentSeparationCheck, iamPrimitiveRolesCheck, storageEncryptionCheck, storagePublicAccessCheck, @@ -168,5 +169,6 @@ This is industry standard - all GCP security monitoring tools use the same scope cloudMonitoringAlertingCheck, storageEncryptionCheck, cloudSqlEncryptionCheck, + environmentSeparationCheck, ], }; From 76c505bf41e3d468fb3ec2005540ff4dd0e1271f Mon Sep 17 00:00:00 2001 From: Tofik Hasanov Date: Wed, 17 Jun 2026 15:19:34 -0400 Subject: [PATCH 2/6] feat(integrations): add AWS & Azure Separation of Environments checks + shared env classifier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Brings AWS and Azure to parity with GCP on the Separation of Environments task (previously empty for ALL providers). Also extracts a shared, token-exact environment classifier and fixes an underscore bug the review caught. - New manifests/environment-classification.ts: TOKEN-split (not substring) classification so "production"/"product" and "dev"/"developer" never collide AND any separator works (-, _, ., /). Fixes a latent bug where _-separated names (myapp_prod) classified to null under the previous \b-regex. Reads only env-key tag/label values (never arbitrary tags) so a stray tag can't fabricate an environment. - GCP env-separation refactored onto the shared classifier (gets the underscore fix). - AWS aws-environment-separation (service ec2-vpc): classifies non-default, available VPCs by Environment/Name tag across regions; passes on >=2 distinct environments. Account-per-environment is invisible from one connection (no Organizations access), so a single-account result fails (severity LOW) with guidance to connect each env account or upload a diagram. PASS wording scopes the claim to within-account network labeling, NOT cross-account isolation. - Azure azure-environment-separation (service policy): two-tier, evaluated PER TIER (never unioned) — subscriptions (real boundary) first, then resource groups (logical, disclosed as such). Footprint-wide (lists all enabled subs). All branches fail-with-guidance, never silent pass; read failures surface as "could not verify"; the task accepts manual evidence. +25 tests (shared classifier incl. underscore/preprod/product, AWS pure evaluators, Azure run() incl. the no-cross-tier-union safety case). 371 package tests pass; typecheck + build green. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../environment-classification.test.ts | 64 ++++++ .../aws/checks/__tests__/aws-checks.test.ts | 53 +++++ .../aws/checks/environment-separation.ts | 217 ++++++++++++++++++ .../src/manifests/aws/checks/index.ts | 1 + .../src/manifests/aws/index.ts | 2 + .../checks/__tests__/azure-checks.test.ts | 101 ++++++++ .../azure/checks/environment-separation.ts | 194 ++++++++++++++++ .../src/manifests/azure/checks/index.ts | 1 + .../src/manifests/azure/index.ts | 2 + .../manifests/environment-classification.ts | 69 ++++++ .../gcp/checks/__tests__/gcp-checks.test.ts | 3 +- .../gcp/checks/environment-separation.ts | 41 ++-- 12 files changed, 720 insertions(+), 28 deletions(-) create mode 100644 packages/integration-platform/src/manifests/__tests__/environment-classification.test.ts create mode 100644 packages/integration-platform/src/manifests/aws/checks/environment-separation.ts create mode 100644 packages/integration-platform/src/manifests/azure/checks/environment-separation.ts create mode 100644 packages/integration-platform/src/manifests/environment-classification.ts diff --git a/packages/integration-platform/src/manifests/__tests__/environment-classification.test.ts b/packages/integration-platform/src/manifests/__tests__/environment-classification.test.ts new file mode 100644 index 000000000..c71bde6aa --- /dev/null +++ b/packages/integration-platform/src/manifests/__tests__/environment-classification.test.ts @@ -0,0 +1,64 @@ +import { describe, expect, it } from 'bun:test'; +import { + classifyEnvironment, + envTagValues, +} from '../environment-classification'; + +describe('classifyEnvironment — token-exact matching', () => { + it('classifies common environment tokens', () => { + expect(classifyEnvironment(['myapp-prod'])).toBe('production'); + expect(classifyEnvironment(['web-staging'])).toBe('staging'); + expect(classifyEnvironment(['svc-dev'])).toBe('development'); + expect(classifyEnvironment(['api-qa'])).toBe('test'); + expect(classifyEnvironment(['demo'])).toBe('sandbox'); + }); + + it('handles ANY separator including underscore (the bug the reviewer caught)', () => { + expect(classifyEnvironment(['myapp_prod'])).toBe('production'); + expect(classifyEnvironment(['prod_network'])).toBe('production'); + expect(classifyEnvironment(['dev_network'])).toBe('development'); + expect(classifyEnvironment(['myapp.prod'])).toBe('production'); + expect(classifyEnvironment(['rg/staging'])).toBe('staging'); + }); + + it('does NOT false-match substrings (product/developer/etc.)', () => { + expect(classifyEnvironment(['product-catalog'])).toBeNull(); + expect(classifyEnvironment(['developer-portal'])).toBeNull(); + expect(classifyEnvironment(['data-warehouse'])).toBeNull(); + expect(classifyEnvironment(['prod123'])).toBeNull(); // not a clean token + }); + + it('treats preprod as staging, not production', () => { + expect(classifyEnvironment(['app-preprod'])).toBe('staging'); + expect(classifyEnvironment(['preprod'])).toBe('staging'); + }); + + it('is case-insensitive and skips empty/undefined candidates', () => { + expect(classifyEnvironment(['PROD'])).toBe('production'); + expect(classifyEnvironment([undefined, '', 'svc-dev'])).toBe('development'); + }); + + it('returns the first matching candidate (authoritative source first)', () => { + // an explicit env value passed first wins over a later name + expect(classifyEnvironment(['production', 'thing-dev'])).toBe('production'); + }); + + it('returns null when nothing matches', () => { + expect(classifyEnvironment(['backend', 'frontend', 'vpc-0abc'])).toBeNull(); + }); +}); + +describe('envTagValues — only env-key tags, case-insensitive', () => { + it('reads environment-indicating keys regardless of case', () => { + expect(envTagValues({ Environment: 'production' })).toEqual(['production']); + expect(envTagValues({ env: 'prod', stage: 'dev' }).sort()).toEqual(['dev', 'prod']); + }); + + it('ignores non-environment tags (false-positive guard)', () => { + expect(envTagValues({ team: 'dev-team', costCenter: 'prod-123' })).toEqual([]); + }); + + it('returns [] for undefined tags', () => { + expect(envTagValues(undefined)).toEqual([]); + }); +}); diff --git a/packages/integration-platform/src/manifests/aws/checks/__tests__/aws-checks.test.ts b/packages/integration-platform/src/manifests/aws/checks/__tests__/aws-checks.test.ts index fddbfcf7d..5a8f446c7 100644 --- a/packages/integration-platform/src/manifests/aws/checks/__tests__/aws-checks.test.ts +++ b/packages/integration-platform/src/manifests/aws/checks/__tests__/aws-checks.test.ts @@ -1,6 +1,10 @@ import { describe, expect, it } from 'bun:test'; import { evaluateCloudTrail } from '../cloudtrail'; import { evaluateSecurityGroups } from '../ec2'; +import { + classifyVpcEnv, + evaluateEnvironmentSeparation, +} from '../environment-separation'; import { evaluateAccountSummary, evaluateIamAccount, @@ -953,3 +957,52 @@ describe('account-level findings carry AWS account attribution (cubic finding on expect(failed[0]!.description).toContain('AWS account 123456789012'); }); }); + +describe('AWS environment separation', () => { + it('classifyVpcEnv: env tag wins, then Name tag, incl. underscore', () => { + expect(classifyVpcEnv([{ Key: 'Environment', Value: 'production' }])).toBe('production'); + expect(classifyVpcEnv([{ Key: 'Name', Value: 'prod-vpc' }])).toBe('production'); + expect(classifyVpcEnv([{ Key: 'Name', Value: 'vpc_dev' }])).toBe('development'); + }); + + it('classifyVpcEnv: ignores non-env tags (no fabricated environment)', () => { + expect(classifyVpcEnv([{ Key: 'team', Value: 'dev-team' }])).toBeNull(); + expect(classifyVpcEnv(undefined)).toBeNull(); + }); + + it('passes when >=2 environments are found, without claiming cross-account isolation', () => { + const out = evaluateEnvironmentSeparation([ + { vpcId: 'vpc-1', region: 'us-east-1', environment: 'production' }, + { vpcId: 'vpc-2', region: 'us-east-1', environment: 'development' }, + ]); + expect(out).toHaveLength(1); + expect(out[0]!.kind).toBe('pass'); + expect(out[0]!.description).toMatch(/not cross-account isolation/); + }); + + it('fails (low) with guidance when only one environment is detected', () => { + const out = evaluateEnvironmentSeparation([ + { vpcId: 'vpc-1', region: 'us-east-1', environment: 'production' }, + ]); + expect(out).toHaveLength(1); + expect(out[0]!.kind).toBe('fail'); + expect(out[0]!.severity).toBe('low'); + expect(out[0]!.remediation).toMatch(/separate AWS account per environment/); + }); + + it('fails when no VPC can be classified', () => { + const out = evaluateEnvironmentSeparation([ + { vpcId: 'vpc-1', region: 'us-east-1', environment: null }, + { vpcId: 'vpc-2', region: 'us-east-1', environment: null }, + ]); + expect(out[0]!.kind).toBe('fail'); + }); + + it('fails (low) with guidance when there are no non-default VPCs', () => { + const out = evaluateEnvironmentSeparation([]); + expect(out).toHaveLength(1); + expect(out[0]!.kind).toBe('fail'); + expect(out[0]!.severity).toBe('low'); + expect(out[0]!.evidence).toMatchObject({ vpcCount: 0 }); + }); +}); diff --git a/packages/integration-platform/src/manifests/aws/checks/environment-separation.ts b/packages/integration-platform/src/manifests/aws/checks/environment-separation.ts new file mode 100644 index 000000000..fb2b8cd40 --- /dev/null +++ b/packages/integration-platform/src/manifests/aws/checks/environment-separation.ts @@ -0,0 +1,217 @@ +import { DescribeVpcsCommand, EC2Client } from '@aws-sdk/client-ec2'; +import { TASK_TEMPLATES } from '../../../task-mappings'; +import type { CheckContext, IntegrationCheck } from '../../../types'; +import { + classifyEnvironment, + envTagValues, +} from '../../environment-classification'; +import { + combineReadFailures, + emitOutcomes, + remediationForReadFailure, + resolveAwsSessionOrFail, + toReadFailure, + type CheckOutcome, + type ReadFailure, +} from './shared'; + +export interface VpcInfo { + vpcId: string; + region: string; + environment: string | null; +} + +// Shown on every "could not confirm" outcome. AWS's recommended separation is a +// separate ACCOUNT per environment, which is invisible from one connection (one +// account's role), so a single-account result is the EXPECTED shape for those +// customers — guide, never accuse. +const ACCOUNT_GUIDANCE = + 'If you separate environments using a separate AWS account per environment (the recommended pattern), connect each environment account as its own connection — this check evaluates one account at a time. Otherwise separate prod/non-prod into distinct VPCs and tag each (Environment=production / Environment=staging), or upload an architecture diagram as evidence.'; + +/** + * Classify a VPC into an environment from its tags: an explicit `environment` + * tag value first, then the `Name` tag value. Only env-key tag values and the + * Name tag are considered — arbitrary tag values are NOT scanned, so a stray + * `team=dev-team` tag can't fabricate a second environment. + */ +export function classifyVpcEnv( + tags: ReadonlyArray<{ Key?: string; Value?: string }> | undefined, +): string | null { + const tagMap: Record = {}; + for (const t of tags ?? []) { + if (typeof t.Key === 'string' && typeof t.Value === 'string') { + tagMap[t.Key] = t.Value; + } + } + const nameTag = Object.entries(tagMap).find( + ([k]) => k.toLowerCase() === 'name', + )?.[1]; + return classifyEnvironment([...envTagValues(tagMap), nameTag]); +} + +/** + * Pure verdict from the account's non-default VPCs. PASS only when >=2 distinct + * environments are positively observed; otherwise fail-with-guidance (never a + * silent pass). The PASS wording is deliberately scoped to "within a single + * AWS account" — two environment-labeled VPCs prove network labeling, not + * cross-account isolation (they can be peered / share the account boundary). + */ +export function evaluateEnvironmentSeparation(vpcs: VpcInfo[]): CheckOutcome[] { + const sample = vpcs.slice(0, 50).map((v) => ({ + vpcId: v.vpcId, + region: v.region, + environment: v.environment ?? 'unclassified', + })); + + if (vpcs.length === 0) { + return [ + { + kind: 'fail', + title: 'Could not confirm environment separation', + description: + 'No non-default VPCs were found in this AWS account, so environment separation could not be evaluated here.', + resourceType: 'aws-environment-separation', + resourceId: 'vpcs', + severity: 'low', + remediation: ACCOUNT_GUIDANCE, + evidence: { vpcCount: 0 }, + }, + ]; + } + + const detected = [ + ...new Set( + vpcs.map((v) => v.environment).filter((e): e is string => e !== null), + ), + ]; + + if (detected.length >= 2) { + return [ + { + kind: 'pass', + title: 'Distinct environment-labeled VPCs found', + description: `Detected ${detected.length} distinct environments across non-default VPCs in this AWS account: ${detected.join(', ')}. This evidences environment-labeled network separation within a single account (not cross-account isolation).`, + resourceType: 'aws-environment-separation', + resourceId: 'vpcs', + evidence: { + detectedEnvironments: detected, + vpcCount: vpcs.length, + vpcs: sample, + }, + }, + ]; + } + + return [ + { + kind: 'fail', + title: 'Could not confirm environment separation', + description: + detected.length === 1 + ? `Only one environment ("${detected[0]}") was detected among this account's VPCs; this connection evaluates a single AWS account.` + : "No VPC in this account could be classified by environment, so environment separation could not be confirmed.", + resourceType: 'aws-environment-separation', + resourceId: 'vpcs', + severity: 'low', + remediation: ACCOUNT_GUIDANCE, + evidence: { + detectedEnvironments: detected, + vpcCount: vpcs.length, + vpcs: sample, + }, + }, + ]; +} + +/** + * Separation of Environments check (heuristic, within-account). Lists every + * non-default VPC across the account's regions, classifies each by its + * Environment/Name tag, and passes when >=2 distinct environments are found. + * Account-per-environment separation is invisible from one connection (no + * Organizations access), so a single-environment account fails with guidance to + * connect each env account or upload a diagram — the task accepts manual + * evidence either way. + */ +export const environmentSeparationCheck: IntegrationCheck = { + id: 'aws-environment-separation', + name: 'Separation of environments — production isolated from non-production', + description: + 'Verify production and non-production workloads run in distinct VPCs within the AWS account.', + service: 'ec2-vpc', + taskMapping: TASK_TEMPLATES.separationOfEnvironments, + run: async (ctx: CheckContext) => { + const session = await resolveAwsSessionOrFail(ctx); + if (!session) { + ctx.log( + 'AWS environment-separation check: connection not configured — skipping', + ); + return; + } + + const vpcs: VpcInfo[] = []; + const regionFailures: Array<{ region: string; failure: ReadFailure }> = []; + + for (const region of session.regions) { + try { + const ec2 = new EC2Client({ + region, + credentials: session.credentials, + maxAttempts: 5, + }); + let token: string | undefined; + do { + const resp = await ec2.send( + new DescribeVpcsCommand({ NextToken: token, MaxResults: 1000 }), + ); + for (const v of resp.Vpcs ?? []) { + // Default VPCs ship in every region (usually untagged) and would + // pollute the signal; only evaluate available, non-default VPCs. + if (v.IsDefault === true) continue; + if (v.State && v.State !== 'available') continue; + vpcs.push({ + vpcId: v.VpcId ?? 'unknown', + region, + environment: classifyVpcEnv(v.Tags), + }); + } + token = resp.NextToken; + } while (token); + } catch (err) { + const failure = toReadFailure(err); + regionFailures.push({ region, failure }); + ctx.log(`VPC: could not list VPCs in ${region}: ${failure.error}`); + } + } + + // A region we couldn't read is unverified — surface it instead of letting a + // partial read failure end as a silent verdict. + if (regionFailures.length > 0) { + const regions = regionFailures.map((r) => r.region); + emitOutcomes(ctx, [ + { + kind: 'fail', + title: 'Could not verify VPCs in some regions', + description: `VPCs could not be listed in: ${regions.join(', ')}, so environment separation is unverified in those regions.`, + resourceType: 'aws-environment-separation', + resourceId: `regions:${regions.join(',')}`, + severity: 'medium', + remediation: remediationForReadFailure( + combineReadFailures(regionFailures.map((r) => r.failure)), + 'Grant ec2:DescribeVpcs to the integration role in all enabled regions, then re-run the check.', + ), + evidence: { + failedRegions: regionFailures.map((r) => ({ + region: r.region, + error: r.failure.error, + })), + }, + }, + ]); + // If we also found zero VPCs, the unverified-region finding already tells + // the story — don't add a misleading "no VPCs" verdict on unread data. + if (vpcs.length === 0) return; + } + + emitOutcomes(ctx, evaluateEnvironmentSeparation(vpcs)); + }, +}; diff --git a/packages/integration-platform/src/manifests/aws/checks/index.ts b/packages/integration-platform/src/manifests/aws/checks/index.ts index ac43946a3..10e3b9934 100644 --- a/packages/integration-platform/src/manifests/aws/checks/index.ts +++ b/packages/integration-platform/src/manifests/aws/checks/index.ts @@ -4,3 +4,4 @@ export { ec2SecurityGroupsCheck } from './ec2'; export { rdsEncryptionCheck, rdsBackupsCheck } from './rds'; export { kmsKeyRotationCheck } from './kms'; export { cloudTrailEnabledCheck } from './cloudtrail'; +export { environmentSeparationCheck } from './environment-separation'; diff --git a/packages/integration-platform/src/manifests/aws/index.ts b/packages/integration-platform/src/manifests/aws/index.ts index b84f4a84f..cf6716e3d 100644 --- a/packages/integration-platform/src/manifests/aws/index.ts +++ b/packages/integration-platform/src/manifests/aws/index.ts @@ -2,6 +2,7 @@ import type { IntegrationManifest } from '../../types'; import { cloudTrailEnabledCheck, ec2SecurityGroupsCheck, + environmentSeparationCheck, iamAccountSecurityCheck, kmsKeyRotationCheck, rdsBackupsCheck, @@ -100,5 +101,6 @@ export const awsManifest: IntegrationManifest = { rdsBackupsCheck, kmsKeyRotationCheck, cloudTrailEnabledCheck, + environmentSeparationCheck, ], }; diff --git a/packages/integration-platform/src/manifests/azure/checks/__tests__/azure-checks.test.ts b/packages/integration-platform/src/manifests/azure/checks/__tests__/azure-checks.test.ts index 6a3c2fc6c..96203120a 100644 --- a/packages/integration-platform/src/manifests/azure/checks/__tests__/azure-checks.test.ts +++ b/packages/integration-platform/src/manifests/azure/checks/__tests__/azure-checks.test.ts @@ -6,6 +6,7 @@ import type { } from '../../../../types'; import { azureManifest } from '../../index'; import { rbacLeastPrivilegeCheck } from '../entra-id'; +import { environmentSeparationCheck } from '../environment-separation'; import { keyVaultProtectionCheck, keyVaultRbacCheck } from '../key-vault'; import { monitorLoggingAlertingCheck } from '../monitor'; import { @@ -896,3 +897,103 @@ describe('azure subscription picker fetchOptions', () => { expect(options).toEqual([]); }); }); + +describe('Azure environment separation', () => { + const envSepFetch = + (opts: { subs: unknown[]; rgs?: Record }) => + (url: string) => { + if (url.includes('/subscriptions?api-version')) return { value: opts.subs }; + const m = url.match(/\/subscriptions\/([^/]+)\/resourcegroups/); + if (m) return { value: opts.rgs?.[m[1]!] ?? [] }; + return {}; + }; + + it('passes (strong) when >=2 subscriptions classify to distinct environments', async () => { + const { passed, failed } = await run( + environmentSeparationCheck, + envSepFetch({ + subs: [ + { subscriptionId: 's1', state: 'Enabled', displayName: 'Production' }, + { subscriptionId: 's2', state: 'Enabled', displayName: 'Development' }, + ], + }), + ); + expect(failed).toHaveLength(0); + expect(passed).toContain('Environments separated across subscriptions'); + }); + + it('passes (weak) on resource-group separation, disclosed as logical', async () => { + const { passed, failed } = await run( + environmentSeparationCheck, + envSepFetch({ + subs: [{ subscriptionId: 's1', state: 'Enabled', displayName: 'MyCompany' }], + rgs: { + s1: [ + { id: 'a', name: 'rg-prod' }, + { id: 'b', name: 'rg-dev' }, + ], + }, + }), + ); + expect(failed).toHaveLength(0); + expect(passed).toContain('Environments separated across resource groups'); + }); + + it('passes on resource-group tags (case-insensitive key)', async () => { + const { passed } = await run( + environmentSeparationCheck, + envSepFetch({ + subs: [{ subscriptionId: 's1', state: 'Enabled', displayName: 'Company' }], + rgs: { + s1: [ + { id: 'a', name: 'a', tags: { environment: 'production' } }, + { id: 'b', name: 'b', tags: { Environment: 'staging' } }, + ], + }, + }), + ); + expect(passed).toContain('Environments separated across resource groups'); + }); + + it('does NOT union tiers: a prod subscription with an rg-dev inside fails', async () => { + const { passed, failed } = await run( + environmentSeparationCheck, + envSepFetch({ + subs: [{ subscriptionId: 's1', state: 'Enabled', displayName: 'Production' }], + rgs: { s1: [{ id: 'a', name: 'rg-dev' }] }, + }), + ); + expect(passed).toHaveLength(0); + expect(failed.some((f) => /Could not confirm environment separation/.test(f.title))).toBe(true); + }); + + it('fails with guidance when nothing classifies', async () => { + const { passed, failed } = await run( + environmentSeparationCheck, + envSepFetch({ + subs: [{ subscriptionId: 's1', state: 'Enabled', displayName: 'Company' }], + rgs: { s1: [{ id: 'a', name: 'backend' }] }, + }), + ); + expect(passed).toHaveLength(0); + expect(failed).toHaveLength(1); + expect(failed[0]!.remediation).toMatch(/distinct subscriptions/); + }); + + it('fails when no enabled subscriptions are visible', async () => { + const { failed } = await run( + environmentSeparationCheck, + envSepFetch({ subs: [{ subscriptionId: 's1', state: 'Disabled', displayName: 'Old' }] }), + ); + expect(failed.some((f) => /No Azure subscriptions detected/.test(f.title))).toBe(true); + }); + + it('fails "could not verify" when the subscription list read fails', async () => { + const { passed, failed } = await run(environmentSeparationCheck, (url) => { + if (url.includes('/subscriptions?api-version')) throw new Error('HTTP 403: Forbidden'); + return {}; + }); + expect(passed).toHaveLength(0); + expect(failed.some((f) => /Could not verify environment separation/.test(f.title))).toBe(true); + }); +}); diff --git a/packages/integration-platform/src/manifests/azure/checks/environment-separation.ts b/packages/integration-platform/src/manifests/azure/checks/environment-separation.ts new file mode 100644 index 000000000..951016043 --- /dev/null +++ b/packages/integration-platform/src/manifests/azure/checks/environment-separation.ts @@ -0,0 +1,194 @@ +import { TASK_TEMPLATES } from '../../../task-mappings'; +import type { CheckContext, IntegrationCheck } from '../../../types'; +import { + classifyEnvironment, + envTagValues, +} from '../../environment-classification'; +import { remediationForReadFailure, toHttpReadFailure } from '../../http-read-failure'; +import { ARM_BASE, armListAll } from './shared'; + +const SUBSCRIPTIONS_API_VERSION = '2020-01-01'; +const RESOURCE_GROUPS_API_VERSION = '2021-04-01'; +// Bound the resource-group fan-out (one list call per subscription). +const MAX_SUBSCRIPTIONS = 50; + +interface Subscription { + subscriptionId: string; + displayName?: string; + state?: string; +} + +interface ResourceGroup { + id: string; + name: string; + location?: string; + tags?: Record; +} + +/** + * Classify a resource group into an environment: an explicit `environment` tag + * value first, then the RG name. Only env-key tag values and the name are + * considered (never arbitrary tag values). + */ +export function classifyResourceGroupEnv(rg: { + name: string; + tags?: Record; +}): string | null { + return classifyEnvironment([...envTagValues(rg.tags), rg.name]); +} + +const GUIDANCE = + 'Separate production and non-production into distinct subscriptions (strongest), or tag each resource group with an `environment` tag (e.g. environment=production / environment=staging). If you separate environments another way, upload a console screenshot or architecture diagram as evidence.'; + +/** + * Separation of Environments check (heuristic). Evaluates the whole footprint in + * two tiers, in safety order: + * 1) Subscriptions — a real isolation/RBAC/billing boundary. If >=2 enabled + * subscriptions classify into distinct environments → PASS (strong). + * 2) Resource groups — the most commonly env-named primitive, but only a + * LOGICAL container (shares the subscription's access/network). If >=2 + * distinct environments across RGs → PASS, explicitly disclosed as logical. + * Tiers are NOT unioned: a single prod subscription containing an `rg-dev` + * resource group must not be mistaken for separation. Anything else fails with + * guidance; the task accepts manual evidence. + */ +export const environmentSeparationCheck: IntegrationCheck = { + id: 'azure-environment-separation', + name: 'Separation of environments — production isolated from non-production', + description: + 'Verify production and non-production are separated across Azure subscriptions or resource groups.', + service: 'policy', + taskMapping: TASK_TEMPLATES.separationOfEnvironments, + run: async (ctx: CheckContext) => { + // Footprint-wide: list ALL enabled subscriptions directly (separation is a + // whole-footprint property, so this intentionally ignores subscription + // scoping the way the GCP check ignores project scoping). + let subscriptions: Subscription[]; + try { + const data = await ctx.fetch<{ value?: Subscription[] }>( + `${ARM_BASE}/subscriptions?api-version=${SUBSCRIPTIONS_API_VERSION}`, + ); + subscriptions = (data.value ?? []).filter((s) => s.state === 'Enabled'); + } catch (err) { + const failure = toHttpReadFailure(err); + ctx.fail({ + title: 'Could not verify environment separation', + description: `Azure subscriptions could not be listed (${failure.error}), so environment separation could not be evaluated.`, + resourceType: 'azure-environment-separation', + resourceId: 'subscriptions', + severity: 'medium', + remediation: remediationForReadFailure( + failure, + 'Grant the connection Reader access to the subscription(s), then re-run the check.', + ), + evidence: { readError: failure.error }, + }); + return; + } + + if (subscriptions.length === 0) { + ctx.fail({ + title: 'No Azure subscriptions detected', + description: + 'No enabled Azure subscriptions were accessible, so environment separation could not be evaluated.', + resourceType: 'azure-environment-separation', + resourceId: 'subscriptions', + severity: 'medium', + remediation: + 'Grant the connection Reader access to your subscriptions, then re-run — or upload a console screenshot / architecture diagram as evidence.', + evidence: { subscriptionCount: 0 }, + }); + return; + } + + // Tier 1 (strong): subscription-level separation. + const subscriptionEnvs = [ + ...new Set( + subscriptions + .map((s) => classifyEnvironment([s.displayName])) + .filter((e): e is string => e !== null), + ), + ]; + if (subscriptionEnvs.length >= 2) { + ctx.pass({ + title: 'Environments separated across subscriptions', + description: `Detected ${subscriptionEnvs.length} distinct environments across ${subscriptions.length} Azure subscriptions: ${subscriptionEnvs.join(', ')} (subscription-level boundary).`, + resourceType: 'azure-environment-separation', + resourceId: 'subscriptions', + evidence: { + boundary: 'subscription', + detectedEnvironments: subscriptionEnvs, + subscriptionCount: subscriptions.length, + }, + }); + return; + } + + // Tier 2 (weak, logical): resource-group-level separation across the footprint. + const boundedSubs = subscriptions.slice(0, MAX_SUBSCRIPTIONS); + const rgEnvSet = new Set(); + const rgSamples: Array<{ name: string; environment: string }> = []; + let anyRgReadFailed = false; + for (const sub of boundedSubs) { + let resourceGroups: ResourceGroup[]; + try { + resourceGroups = await armListAll( + ctx, + `${ARM_BASE}/subscriptions/${sub.subscriptionId}/resourcegroups?api-version=${RESOURCE_GROUPS_API_VERSION}`, + ); + } catch (err) { + anyRgReadFailed = true; + ctx.log( + `Azure env-separation: could not list resource groups in ${sub.subscriptionId} — ${toHttpReadFailure(err).error}`, + ); + continue; + } + for (const rg of resourceGroups) { + const env = classifyResourceGroupEnv(rg); + if (env) { + rgEnvSet.add(env); + if (rgSamples.length < 50) rgSamples.push({ name: rg.name, environment: env }); + } + } + } + const resourceGroupEnvs = [...rgEnvSet]; + if (resourceGroupEnvs.length >= 2) { + ctx.pass({ + title: 'Environments separated across resource groups', + description: `Detected ${resourceGroupEnvs.length} distinct environments across resource groups in ${boundedSubs.length} subscription(s): ${resourceGroupEnvs.join(', ')}. Resource-group separation is logical — RGs share the subscription's access and network boundary — not full isolation.`, + resourceType: 'azure-environment-separation', + resourceId: 'resource-groups', + evidence: { + boundary: 'resource-group', + detectedEnvironments: resourceGroupEnvs, + subscriptionCount: boundedSubs.length, + resourceGroups: rgSamples, + }, + }); + return; + } + + // Neither tier confirmed separation. + const detectedAll = [...new Set([...subscriptionEnvs, ...resourceGroupEnvs])]; + ctx.fail({ + title: + anyRgReadFailed && detectedAll.length < 2 + ? 'Could not verify environment separation' + : 'Could not confirm environment separation', + description: + detectedAll.length === 0 + ? `No Azure subscription or resource group could be classified by environment across ${subscriptions.length} subscription(s)${anyRgReadFailed ? ' (some resource groups could not be read)' : ''}, so environment separation could not be confirmed.` + : `Environment(s) detected (${detectedAll.join(', ')}) but not 2+ distinct environments at a single boundary (subscriptions, or resource groups), so environment separation could not be confirmed.`, + resourceType: 'azure-environment-separation', + resourceId: 'subscriptions', + severity: 'medium', + remediation: GUIDANCE, + evidence: { + subscriptionEnvironments: subscriptionEnvs, + resourceGroupEnvironments: resourceGroupEnvs, + subscriptionCount: subscriptions.length, + resourceGroupsClassified: rgSamples.length, + }, + }); + }, +}; diff --git a/packages/integration-platform/src/manifests/azure/checks/index.ts b/packages/integration-platform/src/manifests/azure/checks/index.ts index ce7d9f988..5e1e4a5f7 100644 --- a/packages/integration-platform/src/manifests/azure/checks/index.ts +++ b/packages/integration-platform/src/manifests/azure/checks/index.ts @@ -10,3 +10,4 @@ export { keyVaultProtectionCheck, keyVaultRbacCheck } from './key-vault'; export { nsgNoOpenPortsCheck } from './network'; export { rbacLeastPrivilegeCheck } from './entra-id'; export { monitorLoggingAlertingCheck } from './monitor'; +export { environmentSeparationCheck } from './environment-separation'; diff --git a/packages/integration-platform/src/manifests/azure/index.ts b/packages/integration-platform/src/manifests/azure/index.ts index f8eb4255e..8659becc3 100644 --- a/packages/integration-platform/src/manifests/azure/index.ts +++ b/packages/integration-platform/src/manifests/azure/index.ts @@ -1,5 +1,6 @@ import type { IntegrationManifest } from '../../types'; import { + environmentSeparationCheck, keyVaultProtectionCheck, keyVaultRbacCheck, monitorLoggingAlertingCheck, @@ -176,5 +177,6 @@ Our integration only makes read-only API calls for security scanning.`, nsgNoOpenPortsCheck, rbacLeastPrivilegeCheck, monitorLoggingAlertingCheck, + environmentSeparationCheck, ], }; diff --git a/packages/integration-platform/src/manifests/environment-classification.ts b/packages/integration-platform/src/manifests/environment-classification.ts new file mode 100644 index 000000000..f11eb245a --- /dev/null +++ b/packages/integration-platform/src/manifests/environment-classification.ts @@ -0,0 +1,69 @@ +/** + * Shared environment classification for "Separation of Environments" checks + * across cloud manifests (GCP projects, AWS VPCs, Azure subscriptions/resource + * groups). Kept here — alongside `http-read-failure` — because all three cloud + * manifests need the identical, well-tested logic. + * + * Matching is TOKEN-EXACT, not substring: a candidate string is split on any + * run of non-alphanumeric characters (`-`, `_`, `.`, `/`, spaces) and each + * token is compared exactly to a set of environment keywords. This is why + * "production"/"product" and "dev"/"developer" never collide, and why separator + * style doesn't matter (`myapp-prod`, `myapp_prod`, `myapp.prod` all classify). + */ + +const ENV_TOKEN_SETS: ReadonlyArray<{ env: string; tokens: ReadonlySet }> = [ + // Production is first so it wins ties when a string carries multiple tokens. + { env: 'production', tokens: new Set(['prod', 'production', 'prd', 'live']) }, + { env: 'staging', tokens: new Set(['staging', 'stage', 'stg', 'preprod', 'uat']) }, + { env: 'development', tokens: new Set(['dev', 'develop', 'development']) }, + { env: 'test', tokens: new Set(['test', 'testing', 'qa']) }, + { env: 'sandbox', tokens: new Set(['sandbox', 'sbx', 'demo']) }, +]; + +/** Default tag/label keys that conventionally carry the environment. */ +export const ENV_TAG_KEYS = ['environment', 'env', 'stage', 'tier'] as const; + +/** Split on any run of non-alphanumeric chars; lowercased, empties removed. */ +function tokenize(value: string): string[] { + return value + .toLowerCase() + .split(/[^a-z0-9]+/) + .filter((t) => t.length > 0); +} + +/** + * Classify a list of candidate strings (e.g. an environment tag/label value, + * then a resource name) into a canonical environment, or null if none match. + * Candidates are tried in order, so callers should pass the most authoritative + * source (explicit env tag/label value) before the resource name. + */ +export function classifyEnvironment( + candidates: ReadonlyArray, +): string | null { + for (const candidate of candidates) { + if (!candidate) continue; + const tokens = tokenize(candidate); + for (const { env, tokens: keywords } of ENV_TOKEN_SETS) { + if (tokens.some((t) => keywords.has(t))) return env; + } + } + return null; +} + +/** + * Extract the values of environment-indicating tag/label keys from a tag map. + * Key matching is case-insensitive (Azure/AWS tag keys vary in casing). Only + * the configured env keys are read — arbitrary tag values are deliberately NOT + * scanned, so a stray `team=dev-team` tag can't fabricate an environment. + */ +export function envTagValues( + tags: Record | undefined, + keys: ReadonlyArray = ENV_TAG_KEYS, +): string[] { + if (!tags) return []; + const wanted = new Set(keys.map((k) => k.toLowerCase())); + return Object.entries(tags) + .filter(([k]) => wanted.has(k.toLowerCase())) + .map(([, v]) => v) + .filter((v): v is string => typeof v === 'string' && v.length > 0); +} diff --git a/packages/integration-platform/src/manifests/gcp/checks/__tests__/gcp-checks.test.ts b/packages/integration-platform/src/manifests/gcp/checks/__tests__/gcp-checks.test.ts index b825bf7c6..fa75eb6b2 100644 --- a/packages/integration-platform/src/manifests/gcp/checks/__tests__/gcp-checks.test.ts +++ b/packages/integration-platform/src/manifests/gcp/checks/__tests__/gcp-checks.test.ts @@ -889,10 +889,11 @@ describe('GCP Cloud SQL encryption check', () => { }); describe('classifyProjectEnv — token matching', () => { - it('classifies by name token', () => { + it('classifies by name token (any separator, incl. underscore)', () => { expect(classifyProjectEnv({ projectId: 'myapp-prod' })).toBe('production'); expect(classifyProjectEnv({ projectId: 'myapp-dev-123' })).toBe('development'); expect(classifyProjectEnv({ projectId: 'web-staging' })).toBe('staging'); + expect(classifyProjectEnv({ projectId: 'myapp_prod' })).toBe('production'); }); it('prefers an explicit environment label over the name', () => { diff --git a/packages/integration-platform/src/manifests/gcp/checks/environment-separation.ts b/packages/integration-platform/src/manifests/gcp/checks/environment-separation.ts index 499cc08cd..b789b2b83 100644 --- a/packages/integration-platform/src/manifests/gcp/checks/environment-separation.ts +++ b/packages/integration-platform/src/manifests/gcp/checks/environment-separation.ts @@ -1,5 +1,9 @@ import { TASK_TEMPLATES } from '../../../task-mappings'; import type { CheckContext, IntegrationCheck } from '../../../types'; +import { + classifyEnvironment, + envTagValues, +} from '../../environment-classification'; import { remediationForReadFailure, toHttpReadFailure, @@ -12,35 +16,18 @@ interface GcpProject { } /** - * Environment classification by token. Production is listed first so it wins - * when a string could match more than one bucket. Tokens are matched with word - * boundaries (so "product"/"developer" do NOT match "prod"/"dev"). + * Classify a project into an environment bucket, or null if undetermined. + * Explicit `environment`/`env` label values are most authoritative, then the + * project id / display name. Token matching (shared classifier) means + * "product"/"developer" do NOT match "prod"/"dev" and separator style + * (`-`/`_`/`.`) doesn't matter. */ -const ENV_PATTERNS: ReadonlyArray<{ env: string; re: RegExp }> = [ - { env: 'production', re: /\b(prod|production|prd|live)\b/i }, - { env: 'staging', re: /\b(staging|stage|stg|preprod|uat)\b/i }, - { env: 'development', re: /\b(dev|develop|development)\b/i }, - { env: 'test', re: /\b(test|testing|qa)\b/i }, - { env: 'sandbox', re: /\b(sandbox|sbx|demo)\b/i }, -]; - -// Label keys that conventionally carry the environment, checked before falling -// back to the project name/id — an explicit label is more authoritative. -const ENV_LABEL_KEYS = ['environment', 'env', 'stage', 'tier'] as const; - -/** Classify a project into an environment bucket, or null if undetermined. */ export function classifyProjectEnv(project: GcpProject): string | null { - const labelValues = ENV_LABEL_KEYS.map((k) => project.labels?.[k]).filter( - (v): v is string => typeof v === 'string' && v.length > 0, - ); - // Explicit env labels first, then the project id / display name. - const haystacks = [...labelValues, project.projectId, project.name ?? '']; - for (const haystack of haystacks) { - for (const { env, re } of ENV_PATTERNS) { - if (re.test(haystack)) return env; - } - } - return null; + return classifyEnvironment([ + ...envTagValues(project.labels), + project.projectId, + project.name, + ]); } /** List every active project the connection can see (bounded pagination). */ From 11696f9624111e2845d2c487ecee797f5232b124 Mon Sep 17 00:00:00 2001 From: Tofik Hasanov Date: Wed, 17 Jun 2026 18:13:02 -0400 Subject: [PATCH 3/6] fix(integrations): env-tag priority order + surface Azure subscription scan cap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses two review findings: - P1 (envTagValues ordering): values were returned in tag-map insertion order instead of ENV_TAG_KEYS priority order, so a less-authoritative key (stage=dev) could outrank a more-authoritative one (environment=prod) and misclassify. Now iterates the env keys in priority order via a lowercased lookup map, so `environment` always wins over `stage`/`tier`. +test. - P2 (Azure RG scan cap): the resource-group tier silently bounded to 50 subscriptions. Now the cap is surfaced as explicit coverage info — the fail verdict flips to "could not verify", names "only N of M enabled subscriptions were scanned", carries enabledSubscriptions vs subscriptionsScannedForResourceGroups in evidence, and the remediation tells the user to reduce scope. (A >=2-env PASS is not invalidated by truncation — scanning more subscriptions can only add environments — so the scanned count is recorded in pass evidence for transparency rather than failing a valid pass.) +test (60 enabled subscriptions). 373 package tests pass; typecheck + build green. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../environment-classification.test.ts | 10 +++- .../checks/__tests__/azure-checks.test.ts | 22 +++++++++ .../azure/checks/environment-separation.ts | 47 +++++++++++++++---- .../manifests/environment-classification.ts | 12 +++-- 4 files changed, 76 insertions(+), 15 deletions(-) diff --git a/packages/integration-platform/src/manifests/__tests__/environment-classification.test.ts b/packages/integration-platform/src/manifests/__tests__/environment-classification.test.ts index c71bde6aa..bbbef013e 100644 --- a/packages/integration-platform/src/manifests/__tests__/environment-classification.test.ts +++ b/packages/integration-platform/src/manifests/__tests__/environment-classification.test.ts @@ -51,7 +51,15 @@ describe('classifyEnvironment — token-exact matching', () => { describe('envTagValues — only env-key tags, case-insensitive', () => { it('reads environment-indicating keys regardless of case', () => { expect(envTagValues({ Environment: 'production' })).toEqual(['production']); - expect(envTagValues({ env: 'prod', stage: 'dev' }).sort()).toEqual(['dev', 'prod']); + }); + + it('returns values in env-key PRIORITY order, not tag insertion order', () => { + // `environment` outranks `stage` even though `stage` is inserted first. + expect(envTagValues({ stage: 'dev', environment: 'prod' })).toEqual(['prod', 'dev']); + // so the authoritative key wins classification + expect(classifyEnvironment(envTagValues({ stage: 'dev', environment: 'prod' }))).toBe( + 'production', + ); }); it('ignores non-environment tags (false-positive guard)', () => { diff --git a/packages/integration-platform/src/manifests/azure/checks/__tests__/azure-checks.test.ts b/packages/integration-platform/src/manifests/azure/checks/__tests__/azure-checks.test.ts index 96203120a..f76676f30 100644 --- a/packages/integration-platform/src/manifests/azure/checks/__tests__/azure-checks.test.ts +++ b/packages/integration-platform/src/manifests/azure/checks/__tests__/azure-checks.test.ts @@ -996,4 +996,26 @@ describe('Azure environment separation', () => { expect(passed).toHaveLength(0); expect(failed.some((f) => /Could not verify environment separation/.test(f.title))).toBe(true); }); + + it('surfaces the subscription scan cap as explicit coverage info (no silent truncation)', async () => { + // 60 enabled subscriptions, none env-named, no classifiable resource groups + // → unconfirmed, and the >50 RG-scan cap must be disclosed. + const subs = Array.from({ length: 60 }, (_, i) => ({ + subscriptionId: `s${i}`, + state: 'Enabled', + displayName: `Company-${i}`, + })); + const { passed, failed } = await run( + environmentSeparationCheck, + envSepFetch({ subs, rgs: {} }), + ); + expect(passed).toHaveLength(0); + expect(failed).toHaveLength(1); + expect(failed[0]!.title).toMatch(/Could not verify environment separation/); + expect(failed[0]!.remediation).toMatch(/Reduce to at most 50 enabled subscriptions/); + expect(failed[0]!.evidence).toMatchObject({ + enabledSubscriptions: 60, + subscriptionsScannedForResourceGroups: 50, + }); + }); }); diff --git a/packages/integration-platform/src/manifests/azure/checks/environment-separation.ts b/packages/integration-platform/src/manifests/azure/checks/environment-separation.ts index 951016043..20c2181ec 100644 --- a/packages/integration-platform/src/manifests/azure/checks/environment-separation.ts +++ b/packages/integration-platform/src/manifests/azure/checks/environment-separation.ts @@ -124,8 +124,12 @@ export const environmentSeparationCheck: IntegrationCheck = { return; } - // Tier 2 (weak, logical): resource-group-level separation across the footprint. + // Tier 2 (weak, logical): resource-group-level separation across the + // footprint. The RG fan-out is bounded (one list call per subscription); + // a footprint larger than the cap is surfaced as incomplete coverage below, + // never silently dropped. const boundedSubs = subscriptions.slice(0, MAX_SUBSCRIPTIONS); + const truncated = subscriptions.length > boundedSubs.length; const rgEnvSet = new Set(); const rgSamples: Array<{ name: string; environment: string }> = []; let anyRgReadFailed = false; @@ -152,6 +156,10 @@ export const environmentSeparationCheck: IntegrationCheck = { } } const resourceGroupEnvs = [...rgEnvSet]; + // A PASS means >=2 environments were positively found; scanning the + // remaining subscriptions could only ADD environments, never remove the two + // we already have, so truncation cannot invalidate a pass. The scanned count + // is still recorded in evidence for transparency. if (resourceGroupEnvs.length >= 2) { ctx.pass({ title: 'Environments separated across resource groups', @@ -161,32 +169,51 @@ export const environmentSeparationCheck: IntegrationCheck = { evidence: { boundary: 'resource-group', detectedEnvironments: resourceGroupEnvs, - subscriptionCount: boundedSubs.length, + enabledSubscriptions: subscriptions.length, + subscriptionsScannedForResourceGroups: boundedSubs.length, resourceGroups: rgSamples, }, }); return; } - // Neither tier confirmed separation. + // Neither tier confirmed separation. Surface any incomplete coverage (read + // failures and/or the subscription scan cap) so the verdict is never + // presented as if it covered the whole footprint. const detectedAll = [...new Set([...subscriptionEnvs, ...resourceGroupEnvs])]; + const coverageGaps: string[] = []; + if (truncated) { + coverageGaps.push( + `only ${boundedSubs.length} of ${subscriptions.length} enabled subscriptions were scanned for resource groups`, + ); + } + if (anyRgReadFailed) { + coverageGaps.push('some resource groups could not be read'); + } + const base = + detectedAll.length === 0 + ? `No Azure subscription or resource group could be classified by environment across ${subscriptions.length} subscription(s)` + : `Environment(s) detected (${detectedAll.join(', ')}) but not 2+ distinct environments at a single boundary (subscriptions, or resource groups)`; ctx.fail({ + // Incomplete coverage that leaves us short of a verdict is "could not + // verify"; a complete scan that simply found no separation is "could not + // confirm". title: - anyRgReadFailed && detectedAll.length < 2 + coverageGaps.length > 0 && detectedAll.length < 2 ? 'Could not verify environment separation' : 'Could not confirm environment separation', - description: - detectedAll.length === 0 - ? `No Azure subscription or resource group could be classified by environment across ${subscriptions.length} subscription(s)${anyRgReadFailed ? ' (some resource groups could not be read)' : ''}, so environment separation could not be confirmed.` - : `Environment(s) detected (${detectedAll.join(', ')}) but not 2+ distinct environments at a single boundary (subscriptions, or resource groups), so environment separation could not be confirmed.`, + description: `${base}${coverageGaps.length ? ` (${coverageGaps.join('; ')})` : ''}, so environment separation could not be confirmed.`, resourceType: 'azure-environment-separation', resourceId: 'subscriptions', severity: 'medium', - remediation: GUIDANCE, + remediation: truncated + ? `Reduce to at most ${MAX_SUBSCRIPTIONS} enabled subscriptions (or contact support to raise the limit). ${GUIDANCE}` + : GUIDANCE, evidence: { subscriptionEnvironments: subscriptionEnvs, resourceGroupEnvironments: resourceGroupEnvs, - subscriptionCount: subscriptions.length, + enabledSubscriptions: subscriptions.length, + subscriptionsScannedForResourceGroups: boundedSubs.length, resourceGroupsClassified: rgSamples.length, }, }); diff --git a/packages/integration-platform/src/manifests/environment-classification.ts b/packages/integration-platform/src/manifests/environment-classification.ts index f11eb245a..fe8355bf7 100644 --- a/packages/integration-platform/src/manifests/environment-classification.ts +++ b/packages/integration-platform/src/manifests/environment-classification.ts @@ -61,9 +61,13 @@ export function envTagValues( keys: ReadonlyArray = ENV_TAG_KEYS, ): string[] { if (!tags) return []; - const wanted = new Set(keys.map((k) => k.toLowerCase())); - return Object.entries(tags) - .filter(([k]) => wanted.has(k.toLowerCase())) - .map(([, v]) => v) + // Iterate the configured keys in PRIORITY order (not the tag map's insertion + // order) so a more authoritative key (`environment`) is returned before a + // less authoritative one (`stage`) — `classifyEnvironment` trusts order. + const normalized = new Map( + Object.entries(tags).map(([k, v]) => [k.toLowerCase(), v]), + ); + return keys + .map((k) => normalized.get(k.toLowerCase())) .filter((v): v is string => typeof v === 'string' && v.length > 0); } From adc2634f9b8b4bc398ef90b1a2b7574e7b696c2d Mon Sep 17 00:00:00 2001 From: Tofik Hasanov Date: Wed, 17 Jun 2026 18:54:51 -0400 Subject: [PATCH 4/6] =?UTF-8?q?fix(integrations):=20address=20env-separati?= =?UTF-8?q?on=20review=20=E2=80=94=20scope,=20prod+non-prod=20bar,=20trunc?= =?UTF-8?q?ation?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes the 3 issues cubic flagged on the release PR (all verified real against the code), plus a related consistency fix: 1. Azure ignored the configured subscription scope and scanned ALL enabled subscriptions, breaking the opt-in scan contract. Now scopes via resolveAzureSubscriptionIds (selection → legacy → first enabled; bounds the fan-out and surfaces over-limit/empty scope as its own finding). Both the per-subscription name read and the resource-group list touch ONLY in-scope subscriptions. 2. Pass condition was too weak (>=2 of any environments could pass on dev + staging without confirming production). New shared confirmsEnvironmentSeparation requires PRODUCTION plus at least one NON-PRODUCTION environment; applied to GCP, AWS and Azure. 3. GCP pagination cap evaluated a partial footprint but returned a normal verdict. Discovery truncation now propagates: a non-confirmed verdict under truncation becomes "could not verify" (evidence.discoveryTruncated). A confirmed pass still stands (scanning more projects can only ADD environments, never remove the prod/non-prod already found). 4. (Consistency, same class as #1) GCP now honors the project_ids opt-in scope — fetches the selected projects directly instead of always listing all. All branches fail-with-guidance, never silent pass; read failures → "could not verify"; the task accepts manual evidence. Adversarially re-verified (no new false-pass, no regressions). 382 package tests pass; typecheck + build green. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../environment-classification.test.ts | 18 ++ .../aws/checks/__tests__/aws-checks.test.ts | 11 +- .../aws/checks/environment-separation.ts | 28 +-- .../checks/__tests__/azure-checks.test.ts | 124 +++++++------ .../azure/checks/environment-separation.ts | 171 ++++++------------ .../manifests/environment-classification.ts | 18 ++ .../gcp/checks/__tests__/gcp-checks.test.ts | 82 ++++++++- .../gcp/checks/environment-separation.ts | 154 +++++++++++----- 8 files changed, 371 insertions(+), 235 deletions(-) diff --git a/packages/integration-platform/src/manifests/__tests__/environment-classification.test.ts b/packages/integration-platform/src/manifests/__tests__/environment-classification.test.ts index bbbef013e..5d4fd0453 100644 --- a/packages/integration-platform/src/manifests/__tests__/environment-classification.test.ts +++ b/packages/integration-platform/src/manifests/__tests__/environment-classification.test.ts @@ -1,9 +1,27 @@ import { describe, expect, it } from 'bun:test'; import { classifyEnvironment, + confirmsEnvironmentSeparation, envTagValues, } from '../environment-classification'; +describe('confirmsEnvironmentSeparation — requires prod + non-prod', () => { + it('passes only with production AND a non-production environment', () => { + expect(confirmsEnvironmentSeparation(['production', 'development'])).toBe(true); + expect(confirmsEnvironmentSeparation(['production', 'staging', 'test'])).toBe(true); + }); + + it('fails on two non-production environments (no production)', () => { + expect(confirmsEnvironmentSeparation(['development', 'staging'])).toBe(false); + }); + + it('fails on production alone, or a single environment, or none', () => { + expect(confirmsEnvironmentSeparation(['production'])).toBe(false); + expect(confirmsEnvironmentSeparation(['development'])).toBe(false); + expect(confirmsEnvironmentSeparation([])).toBe(false); + }); +}); + describe('classifyEnvironment — token-exact matching', () => { it('classifies common environment tokens', () => { expect(classifyEnvironment(['myapp-prod'])).toBe('production'); diff --git a/packages/integration-platform/src/manifests/aws/checks/__tests__/aws-checks.test.ts b/packages/integration-platform/src/manifests/aws/checks/__tests__/aws-checks.test.ts index 5a8f446c7..89b42dfa5 100644 --- a/packages/integration-platform/src/manifests/aws/checks/__tests__/aws-checks.test.ts +++ b/packages/integration-platform/src/manifests/aws/checks/__tests__/aws-checks.test.ts @@ -970,7 +970,7 @@ describe('AWS environment separation', () => { expect(classifyVpcEnv(undefined)).toBeNull(); }); - it('passes when >=2 environments are found, without claiming cross-account isolation', () => { + it('passes on production + non-production, without claiming cross-account isolation', () => { const out = evaluateEnvironmentSeparation([ { vpcId: 'vpc-1', region: 'us-east-1', environment: 'production' }, { vpcId: 'vpc-2', region: 'us-east-1', environment: 'development' }, @@ -980,6 +980,15 @@ describe('AWS environment separation', () => { expect(out[0]!.description).toMatch(/not cross-account isolation/); }); + it('fails when only non-production environments are present (no production)', () => { + const out = evaluateEnvironmentSeparation([ + { vpcId: 'vpc-1', region: 'us-east-1', environment: 'development' }, + { vpcId: 'vpc-2', region: 'us-east-1', environment: 'staging' }, + ]); + expect(out).toHaveLength(1); + expect(out[0]!.kind).toBe('fail'); + }); + it('fails (low) with guidance when only one environment is detected', () => { const out = evaluateEnvironmentSeparation([ { vpcId: 'vpc-1', region: 'us-east-1', environment: 'production' }, diff --git a/packages/integration-platform/src/manifests/aws/checks/environment-separation.ts b/packages/integration-platform/src/manifests/aws/checks/environment-separation.ts index fb2b8cd40..142c4eec1 100644 --- a/packages/integration-platform/src/manifests/aws/checks/environment-separation.ts +++ b/packages/integration-platform/src/manifests/aws/checks/environment-separation.ts @@ -3,6 +3,7 @@ import { TASK_TEMPLATES } from '../../../task-mappings'; import type { CheckContext, IntegrationCheck } from '../../../types'; import { classifyEnvironment, + confirmsEnvironmentSeparation, envTagValues, } from '../../environment-classification'; import { @@ -50,11 +51,12 @@ export function classifyVpcEnv( } /** - * Pure verdict from the account's non-default VPCs. PASS only when >=2 distinct - * environments are positively observed; otherwise fail-with-guidance (never a - * silent pass). The PASS wording is deliberately scoped to "within a single - * AWS account" — two environment-labeled VPCs prove network labeling, not - * cross-account isolation (they can be peered / share the account boundary). + * Pure verdict from the account's non-default VPCs. PASS only when a PRODUCTION + * environment is positively observed alongside at least one NON-PRODUCTION + * environment; otherwise fail-with-guidance (never a silent pass). The PASS + * wording is deliberately scoped to "within a single AWS account" — environment- + * labeled VPCs prove network labeling, not cross-account isolation (they can be + * peered / share the account boundary). */ export function evaluateEnvironmentSeparation(vpcs: VpcInfo[]): CheckOutcome[] { const sample = vpcs.slice(0, 50).map((v) => ({ @@ -85,12 +87,15 @@ export function evaluateEnvironmentSeparation(vpcs: VpcInfo[]): CheckOutcome[] { ), ]; - if (detected.length >= 2) { + // A confirmed pass requires a production environment separated from a + // non-production one — two non-production VPCs alone do not demonstrate that + // production is segregated. + if (confirmsEnvironmentSeparation(detected)) { return [ { kind: 'pass', title: 'Distinct environment-labeled VPCs found', - description: `Detected ${detected.length} distinct environments across non-default VPCs in this AWS account: ${detected.join(', ')}. This evidences environment-labeled network separation within a single account (not cross-account isolation).`, + description: `Detected production separated from non-production across non-default VPCs in this AWS account: ${detected.join(', ')}. This evidences environment-labeled network separation within a single account (not cross-account isolation).`, resourceType: 'aws-environment-separation', resourceId: 'vpcs', evidence: { @@ -107,9 +112,9 @@ export function evaluateEnvironmentSeparation(vpcs: VpcInfo[]): CheckOutcome[] { kind: 'fail', title: 'Could not confirm environment separation', description: - detected.length === 1 - ? `Only one environment ("${detected[0]}") was detected among this account's VPCs; this connection evaluates a single AWS account.` - : "No VPC in this account could be classified by environment, so environment separation could not be confirmed.", + detected.length === 0 + ? "No VPC in this account could be classified by environment, so environment separation could not be confirmed." + : `Detected environment(s) ${detected.join(', ')} among this account's VPCs, but could not confirm a production environment separated from a non-production one; this connection evaluates a single AWS account.`, resourceType: 'aws-environment-separation', resourceId: 'vpcs', severity: 'low', @@ -126,7 +131,8 @@ export function evaluateEnvironmentSeparation(vpcs: VpcInfo[]): CheckOutcome[] { /** * Separation of Environments check (heuristic, within-account). Lists every * non-default VPC across the account's regions, classifies each by its - * Environment/Name tag, and passes when >=2 distinct environments are found. + * Environment/Name tag, and passes when a production environment is found + * alongside at least one non-production environment. * Account-per-environment separation is invisible from one connection (no * Organizations access), so a single-environment account fails with guidance to * connect each env account or upload a diagram — the task accepts manual diff --git a/packages/integration-platform/src/manifests/azure/checks/__tests__/azure-checks.test.ts b/packages/integration-platform/src/manifests/azure/checks/__tests__/azure-checks.test.ts index f76676f30..fbc154d9c 100644 --- a/packages/integration-platform/src/manifests/azure/checks/__tests__/azure-checks.test.ts +++ b/packages/integration-platform/src/manifests/azure/checks/__tests__/azure-checks.test.ts @@ -899,24 +899,24 @@ describe('azure subscription picker fetchOptions', () => { }); describe('Azure environment separation', () => { - const envSepFetch = - (opts: { subs: unknown[]; rgs?: Record }) => + // Mocks the IN-SCOPE per-subscription name GET and the per-subscription + // resource-group list. Scope is driven by the `variables` arg (subscription_id + // / subscription_ids) via resolveAzureSubscriptionIds — there is NO list-all. + const azFetch = + (opts: { names?: Record; rgs?: Record }) => (url: string) => { - if (url.includes('/subscriptions?api-version')) return { value: opts.subs }; - const m = url.match(/\/subscriptions\/([^/]+)\/resourcegroups/); - if (m) return { value: opts.rgs?.[m[1]!] ?? [] }; + const subM = url.match(/\/subscriptions\/([^/?]+)\?api-version/); + if (subM) return { displayName: opts.names?.[subM[1]!] ?? subM[1]! }; + const rgM = url.match(/\/subscriptions\/([^/]+)\/resourcegroups/); + if (rgM) return { value: opts.rgs?.[rgM[1]!] ?? [] }; return {}; }; - it('passes (strong) when >=2 subscriptions classify to distinct environments', async () => { + it('passes (strong) when scoped subscriptions classify to prod + non-prod', async () => { const { passed, failed } = await run( environmentSeparationCheck, - envSepFetch({ - subs: [ - { subscriptionId: 's1', state: 'Enabled', displayName: 'Production' }, - { subscriptionId: 's2', state: 'Enabled', displayName: 'Development' }, - ], - }), + azFetch({ names: { s1: 'Production', s2: 'Development' } }), + { subscription_ids: ['s1', 's2'] }, ); expect(failed).toHaveLength(0); expect(passed).toContain('Environments separated across subscriptions'); @@ -925,14 +925,9 @@ describe('Azure environment separation', () => { it('passes (weak) on resource-group separation, disclosed as logical', async () => { const { passed, failed } = await run( environmentSeparationCheck, - envSepFetch({ - subs: [{ subscriptionId: 's1', state: 'Enabled', displayName: 'MyCompany' }], - rgs: { - s1: [ - { id: 'a', name: 'rg-prod' }, - { id: 'b', name: 'rg-dev' }, - ], - }, + azFetch({ + names: { 'sub-1': 'MyCompany' }, + rgs: { 'sub-1': [{ id: 'a', name: 'rg-prod' }, { id: 'b', name: 'rg-dev' }] }, }), ); expect(failed).toHaveLength(0); @@ -942,10 +937,10 @@ describe('Azure environment separation', () => { it('passes on resource-group tags (case-insensitive key)', async () => { const { passed } = await run( environmentSeparationCheck, - envSepFetch({ - subs: [{ subscriptionId: 's1', state: 'Enabled', displayName: 'Company' }], + azFetch({ + names: { 'sub-1': 'Company' }, rgs: { - s1: [ + 'sub-1': [ { id: 'a', name: 'a', tags: { environment: 'production' } }, { id: 'b', name: 'b', tags: { Environment: 'staging' } }, ], @@ -955,67 +950,84 @@ describe('Azure environment separation', () => { expect(passed).toContain('Environments separated across resource groups'); }); - it('does NOT union tiers: a prod subscription with an rg-dev inside fails', async () => { + it('fails on two non-production environments (no production)', async () => { const { passed, failed } = await run( environmentSeparationCheck, - envSepFetch({ - subs: [{ subscriptionId: 's1', state: 'Enabled', displayName: 'Production' }], - rgs: { s1: [{ id: 'a', name: 'rg-dev' }] }, + azFetch({ + names: { 'sub-1': 'Company' }, + rgs: { 'sub-1': [{ id: 'a', name: 'rg-dev' }, { id: 'b', name: 'rg-staging' }] }, }), ); expect(passed).toHaveLength(0); expect(failed.some((f) => /Could not confirm environment separation/.test(f.title))).toBe(true); }); - it('fails with guidance when nothing classifies', async () => { + it('does NOT union tiers: prod subscription + an rg-dev inside fails', async () => { const { passed, failed } = await run( environmentSeparationCheck, - envSepFetch({ - subs: [{ subscriptionId: 's1', state: 'Enabled', displayName: 'Company' }], - rgs: { s1: [{ id: 'a', name: 'backend' }] }, - }), + azFetch({ names: { s1: 'Production' }, rgs: { s1: [{ id: 'a', name: 'rg-dev' }] } }), + { subscription_ids: ['s1'] }, ); expect(passed).toHaveLength(0); - expect(failed).toHaveLength(1); - expect(failed[0]!.remediation).toMatch(/distinct subscriptions/); + expect(failed.some((f) => /Could not confirm environment separation/.test(f.title))).toBe(true); }); - it('fails when no enabled subscriptions are visible', async () => { - const { failed } = await run( + it('only scans the configured subscription scope', async () => { + // Scope is ['s1']; touching any other subscription must throw. + const { passed, failed } = await run( environmentSeparationCheck, - envSepFetch({ subs: [{ subscriptionId: 's1', state: 'Disabled', displayName: 'Old' }] }), + (url) => { + if (!url.includes('/subscriptions/s1')) { + throw new Error(`out-of-scope access: ${url}`); + } + const subM = url.match(/\/subscriptions\/([^/?]+)\?api-version/); + if (subM) return { displayName: 'Company' }; + if (url.includes('/resourcegroups')) { + return { value: [{ id: 'a', name: 'rg-prod' }, { id: 'b', name: 'rg-dev' }] }; + } + return {}; + }, + { subscription_ids: ['s1'] }, ); - expect(failed.some((f) => /No Azure subscriptions detected/.test(f.title))).toBe(true); + expect(failed).toHaveLength(0); + expect(passed).toContain('Environments separated across resource groups'); }); - it('fails "could not verify" when the subscription list read fails', async () => { + it('fails with guidance when nothing classifies', async () => { + const { passed, failed } = await run( + environmentSeparationCheck, + azFetch({ names: { 'sub-1': 'Company' }, rgs: { 'sub-1': [{ id: 'a', name: 'backend' }] } }), + ); + expect(passed).toHaveLength(0); + expect(failed).toHaveLength(1); + expect(failed[0]!.remediation).toMatch(/distinct subscriptions/); + }); + + it('fails "could not verify" when a resource-group read fails', async () => { const { passed, failed } = await run(environmentSeparationCheck, (url) => { - if (url.includes('/subscriptions?api-version')) throw new Error('HTTP 403: Forbidden'); + if (url.includes('/resourcegroups')) throw new Error('HTTP 403: Forbidden'); + const subM = url.match(/\/subscriptions\/([^/?]+)\?api-version/); + if (subM) return { displayName: 'Company' }; return {}; }); expect(passed).toHaveLength(0); expect(failed.some((f) => /Could not verify environment separation/.test(f.title))).toBe(true); }); - it('surfaces the subscription scan cap as explicit coverage info (no silent truncation)', async () => { - // 60 enabled subscriptions, none env-named, no classifiable resource groups - // → unconfirmed, and the >50 RG-scan cap must be disclosed. - const subs = Array.from({ length: 60 }, (_, i) => ({ - subscriptionId: `s${i}`, - state: 'Enabled', - displayName: `Company-${i}`, - })); + it('defers to the scope resolver when no subscription is in scope', async () => { + // variables {} → discovery; no enabled subscription → resolveAzureSubscriptionIds + // emits its own scope finding and the check early-returns (no double fail). const { passed, failed } = await run( environmentSeparationCheck, - envSepFetch({ subs, rgs: {} }), + (url) => { + if (url.includes('/subscriptions?api-version')) { + return { value: [{ subscriptionId: 's1', state: 'Disabled' }] }; + } + return {}; + }, + {}, ); expect(passed).toHaveLength(0); - expect(failed).toHaveLength(1); - expect(failed[0]!.title).toMatch(/Could not verify environment separation/); - expect(failed[0]!.remediation).toMatch(/Reduce to at most 50 enabled subscriptions/); - expect(failed[0]!.evidence).toMatchObject({ - enabledSubscriptions: 60, - subscriptionsScannedForResourceGroups: 50, - }); + expect(failed.some((f) => /Could not verify Azure subscription scope/.test(f.title))).toBe(true); }); }); diff --git a/packages/integration-platform/src/manifests/azure/checks/environment-separation.ts b/packages/integration-platform/src/manifests/azure/checks/environment-separation.ts index 20c2181ec..265b8fd5a 100644 --- a/packages/integration-platform/src/manifests/azure/checks/environment-separation.ts +++ b/packages/integration-platform/src/manifests/azure/checks/environment-separation.ts @@ -2,21 +2,14 @@ import { TASK_TEMPLATES } from '../../../task-mappings'; import type { CheckContext, IntegrationCheck } from '../../../types'; import { classifyEnvironment, + confirmsEnvironmentSeparation, envTagValues, } from '../../environment-classification'; -import { remediationForReadFailure, toHttpReadFailure } from '../../http-read-failure'; -import { ARM_BASE, armListAll } from './shared'; +import { toHttpReadFailure } from '../../http-read-failure'; +import { ARM_BASE, armListAll, resolveAzureSubscriptionIds } from './shared'; -const SUBSCRIPTIONS_API_VERSION = '2020-01-01'; +const SUBSCRIPTION_API_VERSION = '2020-01-01'; const RESOURCE_GROUPS_API_VERSION = '2021-04-01'; -// Bound the resource-group fan-out (one list call per subscription). -const MAX_SUBSCRIPTIONS = 50; - -interface Subscription { - subscriptionId: string; - displayName?: string; - state?: string; -} interface ResourceGroup { id: string; @@ -41,16 +34,18 @@ const GUIDANCE = 'Separate production and non-production into distinct subscriptions (strongest), or tag each resource group with an `environment` tag (e.g. environment=production / environment=staging). If you separate environments another way, upload a console screenshot or architecture diagram as evidence.'; /** - * Separation of Environments check (heuristic). Evaluates the whole footprint in - * two tiers, in safety order: - * 1) Subscriptions — a real isolation/RBAC/billing boundary. If >=2 enabled - * subscriptions classify into distinct environments → PASS (strong). + * Separation of Environments check (heuristic). Evaluates ONLY the subscriptions + * the connection is scoped to via `resolveAzureSubscriptionIds` — the same + * opt-in scope every Azure check honors (selection → legacy single → first + * enabled). That helper also bounds the fan-out and surfaces an over-limit + * selection or an unresolvable scope as its own finding, and returns [] when + * nothing is in scope. Two tiers, in safety order: + * 1) Subscriptions — a real isolation/RBAC/billing boundary. * 2) Resource groups — the most commonly env-named primitive, but only a - * LOGICAL container (shares the subscription's access/network). If >=2 - * distinct environments across RGs → PASS, explicitly disclosed as logical. - * Tiers are NOT unioned: a single prod subscription containing an `rg-dev` - * resource group must not be mistaken for separation. Anything else fails with - * guidance; the task accepts manual evidence. + * LOGICAL container (shares the subscription's access/network). + * A pass requires a PRODUCTION environment separated from a NON-PRODUCTION one + * (not merely two non-production environments). Tiers are not unioned; anything + * else fails with guidance and the task accepts manual evidence. */ export const environmentSeparationCheck: IntegrationCheck = { id: 'azure-environment-separation', @@ -60,90 +55,59 @@ export const environmentSeparationCheck: IntegrationCheck = { service: 'policy', taskMapping: TASK_TEMPLATES.separationOfEnvironments, run: async (ctx: CheckContext) => { - // Footprint-wide: list ALL enabled subscriptions directly (separation is a - // whole-footprint property, so this intentionally ignores subscription - // scoping the way the GCP check ignores project scoping). - let subscriptions: Subscription[]; - try { - const data = await ctx.fetch<{ value?: Subscription[] }>( - `${ARM_BASE}/subscriptions?api-version=${SUBSCRIPTIONS_API_VERSION}`, - ); - subscriptions = (data.value ?? []).filter((s) => s.state === 'Enabled'); - } catch (err) { - const failure = toHttpReadFailure(err); - ctx.fail({ - title: 'Could not verify environment separation', - description: `Azure subscriptions could not be listed (${failure.error}), so environment separation could not be evaluated.`, - resourceType: 'azure-environment-separation', - resourceId: 'subscriptions', - severity: 'medium', - remediation: remediationForReadFailure( - failure, - 'Grant the connection Reader access to the subscription(s), then re-run the check.', - ), - evidence: { readError: failure.error }, - }); - return; - } + const subscriptionIds = await resolveAzureSubscriptionIds(ctx); + // resolveAzureSubscriptionIds already emitted a finding when scope is empty. + if (subscriptionIds.length === 0) return; - if (subscriptions.length === 0) { - ctx.fail({ - title: 'No Azure subscriptions detected', - description: - 'No enabled Azure subscriptions were accessible, so environment separation could not be evaluated.', - resourceType: 'azure-environment-separation', - resourceId: 'subscriptions', - severity: 'medium', - remediation: - 'Grant the connection Reader access to your subscriptions, then re-run — or upload a console screenshot / architecture diagram as evidence.', - evidence: { subscriptionCount: 0 }, - }); - return; + // Tier 1 (strong): subscription-level separation. Read each IN-SCOPE + // subscription's display name only — we never touch subscriptions outside + // the configured selection. + const subscriptionEnvSet = new Set(); + for (const id of subscriptionIds) { + try { + const sub = await ctx.fetch<{ displayName?: string }>( + `${ARM_BASE}/subscriptions/${id}?api-version=${SUBSCRIPTION_API_VERSION}`, + ); + const env = classifyEnvironment([sub.displayName]); + if (env) subscriptionEnvSet.add(env); + } catch (err) { + ctx.log( + `Azure env-separation: could not read subscription ${id} — ${toHttpReadFailure(err).error}`, + ); + } } - - // Tier 1 (strong): subscription-level separation. - const subscriptionEnvs = [ - ...new Set( - subscriptions - .map((s) => classifyEnvironment([s.displayName])) - .filter((e): e is string => e !== null), - ), - ]; - if (subscriptionEnvs.length >= 2) { + const subscriptionEnvs = [...subscriptionEnvSet]; + if (confirmsEnvironmentSeparation(subscriptionEnvs)) { ctx.pass({ title: 'Environments separated across subscriptions', - description: `Detected ${subscriptionEnvs.length} distinct environments across ${subscriptions.length} Azure subscriptions: ${subscriptionEnvs.join(', ')} (subscription-level boundary).`, + description: `Detected production separated from non-production across ${subscriptionIds.length} in-scope Azure subscription(s): ${subscriptionEnvs.join(', ')} (subscription-level boundary).`, resourceType: 'azure-environment-separation', resourceId: 'subscriptions', evidence: { boundary: 'subscription', detectedEnvironments: subscriptionEnvs, - subscriptionCount: subscriptions.length, + subscriptionsScanned: subscriptionIds.length, }, }); return; } - // Tier 2 (weak, logical): resource-group-level separation across the - // footprint. The RG fan-out is bounded (one list call per subscription); - // a footprint larger than the cap is surfaced as incomplete coverage below, - // never silently dropped. - const boundedSubs = subscriptions.slice(0, MAX_SUBSCRIPTIONS); - const truncated = subscriptions.length > boundedSubs.length; + // Tier 2 (weak, logical): resource-group-level separation within the + // IN-SCOPE subscriptions only. const rgEnvSet = new Set(); const rgSamples: Array<{ name: string; environment: string }> = []; let anyRgReadFailed = false; - for (const sub of boundedSubs) { + for (const id of subscriptionIds) { let resourceGroups: ResourceGroup[]; try { resourceGroups = await armListAll( ctx, - `${ARM_BASE}/subscriptions/${sub.subscriptionId}/resourcegroups?api-version=${RESOURCE_GROUPS_API_VERSION}`, + `${ARM_BASE}/subscriptions/${id}/resourcegroups?api-version=${RESOURCE_GROUPS_API_VERSION}`, ); } catch (err) { anyRgReadFailed = true; ctx.log( - `Azure env-separation: could not list resource groups in ${sub.subscriptionId} — ${toHttpReadFailure(err).error}`, + `Azure env-separation: could not list resource groups in ${id} — ${toHttpReadFailure(err).error}`, ); continue; } @@ -156,64 +120,43 @@ export const environmentSeparationCheck: IntegrationCheck = { } } const resourceGroupEnvs = [...rgEnvSet]; - // A PASS means >=2 environments were positively found; scanning the - // remaining subscriptions could only ADD environments, never remove the two - // we already have, so truncation cannot invalidate a pass. The scanned count - // is still recorded in evidence for transparency. - if (resourceGroupEnvs.length >= 2) { + if (confirmsEnvironmentSeparation(resourceGroupEnvs)) { ctx.pass({ title: 'Environments separated across resource groups', - description: `Detected ${resourceGroupEnvs.length} distinct environments across resource groups in ${boundedSubs.length} subscription(s): ${resourceGroupEnvs.join(', ')}. Resource-group separation is logical — RGs share the subscription's access and network boundary — not full isolation.`, + description: `Detected production separated from non-production across resource groups in ${subscriptionIds.length} in-scope subscription(s): ${resourceGroupEnvs.join(', ')}. Resource-group separation is logical — RGs share the subscription's access and network boundary — not full isolation.`, resourceType: 'azure-environment-separation', resourceId: 'resource-groups', evidence: { boundary: 'resource-group', detectedEnvironments: resourceGroupEnvs, - enabledSubscriptions: subscriptions.length, - subscriptionsScannedForResourceGroups: boundedSubs.length, + subscriptionsScanned: subscriptionIds.length, resourceGroups: rgSamples, }, }); return; } - // Neither tier confirmed separation. Surface any incomplete coverage (read - // failures and/or the subscription scan cap) so the verdict is never - // presented as if it covered the whole footprint. + // Could not confirm. A read failure leaves coverage incomplete ("could not + // verify"); a complete scan that simply found no prod/non-prod split is + // "could not confirm". const detectedAll = [...new Set([...subscriptionEnvs, ...resourceGroupEnvs])]; - const coverageGaps: string[] = []; - if (truncated) { - coverageGaps.push( - `only ${boundedSubs.length} of ${subscriptions.length} enabled subscriptions were scanned for resource groups`, - ); - } - if (anyRgReadFailed) { - coverageGaps.push('some resource groups could not be read'); - } const base = detectedAll.length === 0 - ? `No Azure subscription or resource group could be classified by environment across ${subscriptions.length} subscription(s)` - : `Environment(s) detected (${detectedAll.join(', ')}) but not 2+ distinct environments at a single boundary (subscriptions, or resource groups)`; + ? `No in-scope Azure subscription or resource group could be classified by environment across ${subscriptionIds.length} subscription(s)` + : `Detected environment(s) ${detectedAll.join(', ')}, but could not confirm a production environment separated from a non-production one across ${subscriptionIds.length} in-scope subscription(s)`; ctx.fail({ - // Incomplete coverage that leaves us short of a verdict is "could not - // verify"; a complete scan that simply found no separation is "could not - // confirm". - title: - coverageGaps.length > 0 && detectedAll.length < 2 - ? 'Could not verify environment separation' - : 'Could not confirm environment separation', - description: `${base}${coverageGaps.length ? ` (${coverageGaps.join('; ')})` : ''}, so environment separation could not be confirmed.`, + title: anyRgReadFailed + ? 'Could not verify environment separation' + : 'Could not confirm environment separation', + description: `${base}${anyRgReadFailed ? ' (some resource groups could not be read)' : ''}.`, resourceType: 'azure-environment-separation', resourceId: 'subscriptions', severity: 'medium', - remediation: truncated - ? `Reduce to at most ${MAX_SUBSCRIPTIONS} enabled subscriptions (or contact support to raise the limit). ${GUIDANCE}` - : GUIDANCE, + remediation: GUIDANCE, evidence: { subscriptionEnvironments: subscriptionEnvs, resourceGroupEnvironments: resourceGroupEnvs, - enabledSubscriptions: subscriptions.length, - subscriptionsScannedForResourceGroups: boundedSubs.length, + subscriptionsScanned: subscriptionIds.length, resourceGroupsClassified: rgSamples.length, }, }); diff --git a/packages/integration-platform/src/manifests/environment-classification.ts b/packages/integration-platform/src/manifests/environment-classification.ts index fe8355bf7..4325dd1f9 100644 --- a/packages/integration-platform/src/manifests/environment-classification.ts +++ b/packages/integration-platform/src/manifests/environment-classification.ts @@ -23,6 +23,24 @@ const ENV_TOKEN_SETS: ReadonlyArray<{ env: string; tokens: ReadonlySet } /** Default tag/label keys that conventionally carry the environment. */ export const ENV_TAG_KEYS = ['environment', 'env', 'stage', 'tier'] as const; +/** The single production bucket; every other bucket is non-production. */ +const PRODUCTION_ENV = 'production'; + +/** + * Whether a set of detected environments confirms environment SEPARATION as the + * control intends: production must be present AND at least one non-production + * environment (staging/development/test/sandbox). Two non-production + * environments alone (e.g. dev + staging) do NOT demonstrate that production is + * segregated, so they must not pass. + */ +export function confirmsEnvironmentSeparation( + envs: ReadonlyArray, +): boolean { + return ( + envs.includes(PRODUCTION_ENV) && envs.some((e) => e !== PRODUCTION_ENV) + ); +} + /** Split on any run of non-alphanumeric chars; lowercased, empties removed. */ function tokenize(value: string): string[] { return value diff --git a/packages/integration-platform/src/manifests/gcp/checks/__tests__/gcp-checks.test.ts b/packages/integration-platform/src/manifests/gcp/checks/__tests__/gcp-checks.test.ts index fa75eb6b2..2f7703101 100644 --- a/packages/integration-platform/src/manifests/gcp/checks/__tests__/gcp-checks.test.ts +++ b/packages/integration-platform/src/manifests/gcp/checks/__tests__/gcp-checks.test.ts @@ -921,9 +921,13 @@ describe('GCP environment-separation check', () => { (err as Error & { status: number }).status = code; return err; }; + // `variables: {}` forces unscoped discovery (the project_ids-less default), + // so the mock can return the `/v1/projects` list shape. + const UNSCOPED = {}; - it('passes when ≥2 distinct environments are detected by name', async () => { + it('passes when production is separated from a non-production env (by name)', async () => { const out = await runCheck(environmentSeparationCheck, { + variables: UNSCOPED, fetch: () => ({ projects: [{ projectId: 'myapp-prod' }, { projectId: 'myapp-dev' }], }), @@ -936,8 +940,9 @@ describe('GCP environment-separation check', () => { }); }); - it('passes when environments are distinguished by labels', async () => { + it('passes when environments are distinguished by labels (prod + staging)', async () => { const out = await runCheck(environmentSeparationCheck, { + variables: UNSCOPED, fetch: () => ({ projects: [ { projectId: 'a', labels: { environment: 'production' } }, @@ -949,8 +954,9 @@ describe('GCP environment-separation check', () => { expect(out.passed).toHaveLength(1); }); - it('evaluates projects across multiple pages', async () => { + it('evaluates projects across multiple pages (unscoped discovery)', async () => { const out = await runCheck(environmentSeparationCheck, { + variables: UNSCOPED, fetch: (url) => { if (url.includes('pageToken=tok2')) { return { projects: [{ projectId: 'app-dev' }] }; @@ -962,8 +968,39 @@ describe('GCP environment-separation check', () => { expect(out.passed).toHaveLength(1); }); - it('fails with guidance when only one environment is present', async () => { + it('honors project_ids scope: fetches selected projects, never lists all', async () => { const out = await runCheck(environmentSeparationCheck, { + variables: { project_ids: ['p-prod', 'p-dev'] }, + fetch: (url) => { + if (url.includes('/v1/projects/p-prod')) { + return { projectId: 'p-prod', labels: { environment: 'production' } }; + } + if (url.includes('/v1/projects/p-dev')) { + return { projectId: 'p-dev', labels: { environment: 'development' } }; + } + // The list endpoint must NOT be called when projects are selected. + throw new Error(`unexpected list call: ${url}`); + }, + }); + expect(out.failed).toHaveLength(0); + expect(out.passed).toHaveLength(1); + }); + + it('fails when only non-production environments are present (no production)', async () => { + const out = await runCheck(environmentSeparationCheck, { + variables: UNSCOPED, + fetch: () => ({ + projects: [{ projectId: 'app-dev' }, { projectId: 'app-staging' }], + }), + }); + expect(out.passed).toHaveLength(0); + expect(out.failed).toHaveLength(1); + expect(out.failed[0]!.title).toMatch(/Could not confirm environment separation/); + }); + + it('fails when only production is present (no non-production)', async () => { + const out = await runCheck(environmentSeparationCheck, { + variables: UNSCOPED, fetch: () => ({ projects: [{ projectId: 'myapp-prod' }] }), }); expect(out.passed).toHaveLength(0); @@ -972,8 +1009,9 @@ describe('GCP environment-separation check', () => { expect(out.failed[0]!.remediation).toMatch(/distinct GCP projects/); }); - it('fails with guidance when no project can be classified', async () => { + it('fails when no project can be classified', async () => { const out = await runCheck(environmentSeparationCheck, { + variables: UNSCOPED, fetch: () => ({ projects: [{ projectId: 'product-catalog' }, { projectId: 'backend' }], }), @@ -983,8 +1021,25 @@ describe('GCP environment-separation check', () => { expect(out.failed[0]!.title).toMatch(/Could not confirm environment separation/); }); - it('fails when no projects are accessible', async () => { + it('surfaces truncation as "could not verify" when discovery is capped', async () => { + // Every page returns more pages → the 20-page cap trips; classify only + // non-prod so the verdict is a fail that must disclose the partial scan. const out = await runCheck(environmentSeparationCheck, { + variables: UNSCOPED, + fetch: () => ({ + projects: [{ projectId: 'app-dev' }], + nextPageToken: 'next', + }), + }); + expect(out.passed).toHaveLength(0); + expect(out.failed).toHaveLength(1); + expect(out.failed[0]!.title).toMatch(/Could not verify environment separation/); + expect(out.failed[0]!.evidence).toMatchObject({ discoveryTruncated: true }); + }); + + it('fails when no projects are accessible (unscoped)', async () => { + const out = await runCheck(environmentSeparationCheck, { + variables: UNSCOPED, fetch: () => ({ projects: [] }), }); expect(out.passed).toHaveLength(0); @@ -992,8 +1047,21 @@ describe('GCP environment-separation check', () => { expect(out.failed[0]!.title).toMatch(/No GCP projects detected/); }); - it('fails "could not verify" when the project list read fails', async () => { + it('fails "could not verify" when unscoped discovery read fails', async () => { + const out = await runCheck(environmentSeparationCheck, { + variables: UNSCOPED, + fetch: () => { + throw status(new Error('HTTP 403: Forbidden'), 403); + }, + }); + expect(out.passed).toHaveLength(0); + expect(out.failed).toHaveLength(1); + expect(out.failed[0]!.title).toMatch(/Could not verify environment separation/); + }); + + it('fails "could not verify" when a selected (scoped) project cannot be read', async () => { const out = await runCheck(environmentSeparationCheck, { + variables: { project_ids: ['p1'] }, fetch: () => { throw status(new Error('HTTP 403: Forbidden'), 403); }, diff --git a/packages/integration-platform/src/manifests/gcp/checks/environment-separation.ts b/packages/integration-platform/src/manifests/gcp/checks/environment-separation.ts index b789b2b83..262dd53df 100644 --- a/packages/integration-platform/src/manifests/gcp/checks/environment-separation.ts +++ b/packages/integration-platform/src/manifests/gcp/checks/environment-separation.ts @@ -2,6 +2,7 @@ import { TASK_TEMPLATES } from '../../../task-mappings'; import type { CheckContext, IntegrationCheck } from '../../../types'; import { classifyEnvironment, + confirmsEnvironmentSeparation, envTagValues, } from '../../environment-classification'; import { @@ -15,6 +16,14 @@ interface GcpProject { labels?: Record; } +interface ResolvedProjects { + projects: GcpProject[]; + /** True when UNSCOPED discovery hit the page cap (a scoped fetch is exact). */ + truncated: boolean; + /** Set when a selected project could not be read (scoped path). */ + readError?: string; +} + /** * Classify a project into an environment bucket, or null if undetermined. * Explicit `environment`/`env` label values are most authoritative, then the @@ -30,8 +39,44 @@ export function classifyProjectEnv(project: GcpProject): string | null { ]); } -/** List every active project the connection can see (bounded pagination). */ -async function listActiveProjects(ctx: CheckContext): Promise { +/** The user-selected project scope (`project_ids` variable), trimmed. */ +function selectedProjectIds(ctx: CheckContext): string[] { + const selected = ctx.variables.project_ids; + if (!Array.isArray(selected)) return []; + return selected + .filter((s): s is string => typeof s === 'string') + .map((s) => s.trim()) + .filter((s) => s.length > 0); +} + +/** + * Resolve the projects to evaluate, HONORING the `project_ids` opt-in scope: + * when projects are selected we fetch exactly those (no truncation possible); + * otherwise we discover all active projects with a bounded page walk and report + * `truncated` so a partial footprint is never presented as a complete verdict. + */ +async function resolveProjects(ctx: CheckContext): Promise { + const selected = selectedProjectIds(ctx); + + if (selected.length > 0) { + const projects: GcpProject[] = []; + let readError: string | undefined; + for (const id of selected) { + try { + const project = await ctx.fetch( + `/v1/projects/${encodeURIComponent(id)}`, + ); + if (project && typeof project.projectId === 'string') { + projects.push(project); + } + } catch (err) { + readError = toHttpReadFailure(err).error; + ctx.log(`GCP env-separation: could not read project ${id} — ${readError}`); + } + } + return { projects, truncated: false, readError }; + } + const projects: GcpProject[] = []; let pageToken: string | undefined; let pages = 0; @@ -50,28 +95,25 @@ async function listActiveProjects(ctx: CheckContext): Promise { typeof data.nextPageToken === 'string' ? data.nextPageToken : undefined; pages++; } while (pageToken && pages < 20); - if (pageToken) { - ctx.warn( - 'GCP project discovery hit the page cap; some projects may not be evaluated for environment separation', - { pages, discovered: projects.length }, - ); - } - return projects; + + return { projects, truncated: Boolean(pageToken) }; } +const GUIDANCE = + 'Separate production and non-production workloads into distinct GCP projects and label each with an `environment` label (e.g. environment=production, environment=staging). If you separate environments another way (e.g. VPCs or folders), upload a console screenshot or architecture diagram as evidence.'; + /** * Separation of Environments check (heuristic). GCP's recommended pattern is a * separate project per environment, so this infers separation from the project - * footprint: it classifies each accessible project into an environment (by - * `environment`/`env` label, else name/id token) and passes when TWO OR MORE - * distinct environments are present. + * footprint: it classifies each in-scope project into an environment (by + * `environment`/`env` label, else name/id token) and passes only when it can + * confirm a PRODUCTION environment is separated from at least one + * NON-PRODUCTION environment. * - * It is intentionally evidence-first, not a hard gate: separation is an - * architectural property with no single API field, so when it cannot confirm + * It honors the `project_ids` opt-in scope, never presents a truncated + * discovery as complete, and is evidence-first: when it cannot confirm * separation it emits actionable guidance (label projects, or upload a diagram) - * rather than a silent pass. It evaluates ALL accessible projects regardless of - * the `project_ids` scoping variable, since separation is about the whole - * footprint. + * rather than a silent pass. */ export const environmentSeparationCheck: IntegrationCheck = { id: 'gcp-environment-separation', @@ -82,9 +124,9 @@ export const environmentSeparationCheck: IntegrationCheck = { taskMapping: TASK_TEMPLATES.separationOfEnvironments, run: async (ctx: CheckContext) => { - let projects: GcpProject[]; + let resolved: ResolvedProjects; try { - projects = await listActiveProjects(ctx); + resolved = await resolveProjects(ctx); } catch (err) { const failure = toHttpReadFailure(err); ctx.fail({ @@ -102,17 +144,25 @@ export const environmentSeparationCheck: IntegrationCheck = { return; } + const { projects, truncated, readError } = resolved; + if (projects.length === 0) { + // A read failure (scoped projects unreadable) is "could not verify"; a + // genuinely empty footprint is "no projects". ctx.fail({ - title: 'No GCP projects detected', - description: - 'No active GCP projects were accessible, so environment separation could not be evaluated.', + title: readError + ? 'Could not verify environment separation' + : 'No GCP projects detected', + description: readError + ? `Selected GCP projects could not be read (${readError}), so environment separation could not be evaluated.` + : 'No GCP projects were in scope, so environment separation could not be evaluated.', resourceType: 'gcp-environment-separation', resourceId: 'projects', severity: 'medium', - remediation: - 'Grant resourcemanager.projects.list to the connection (or select projects in the integration settings), then re-run — or upload a console screenshot / architecture diagram as evidence of environment separation.', - evidence: { projectCount: 0 }, + remediation: readError + ? `Grant resourcemanager.projects.get (e.g. roles/viewer) for the selected projects, then re-run. ${GUIDANCE}` + : `Grant resourcemanager.projects.list to the connection (or select projects in the integration settings), then re-run. ${GUIDANCE}`, + evidence: { projectCount: 0, ...(readError ? { readError } : {}) }, }); return; } @@ -128,47 +178,59 @@ export const environmentSeparationCheck: IntegrationCheck = { .filter((e): e is string => e !== null), ), ]; + const sample = classified.slice(0, 50).map((c) => ({ + projectId: c.projectId, + environment: c.environment ?? 'unclassified', + })); - if (detected.length >= 2) { + // A confirmed pass requires production + a non-production environment. + // Truncation/read gaps cannot turn a confirmed pass into a wrong one + // (scanning more projects only ADDS environments), so a pass stands. + if (confirmsEnvironmentSeparation(detected)) { ctx.pass({ title: 'Environments separated across projects', - description: `Detected ${detected.length} distinct environments across ${projects.length} GCP project(s): ${detected.join(', ')}.`, + description: `Detected production separated from non-production across ${projects.length} GCP project(s): ${detected.join(', ')}.`, resourceType: 'gcp-environment-separation', resourceId: 'projects', evidence: { detectedEnvironments: detected, projectCount: projects.length, - projects: classified - .slice(0, 50) - .map((c) => ({ - projectId: c.projectId, - environment: c.environment ?? 'unclassified', - })), + projects: sample, }, }); return; } + // Could not confirm. Surface any incomplete coverage so a partial footprint + // is never presented as a complete "not separated" verdict. + const coverageGaps: string[] = []; + if (truncated) { + coverageGaps.push( + 'project discovery hit the page cap, so not all projects were evaluated', + ); + } + if (readError) { + coverageGaps.push('some selected projects could not be read'); + } + const base = + detected.length === 0 + ? `No GCP project could be classified by environment across ${projects.length} project(s)` + : `Detected environment(s) ${detected.join(', ')}, but could not confirm a production environment separated from a non-production one across ${projects.length} project(s)`; ctx.fail({ - title: 'Could not confirm environment separation', - description: - detected.length === 1 - ? `Only one environment ("${detected[0]}") was detected across ${projects.length} project(s); production and non-production do not appear to be separated into distinct projects.` - : `No GCP project could be classified by environment across ${projects.length} project(s), so environment separation could not be confirmed.`, + title: + coverageGaps.length > 0 + ? 'Could not verify environment separation' + : 'Could not confirm environment separation', + description: `${base}${coverageGaps.length ? ` (${coverageGaps.join('; ')})` : ''}.`, resourceType: 'gcp-environment-separation', resourceId: 'projects', severity: 'medium', - remediation: - 'Separate production and non-production workloads into distinct GCP projects and label each with an `environment` label (e.g. environment=production, environment=staging). If you separate environments another way (e.g. VPCs or folders), upload a console screenshot or architecture diagram as evidence.', + remediation: GUIDANCE, evidence: { detectedEnvironments: detected, projectCount: projects.length, - projects: classified - .slice(0, 50) - .map((c) => ({ - projectId: c.projectId, - environment: c.environment ?? 'unclassified', - })), + ...(truncated ? { discoveryTruncated: true } : {}), + projects: sample, }, }); }, From 1420c1c6362f47e8e860d2c1fad0289d0d8b7463 Mon Sep 17 00:00:00 2001 From: Tofik Hasanov Date: Wed, 17 Jun 2026 19:36:35 -0400 Subject: [PATCH 5/6] fix(integrations): env-separation classifier negation + read-failure verdicts Addresses three correctness issues flagged in review of the Separation of Environments checks (cubic-dev-ai bot): 1. Shared classifier read negated labels as production. "non-prod" tokenized to ["non","prod"] and matched the bare "prod" token, classifying as production. That both hid real separation (prod + "non-prod" collapsed to production-only -> false fail) and fabricated production (only non-prod envs -> false pass). A production token immediately preceded by "non"/"not" (or a joined "nonprod") now classifies as non-production, and "pre-prod" classifies as staging (matching the already-handled joined "preprod"). 2. Azure check swallowed Tier-1 subscription read failures: only resource-group read failures drove the "could not verify" vs "could not confirm" wording, so an unreadable subscription name could still yield a confident verdict. A read failure in either tier now marks coverage incomplete. 3. AWS check emitted a region-read "could not verify" fail even when the VPCs it did read already confirmed separation -- a contradictory pass+fail in one run. Extracted a pure buildEnvironmentSeparationOutcomes() that suppresses the region-failure finding once separation is confirmed (reading more regions can only add environments, never un-confirm a pass). Adds regression tests for all three (393 pass). No DB/API changes; the shared classifier's new 'non-production' value only flows into display strings and the production-vs-non-production check, verified safe across both repos. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../environment-classification.test.ts | 57 +++++++++++++ .../aws/checks/__tests__/aws-checks.test.ts | 49 ++++++++++++ .../aws/checks/environment-separation.ts | 80 ++++++++++++------- .../checks/__tests__/azure-checks.test.ts | 21 +++++ .../azure/checks/environment-separation.ts | 18 +++-- .../manifests/environment-classification.ts | 79 ++++++++++++++++-- 6 files changed, 264 insertions(+), 40 deletions(-) diff --git a/packages/integration-platform/src/manifests/__tests__/environment-classification.test.ts b/packages/integration-platform/src/manifests/__tests__/environment-classification.test.ts index 5d4fd0453..30fd3a947 100644 --- a/packages/integration-platform/src/manifests/__tests__/environment-classification.test.ts +++ b/packages/integration-platform/src/manifests/__tests__/environment-classification.test.ts @@ -66,6 +66,63 @@ describe('classifyEnvironment — token-exact matching', () => { }); }); +describe('classifyEnvironment — negated/qualified production (cubic finding)', () => { + it('classifies separated negated production as NON-production, not production', () => { + expect(classifyEnvironment(['non-prod'])).toBe('non-production'); + expect(classifyEnvironment(['non_prod'])).toBe('non-production'); + expect(classifyEnvironment(['non.prod'])).toBe('non-production'); + expect(classifyEnvironment(['not-prod'])).toBe('non-production'); + expect(classifyEnvironment(['myapp-non-prod'])).toBe('non-production'); + expect(classifyEnvironment(['non-production'])).toBe('non-production'); + expect(classifyEnvironment(['NON-PROD'])).toBe('non-production'); // case-insensitive + }); + + it('classifies joined non-production spellings as NON-production', () => { + expect(classifyEnvironment(['nonprod'])).toBe('non-production'); + expect(classifyEnvironment(['notprod'])).toBe('non-production'); + expect(classifyEnvironment(['nonprd'])).toBe('non-production'); + expect(classifyEnvironment(['notprd'])).toBe('non-production'); + expect(classifyEnvironment(['nonproduction'])).toBe('non-production'); + expect(classifyEnvironment(['notproduction'])).toBe('non-production'); + expect(classifyEnvironment(['app-nonprod'])).toBe('non-production'); + }); + + it('classifies pre-prod as staging (consistent with joined "preprod")', () => { + expect(classifyEnvironment(['pre-prod'])).toBe('staging'); + expect(classifyEnvironment(['pre_prod'])).toBe('staging'); + expect(classifyEnvironment(['app-pre-prod'])).toBe('staging'); + expect(classifyEnvironment(['preprod'])).toBe('staging'); + }); + + it('still classifies plain production (negation needs an ADJACENT qualifier)', () => { + expect(classifyEnvironment(['prod'])).toBe('production'); + expect(classifyEnvironment(['myapp-prod'])).toBe('production'); + // a "non" that does not immediately precede a prod token must NOT negate + expect(classifyEnvironment(['prod-non-critical'])).toBe('production'); + }); + + it('end-to-end: prod + non-prod now CONFIRMS separation (was a false fail)', () => { + // Pre-fix, "non-prod" classified as production, so detected={production} + // and separation failed despite a real prod/non-prod split. + const detected = [ + ...new Set(['prod-vpc', 'non-prod-vpc'].map((n) => classifyEnvironment([n]))), + ].filter((e): e is string => e !== null); + expect(detected).toContain('production'); + expect(detected).toContain('non-production'); + expect(confirmsEnvironmentSeparation(detected)).toBe(true); + }); + + it('end-to-end: a non-prod-only footprint does NOT fabricate production (was a false pass)', () => { + // Pre-fix, "non-prod-staging" classified as production, so dev + that string + // passed as if production existed. + const detected = [ + ...new Set(['dev', 'non-prod-staging'].map((n) => classifyEnvironment([n]))), + ].filter((e): e is string => e !== null); + expect(detected).not.toContain('production'); + expect(confirmsEnvironmentSeparation(detected)).toBe(false); + }); +}); + describe('envTagValues — only env-key tags, case-insensitive', () => { it('reads environment-indicating keys regardless of case', () => { expect(envTagValues({ Environment: 'production' })).toEqual(['production']); diff --git a/packages/integration-platform/src/manifests/aws/checks/__tests__/aws-checks.test.ts b/packages/integration-platform/src/manifests/aws/checks/__tests__/aws-checks.test.ts index 89b42dfa5..481cf4dc9 100644 --- a/packages/integration-platform/src/manifests/aws/checks/__tests__/aws-checks.test.ts +++ b/packages/integration-platform/src/manifests/aws/checks/__tests__/aws-checks.test.ts @@ -2,6 +2,7 @@ import { describe, expect, it } from 'bun:test'; import { evaluateCloudTrail } from '../cloudtrail'; import { evaluateSecurityGroups } from '../ec2'; import { + buildEnvironmentSeparationOutcomes, classifyVpcEnv, evaluateEnvironmentSeparation, } from '../environment-separation'; @@ -1015,3 +1016,51 @@ describe('AWS environment separation', () => { expect(out[0]!.evidence).toMatchObject({ vpcCount: 0 }); }); }); + +describe('buildEnvironmentSeparationOutcomes — region failures vs verdict (cubic finding)', () => { + const failure = { error: 'AccessDenied: ec2:DescribeVpcs', denied: true }; + const regionFailures = [{ region: 'eu-west-1', failure }]; + + it('does NOT pair a region-failure fail with a confirmed pass', () => { + const out = buildEnvironmentSeparationOutcomes( + [ + { vpcId: 'vpc-1', region: 'us-east-1', environment: 'production' }, + { vpcId: 'vpc-2', region: 'us-east-1', environment: 'development' }, + ], + regionFailures, + ); + // A confirmed pass stands alone — more regions can only ADD environments, so + // the unread region can't un-confirm it; emitting a fail too would be a + // contradictory pass+fail in one run. + expect(out).toHaveLength(1); + expect(out[0]!.kind).toBe('pass'); + }); + + it('surfaces the region failure alongside an UNconfirmed verdict (both negative)', () => { + const out = buildEnvironmentSeparationOutcomes( + [{ vpcId: 'vpc-1', region: 'us-east-1', environment: 'production' }], + regionFailures, + ); + expect(out.length).toBeGreaterThanOrEqual(2); + expect(out.every((o) => o.kind === 'fail')).toBe(true); + expect(out.some((o) => /Could not verify VPCs in some regions/.test(o.title))).toBe(true); + }); + + it('returns only the region-failure finding when zero VPCs were read', () => { + const out = buildEnvironmentSeparationOutcomes([], regionFailures); + expect(out).toHaveLength(1); + expect(out[0]!.title).toMatch(/Could not verify VPCs in some regions/); + }); + + it('with no region failures, returns the separation verdict unchanged', () => { + const out = buildEnvironmentSeparationOutcomes( + [ + { vpcId: 'vpc-1', region: 'us-east-1', environment: 'production' }, + { vpcId: 'vpc-2', region: 'us-east-1', environment: 'staging' }, + ], + [], + ); + expect(out).toHaveLength(1); + expect(out[0]!.kind).toBe('pass'); + }); +}); diff --git a/packages/integration-platform/src/manifests/aws/checks/environment-separation.ts b/packages/integration-platform/src/manifests/aws/checks/environment-separation.ts index 142c4eec1..89021947b 100644 --- a/packages/integration-platform/src/manifests/aws/checks/environment-separation.ts +++ b/packages/integration-platform/src/manifests/aws/checks/environment-separation.ts @@ -128,6 +128,55 @@ export function evaluateEnvironmentSeparation(vpcs: VpcInfo[]): CheckOutcome[] { ]; } +/** + * Combine the VPCs we read with any per-region read failures into the outcomes + * to emit. A region we couldn't read leaves coverage incomplete and is surfaced + * as its own "could not verify" finding — UNLESS the VPCs we DID read already + * confirm separation. Reading more regions can only ADD environments, so it can + * never un-confirm a positive result; pairing a confirmed pass with a + * verification-failure would only emit a contradictory pass+fail in one run. + * When zero VPCs were read AND a region failed, only the region-failure finding + * is returned — a "no VPCs" verdict layered on unread data would mislead. + */ +export function buildEnvironmentSeparationOutcomes( + vpcs: VpcInfo[], + regionFailures: ReadonlyArray<{ region: string; failure: ReadFailure }>, +): CheckOutcome[] { + const separation = evaluateEnvironmentSeparation(vpcs); + const confirmed = separation.some((o) => o.kind === 'pass'); + + // No coverage gap, or separation already proven → the verdict stands alone. + if (regionFailures.length === 0 || confirmed) return separation; + + const regions = regionFailures.map((r) => r.region); + const regionFailure: CheckOutcome = { + kind: 'fail', + title: 'Could not verify VPCs in some regions', + description: `VPCs could not be listed in: ${regions.join(', ')}, so environment separation is unverified in those regions.`, + resourceType: 'aws-environment-separation', + resourceId: `regions:${regions.join(',')}`, + severity: 'medium', + remediation: remediationForReadFailure( + combineReadFailures(regionFailures.map((r) => r.failure)), + 'Grant ec2:DescribeVpcs to the integration role in all enabled regions, then re-run the check.', + ), + evidence: { + failedRegions: regionFailures.map((r) => ({ + region: r.region, + error: r.failure.error, + })), + }, + }; + + // Zero VPCs read + a region failure: the unverified-region finding already + // tells the story on its own. + if (vpcs.length === 0) return [regionFailure]; + + // Coverage gap AND we couldn't confirm from what we read: surface both — + // they're consistent (both negative). + return [regionFailure, ...separation]; +} + /** * Separation of Environments check (heuristic, within-account). Lists every * non-default VPC across the account's regions, classifies each by its @@ -189,35 +238,6 @@ export const environmentSeparationCheck: IntegrationCheck = { } } - // A region we couldn't read is unverified — surface it instead of letting a - // partial read failure end as a silent verdict. - if (regionFailures.length > 0) { - const regions = regionFailures.map((r) => r.region); - emitOutcomes(ctx, [ - { - kind: 'fail', - title: 'Could not verify VPCs in some regions', - description: `VPCs could not be listed in: ${regions.join(', ')}, so environment separation is unverified in those regions.`, - resourceType: 'aws-environment-separation', - resourceId: `regions:${regions.join(',')}`, - severity: 'medium', - remediation: remediationForReadFailure( - combineReadFailures(regionFailures.map((r) => r.failure)), - 'Grant ec2:DescribeVpcs to the integration role in all enabled regions, then re-run the check.', - ), - evidence: { - failedRegions: regionFailures.map((r) => ({ - region: r.region, - error: r.failure.error, - })), - }, - }, - ]); - // If we also found zero VPCs, the unverified-region finding already tells - // the story — don't add a misleading "no VPCs" verdict on unread data. - if (vpcs.length === 0) return; - } - - emitOutcomes(ctx, evaluateEnvironmentSeparation(vpcs)); + emitOutcomes(ctx, buildEnvironmentSeparationOutcomes(vpcs, regionFailures)); }, }; diff --git a/packages/integration-platform/src/manifests/azure/checks/__tests__/azure-checks.test.ts b/packages/integration-platform/src/manifests/azure/checks/__tests__/azure-checks.test.ts index fbc154d9c..d763601ea 100644 --- a/packages/integration-platform/src/manifests/azure/checks/__tests__/azure-checks.test.ts +++ b/packages/integration-platform/src/manifests/azure/checks/__tests__/azure-checks.test.ts @@ -1014,6 +1014,27 @@ describe('Azure environment separation', () => { expect(failed.some((f) => /Could not verify environment separation/.test(f.title))).toBe(true); }); + it('fails "could not verify" when a SUBSCRIPTION name read fails (cubic finding)', async () => { + // Tier-1 displayName read fails while resource-group listing succeeds but + // classifies nothing. Coverage is incomplete, so the verdict must be the + // retry-signalling "could not verify", not the confident "could not confirm". + const { passed, failed } = await run( + environmentSeparationCheck, + (url) => { + const subM = url.match(/\/subscriptions\/([^/?]+)\?api-version/); + if (subM) throw new Error('HTTP 403: Forbidden'); + if (url.includes('/resourcegroups')) { + return { value: [{ id: 'a', name: 'backend' }] }; + } + return {}; + }, + { subscription_ids: ['s1'] }, + ); + expect(passed).toHaveLength(0); + expect(failed.some((f) => /Could not verify environment separation/.test(f.title))).toBe(true); + expect(failed.some((f) => /Could not confirm environment separation/.test(f.title))).toBe(false); + }); + it('defers to the scope resolver when no subscription is in scope', async () => { // variables {} → discovery; no enabled subscription → resolveAzureSubscriptionIds // emits its own scope finding and the check early-returns (no double fail). diff --git a/packages/integration-platform/src/manifests/azure/checks/environment-separation.ts b/packages/integration-platform/src/manifests/azure/checks/environment-separation.ts index 265b8fd5a..f9315b27e 100644 --- a/packages/integration-platform/src/manifests/azure/checks/environment-separation.ts +++ b/packages/integration-platform/src/manifests/azure/checks/environment-separation.ts @@ -63,6 +63,7 @@ export const environmentSeparationCheck: IntegrationCheck = { // subscription's display name only — we never touch subscriptions outside // the configured selection. const subscriptionEnvSet = new Set(); + let anySubscriptionReadFailed = false; for (const id of subscriptionIds) { try { const sub = await ctx.fetch<{ displayName?: string }>( @@ -71,6 +72,7 @@ export const environmentSeparationCheck: IntegrationCheck = { const env = classifyEnvironment([sub.displayName]); if (env) subscriptionEnvSet.add(env); } catch (err) { + anySubscriptionReadFailed = true; ctx.log( `Azure env-separation: could not read subscription ${id} — ${toHttpReadFailure(err).error}`, ); @@ -136,19 +138,24 @@ export const environmentSeparationCheck: IntegrationCheck = { return; } - // Could not confirm. A read failure leaves coverage incomplete ("could not - // verify"); a complete scan that simply found no prod/non-prod split is - // "could not confirm". + // Could not confirm. A read failure in EITHER tier (a subscription name we + // couldn't read, or resource groups we couldn't list) leaves coverage + // incomplete, so the verdict is "could not verify" (retry/permissions) — not + // the confident "could not confirm" (a complete scan that found no split). const detectedAll = [...new Set([...subscriptionEnvs, ...resourceGroupEnvs])]; + const readGaps: string[] = []; + if (anySubscriptionReadFailed) readGaps.push('subscriptions'); + if (anyRgReadFailed) readGaps.push('resource groups'); + const coverageIncomplete = readGaps.length > 0; const base = detectedAll.length === 0 ? `No in-scope Azure subscription or resource group could be classified by environment across ${subscriptionIds.length} subscription(s)` : `Detected environment(s) ${detectedAll.join(', ')}, but could not confirm a production environment separated from a non-production one across ${subscriptionIds.length} in-scope subscription(s)`; ctx.fail({ - title: anyRgReadFailed + title: coverageIncomplete ? 'Could not verify environment separation' : 'Could not confirm environment separation', - description: `${base}${anyRgReadFailed ? ' (some resource groups could not be read)' : ''}.`, + description: `${base}${coverageIncomplete ? ` (some ${readGaps.join(' and ')} could not be read)` : ''}.`, resourceType: 'azure-environment-separation', resourceId: 'subscriptions', severity: 'medium', @@ -158,6 +165,7 @@ export const environmentSeparationCheck: IntegrationCheck = { resourceGroupEnvironments: resourceGroupEnvs, subscriptionsScanned: subscriptionIds.length, resourceGroupsClassified: rgSamples.length, + ...(coverageIncomplete ? { coverageIncomplete: true } : {}), }, }); }, diff --git a/packages/integration-platform/src/manifests/environment-classification.ts b/packages/integration-platform/src/manifests/environment-classification.ts index 4325dd1f9..843df25d0 100644 --- a/packages/integration-platform/src/manifests/environment-classification.ts +++ b/packages/integration-platform/src/manifests/environment-classification.ts @@ -9,11 +9,25 @@ * token is compared exactly to a set of environment keywords. This is why * "production"/"product" and "dev"/"developer" never collide, and why separator * style doesn't matter (`myapp-prod`, `myapp_prod`, `myapp.prod` all classify). + * + * Qualifiers are honored: a production keyword that is negated ("non-prod", + * "not-prod", or the joined "nonprod") classifies as NON-PRODUCTION, and a + * pre-production keyword ("pre-prod", matching the joined "preprod") classifies + * as STAGING — never as plain production. Without this a `non-prod` label would + * read as production and corrupt the prod-vs-non-prod separation verdict. */ +/** Production keywords — defined once so the qualifier pass below can reuse them. */ +const PRODUCTION_TOKENS: ReadonlySet = new Set([ + 'prod', + 'production', + 'prd', + 'live', +]); + const ENV_TOKEN_SETS: ReadonlyArray<{ env: string; tokens: ReadonlySet }> = [ // Production is first so it wins ties when a string carries multiple tokens. - { env: 'production', tokens: new Set(['prod', 'production', 'prd', 'live']) }, + { env: 'production', tokens: PRODUCTION_TOKENS }, { env: 'staging', tokens: new Set(['staging', 'stage', 'stg', 'preprod', 'uat']) }, { env: 'development', tokens: new Set(['dev', 'develop', 'development']) }, { env: 'test', tokens: new Set(['test', 'testing', 'qa']) }, @@ -26,6 +40,40 @@ export const ENV_TAG_KEYS = ['environment', 'env', 'stage', 'tier'] as const; /** The single production bucket; every other bucket is non-production. */ const PRODUCTION_ENV = 'production'; +/** The staging bucket — also where pre-production ("pre-prod") classifies. */ +const STAGING_ENV = 'staging'; + +/** + * Canonical bucket for an explicitly non-production label ("non-prod", + * "nonprod"). It says only "not production" — which is exactly what the + * separation control needs: it counts as a non-production environment. + */ +const NON_PRODUCTION_ENV = 'non-production'; + +/** + * Joined non-production spellings: the qualifier and production word run + * together with no separator ("nonprod"), so they survive `tokenize` as a single + * token and are matched whole. Separated forms ("non-prod") are caught by the + * adjacency check in `classifyTokens`. + */ +const NON_PRODUCTION_TOKENS: ReadonlySet = new Set([ + 'nonprod', + 'nonprd', + 'nonproduction', + 'notprod', + 'notprd', + 'notproduction', +]); + +/** + * Qualifier tokens checked against the token IMMEDIATELY before a production + * token. Only production is qualified — it's the one bucket where a missed + * qualifier flips the prod-vs-non-prod verdict — so a stray "non"/"pre" + * elsewhere in a name is ignored. + */ +const PRODUCTION_NEGATORS: ReadonlySet = new Set(['non', 'not']); +const PREPROD_QUALIFIERS: ReadonlySet = new Set(['pre']); + /** * Whether a set of detected environments confirms environment SEPARATION as the * control intends: production must be present AND at least one non-production @@ -49,6 +97,29 @@ function tokenize(value: string): string[] { .filter((t) => t.length > 0); } +/** + * Classify a single tokenized candidate, or null. A qualified production token + * is resolved FIRST — "non-prod"/"not-prod" (and the joined "nonprod") → + * non-production, "pre-prod" → staging — so the bare production keyword below + * can't win it back. Otherwise tokens are matched exactly against each + * environment set (production first, so it wins when several tokens are present). + */ +function classifyTokens(tokens: string[]): string | null { + let prev: string | undefined; + for (const token of tokens) { + if (NON_PRODUCTION_TOKENS.has(token)) return NON_PRODUCTION_ENV; + if (PRODUCTION_TOKENS.has(token) && prev) { + if (PRODUCTION_NEGATORS.has(prev)) return NON_PRODUCTION_ENV; + if (PREPROD_QUALIFIERS.has(prev)) return STAGING_ENV; + } + prev = token; + } + for (const { env, tokens: keywords } of ENV_TOKEN_SETS) { + if (tokens.some((t) => keywords.has(t))) return env; + } + return null; +} + /** * Classify a list of candidate strings (e.g. an environment tag/label value, * then a resource name) into a canonical environment, or null if none match. @@ -60,10 +131,8 @@ export function classifyEnvironment( ): string | null { for (const candidate of candidates) { if (!candidate) continue; - const tokens = tokenize(candidate); - for (const { env, tokens: keywords } of ENV_TOKEN_SETS) { - if (tokens.some((t) => keywords.has(t))) return env; - } + const env = classifyTokens(tokenize(candidate)); + if (env) return env; } return null; } From ee53b72495fa90cd3760bcf1690b10c9ecf1f196 Mon Sep 17 00:00:00 2001 From: Tofik Hasanov Date: Wed, 17 Jun 2026 19:59:50 -0400 Subject: [PATCH 6/6] fix(integration-platform): fail closed on capped azure env pagination --- .../__tests__/environment-separation.test.ts | 89 +++++++++++++++++++ .../azure/checks/environment-separation.ts | 54 +++++++---- .../src/manifests/azure/checks/shared.ts | 32 +++++-- 3 files changed, 153 insertions(+), 22 deletions(-) create mode 100644 packages/integration-platform/src/manifests/azure/checks/__tests__/environment-separation.test.ts diff --git a/packages/integration-platform/src/manifests/azure/checks/__tests__/environment-separation.test.ts b/packages/integration-platform/src/manifests/azure/checks/__tests__/environment-separation.test.ts new file mode 100644 index 000000000..a9c421175 --- /dev/null +++ b/packages/integration-platform/src/manifests/azure/checks/__tests__/environment-separation.test.ts @@ -0,0 +1,89 @@ +import { describe, expect, it } from 'bun:test'; +import type { + CheckContext, + CheckFindingResult, + CheckPassingResult, + CheckVariableValues, +} from '../../../../types'; +import { environmentSeparationCheck } from '../environment-separation'; + +interface CapturedResults { + passed: CheckPassingResult[]; + failed: CheckFindingResult[]; +} + +async function runEnvironmentSeparation({ + fetch, + variables = { subscription_id: 'sub-1' }, +}: { + fetch: (url: string) => unknown; + variables?: CheckVariableValues; +}): Promise { + const passed: CheckPassingResult[] = []; + const failed: CheckFindingResult[] = []; + const ctx = { + accessToken: 'tok', + credentials: {}, + variables, + connectionId: 'connection-id', + organizationId: 'organization-id', + metadata: {}, + log: () => {}, + warn: () => {}, + error: () => {}, + pass: (result) => passed.push(result), + fail: (finding) => failed.push(finding), + addPassingResult: () => {}, + addFinding: () => {}, + fetch: async (url: string) => fetch(url), + post: async () => ({}), + put: async () => ({}), + patch: async () => ({}), + delete: async () => ({}), + graphql: async () => ({}), + fetchAllPages: async () => [], + fetchWithCursor: async () => [], + fetchWithLinkHeader: async () => [], + getState: async () => null, + setState: async () => {}, + } as CheckContext; + + await environmentSeparationCheck.run(ctx); + return { passed, failed }; +} + +describe('Azure environment separation pagination coverage', () => { + it('does not emit a resource-group pass when ARM pagination hits the page cap', async () => { + const out = await runEnvironmentSeparation({ + fetch: (url) => { + if (url.match(/\/subscriptions\/sub-1\?api-version/)) { + return { displayName: 'Company' }; + } + + if (url.includes('/resourcegroups')) { + const pageMatch = url.match(/[?&]page=(\d+)/); + const page = pageMatch ? Number(pageMatch[1]) : 0; + const name = page === 0 ? 'rg-prod' : 'rg-dev'; + + return { + value: [{ id: `rg-${page}`, name }], + nextLink: `https://management.azure.com/subscriptions/sub-1/resourcegroups?page=${ + page + 1 + }`, + }; + } + + return {}; + }, + }); + + expect(out.passed).toHaveLength(0); + expect(out.failed).toHaveLength(1); + expect(out.failed[0]!.title).toMatch(/Could not verify environment separation/); + expect(out.failed[0]!.evidence).toMatchObject({ + coverageIncomplete: true, + resourceGroupCoverageGaps: ['page-cap'], + resourceGroupCoverageGapSubscriptions: ['sub-1'], + }); + }); +}); diff --git a/packages/integration-platform/src/manifests/azure/checks/environment-separation.ts b/packages/integration-platform/src/manifests/azure/checks/environment-separation.ts index f9315b27e..d48c1ebce 100644 --- a/packages/integration-platform/src/manifests/azure/checks/environment-separation.ts +++ b/packages/integration-platform/src/manifests/azure/checks/environment-separation.ts @@ -6,7 +6,7 @@ import { envTagValues, } from '../../environment-classification'; import { toHttpReadFailure } from '../../http-read-failure'; -import { ARM_BASE, armListAll, resolveAzureSubscriptionIds } from './shared'; +import { ARM_BASE, armListAllWithCoverage, resolveAzureSubscriptionIds } from './shared'; const SUBSCRIPTION_API_VERSION = '2020-01-01'; const RESOURCE_GROUPS_API_VERSION = '2021-04-01'; @@ -99,13 +99,21 @@ export const environmentSeparationCheck: IntegrationCheck = { const rgEnvSet = new Set(); const rgSamples: Array<{ name: string; environment: string }> = []; let anyRgReadFailed = false; + let resourceGroupsClassified = 0; + const rgCoverageGaps = new Set(); + const rgCoverageGapSubscriptions = new Set(); for (const id of subscriptionIds) { let resourceGroups: ResourceGroup[]; try { - resourceGroups = await armListAll( + const result = await armListAllWithCoverage({ ctx, - `${ARM_BASE}/subscriptions/${id}/resourcegroups?api-version=${RESOURCE_GROUPS_API_VERSION}`, - ); + url: `${ARM_BASE}/subscriptions/${id}/resourcegroups?api-version=${RESOURCE_GROUPS_API_VERSION}`, + }); + resourceGroups = result.items; + for (const gap of result.coverageGaps) { + rgCoverageGaps.add(gap); + rgCoverageGapSubscriptions.add(id); + } } catch (err) { anyRgReadFailed = true; ctx.log( @@ -117,12 +125,15 @@ export const environmentSeparationCheck: IntegrationCheck = { const env = classifyResourceGroupEnv(rg); if (env) { rgEnvSet.add(env); + resourceGroupsClassified++; if (rgSamples.length < 50) rgSamples.push({ name: rg.name, environment: env }); } } } const resourceGroupEnvs = [...rgEnvSet]; - if (confirmsEnvironmentSeparation(resourceGroupEnvs)) { + const resourceGroupCoverageIncomplete = rgCoverageGaps.size > 0; + const resourceGroupSeparationDetected = confirmsEnvironmentSeparation(resourceGroupEnvs); + if (!resourceGroupCoverageIncomplete && resourceGroupSeparationDetected) { ctx.pass({ title: 'Environments separated across resource groups', description: `Detected production separated from non-production across resource groups in ${subscriptionIds.length} in-scope subscription(s): ${resourceGroupEnvs.join(', ')}. Resource-group separation is logical — RGs share the subscription's access and network boundary — not full isolation.`, @@ -138,24 +149,29 @@ export const environmentSeparationCheck: IntegrationCheck = { return; } - // Could not confirm. A read failure in EITHER tier (a subscription name we - // couldn't read, or resource groups we couldn't list) leaves coverage - // incomplete, so the verdict is "could not verify" (retry/permissions) — not - // the confident "could not confirm" (a complete scan that found no split). + // Could not confirm. A read failure or pagination gap in EITHER tier leaves + // coverage incomplete, so the verdict is "could not verify" + // (retry/permissions/scope) — not the confident "could not confirm" (a + // complete scan that found no split). const detectedAll = [...new Set([...subscriptionEnvs, ...resourceGroupEnvs])]; - const readGaps: string[] = []; - if (anySubscriptionReadFailed) readGaps.push('subscriptions'); - if (anyRgReadFailed) readGaps.push('resource groups'); - const coverageIncomplete = readGaps.length > 0; + const coverageGaps: string[] = []; + if (anySubscriptionReadFailed) coverageGaps.push('subscriptions could not be read'); + if (anyRgReadFailed) coverageGaps.push('resource groups could not be listed'); + if (resourceGroupCoverageIncomplete) { + coverageGaps.push('resource-group pagination stopped before all groups were evaluated'); + } + const coverageIncomplete = coverageGaps.length > 0; const base = detectedAll.length === 0 ? `No in-scope Azure subscription or resource group could be classified by environment across ${subscriptionIds.length} subscription(s)` - : `Detected environment(s) ${detectedAll.join(', ')}, but could not confirm a production environment separated from a non-production one across ${subscriptionIds.length} in-scope subscription(s)`; + : resourceGroupSeparationDetected && resourceGroupCoverageIncomplete + ? `Detected production separated from non-production in the scanned resource groups (${resourceGroupEnvs.join(', ')}), but not all resource groups were evaluated across ${subscriptionIds.length} in-scope subscription(s)` + : `Detected environment(s) ${detectedAll.join(', ')}, but could not confirm a production environment separated from a non-production one across ${subscriptionIds.length} in-scope subscription(s)`; ctx.fail({ title: coverageIncomplete ? 'Could not verify environment separation' : 'Could not confirm environment separation', - description: `${base}${coverageIncomplete ? ` (some ${readGaps.join(' and ')} could not be read)` : ''}.`, + description: `${base}${coverageIncomplete ? ` (${coverageGaps.join('; ')})` : ''}.`, resourceType: 'azure-environment-separation', resourceId: 'subscriptions', severity: 'medium', @@ -164,8 +180,14 @@ export const environmentSeparationCheck: IntegrationCheck = { subscriptionEnvironments: subscriptionEnvs, resourceGroupEnvironments: resourceGroupEnvs, subscriptionsScanned: subscriptionIds.length, - resourceGroupsClassified: rgSamples.length, + resourceGroupsClassified, ...(coverageIncomplete ? { coverageIncomplete: true } : {}), + ...(resourceGroupCoverageIncomplete + ? { + resourceGroupCoverageGaps: [...rgCoverageGaps], + resourceGroupCoverageGapSubscriptions: [...rgCoverageGapSubscriptions], + } + : {}), }, }); }, diff --git a/packages/integration-platform/src/manifests/azure/checks/shared.ts b/packages/integration-platform/src/manifests/azure/checks/shared.ts index 97bc02d2e..b0da9b237 100644 --- a/packages/integration-platform/src/manifests/azure/checks/shared.ts +++ b/packages/integration-platform/src/manifests/azure/checks/shared.ts @@ -2,6 +2,7 @@ import type { CheckContext } from '../../../types'; import { remediationForReadFailure, toHttpReadFailure } from '../../http-read-failure'; const ARM = 'https://management.azure.com'; +const ARM_PAGE_CAP = 50; /** Fan-out bound for auto-discovered subscriptions (13 checks × N subs). */ const MAX_SUBSCRIPTIONS = 50; @@ -112,14 +113,25 @@ export async function resolveAzureSubscriptionIds( } /** Paginate an Azure ARM list endpoint (`{ value: T[], nextLink? }`). */ -export async function armListAll( - ctx: CheckContext, - url: string, -): Promise { +export type ArmListCoverageGap = 'page-cap' | 'unexpected-next-link-host'; + +export interface ArmListResult { + items: T[]; + coverageGaps: ArmListCoverageGap[]; +} + +export async function armListAllWithCoverage({ + ctx, + url, +}: { + ctx: CheckContext; + url: string; +}): Promise> { const out: T[] = []; + const coverageGaps: ArmListCoverageGap[] = []; let nextUrl: string | undefined = url; let pages = 0; - while (nextUrl && pages < 50) { + while (nextUrl && pages < ARM_PAGE_CAP) { const data: { value?: T[]; nextLink?: string } = await ctx.fetch(nextUrl); if (Array.isArray(data.value)) out.push(...data.value); nextUrl = data.nextLink; @@ -129,6 +141,7 @@ export async function armListAll( ctx.warn('Azure ARM nextLink pointed to an unexpected host; stopping pagination', { nextLink: nextUrl, }); + coverageGaps.push('unexpected-next-link-host'); nextUrl = undefined; } pages++; @@ -138,8 +151,15 @@ export async function armListAll( url, pages, }); + coverageGaps.push('page-cap'); } - return out; + return { items: out, coverageGaps }; +} + +/** Paginate an Azure ARM list endpoint (`{ value: T[], nextLink? }`). */ +export async function armListAll(ctx: CheckContext, url: string): Promise { + const result = await armListAllWithCoverage({ ctx, url }); + return result.items; } /**