diff --git a/packages/integration-platform/src/manifests/__tests__/environment-classification.test.ts b/packages/integration-platform/src/manifests/__tests__/environment-classification.test.ts new file mode 100644 index 0000000000..30fd3a947d --- /dev/null +++ b/packages/integration-platform/src/manifests/__tests__/environment-classification.test.ts @@ -0,0 +1,147 @@ +import { describe, expect, it } from 'bun:test'; +import { + classifyEnvironment, + confirmsEnvironmentSeparation, + envTagValues, +} from '../environment-classification'; + +describe('confirmsEnvironmentSeparation — requires prod + non-prod', () => { + it('passes only with production AND a non-production environment', () => { + expect(confirmsEnvironmentSeparation(['production', 'development'])).toBe(true); + expect(confirmsEnvironmentSeparation(['production', 'staging', 'test'])).toBe(true); + }); + + it('fails on two non-production environments (no production)', () => { + expect(confirmsEnvironmentSeparation(['development', 'staging'])).toBe(false); + }); + + it('fails on production alone, or a single environment, or none', () => { + expect(confirmsEnvironmentSeparation(['production'])).toBe(false); + expect(confirmsEnvironmentSeparation(['development'])).toBe(false); + expect(confirmsEnvironmentSeparation([])).toBe(false); + }); +}); + +describe('classifyEnvironment — token-exact matching', () => { + it('classifies common environment tokens', () => { + expect(classifyEnvironment(['myapp-prod'])).toBe('production'); + expect(classifyEnvironment(['web-staging'])).toBe('staging'); + expect(classifyEnvironment(['svc-dev'])).toBe('development'); + expect(classifyEnvironment(['api-qa'])).toBe('test'); + expect(classifyEnvironment(['demo'])).toBe('sandbox'); + }); + + it('handles ANY separator including underscore (the bug the reviewer caught)', () => { + expect(classifyEnvironment(['myapp_prod'])).toBe('production'); + expect(classifyEnvironment(['prod_network'])).toBe('production'); + 
expect(classifyEnvironment(['dev_network'])).toBe('development'); + expect(classifyEnvironment(['myapp.prod'])).toBe('production'); + expect(classifyEnvironment(['rg/staging'])).toBe('staging'); + }); + + it('does NOT false-match substrings (product/developer/etc.)', () => { + expect(classifyEnvironment(['product-catalog'])).toBeNull(); + expect(classifyEnvironment(['developer-portal'])).toBeNull(); + expect(classifyEnvironment(['data-warehouse'])).toBeNull(); + expect(classifyEnvironment(['prod123'])).toBeNull(); // not a clean token + }); + + it('treats preprod as staging, not production', () => { + expect(classifyEnvironment(['app-preprod'])).toBe('staging'); + expect(classifyEnvironment(['preprod'])).toBe('staging'); + }); + + it('is case-insensitive and skips empty/undefined candidates', () => { + expect(classifyEnvironment(['PROD'])).toBe('production'); + expect(classifyEnvironment([undefined, '', 'svc-dev'])).toBe('development'); + }); + + it('returns the first matching candidate (authoritative source first)', () => { + // an explicit env value passed first wins over a later name + expect(classifyEnvironment(['production', 'thing-dev'])).toBe('production'); + }); + + it('returns null when nothing matches', () => { + expect(classifyEnvironment(['backend', 'frontend', 'vpc-0abc'])).toBeNull(); + }); +}); + +describe('classifyEnvironment — negated/qualified production (cubic finding)', () => { + it('classifies separated negated production as NON-production, not production', () => { + expect(classifyEnvironment(['non-prod'])).toBe('non-production'); + expect(classifyEnvironment(['non_prod'])).toBe('non-production'); + expect(classifyEnvironment(['non.prod'])).toBe('non-production'); + expect(classifyEnvironment(['not-prod'])).toBe('non-production'); + expect(classifyEnvironment(['myapp-non-prod'])).toBe('non-production'); + expect(classifyEnvironment(['non-production'])).toBe('non-production'); + expect(classifyEnvironment(['NON-PROD'])).toBe('non-production'); 
// case-insensitive + }); + + it('classifies joined non-production spellings as NON-production', () => { + expect(classifyEnvironment(['nonprod'])).toBe('non-production'); + expect(classifyEnvironment(['notprod'])).toBe('non-production'); + expect(classifyEnvironment(['nonprd'])).toBe('non-production'); + expect(classifyEnvironment(['notprd'])).toBe('non-production'); + expect(classifyEnvironment(['nonproduction'])).toBe('non-production'); + expect(classifyEnvironment(['notproduction'])).toBe('non-production'); + expect(classifyEnvironment(['app-nonprod'])).toBe('non-production'); + }); + + it('classifies pre-prod as staging (consistent with joined "preprod")', () => { + expect(classifyEnvironment(['pre-prod'])).toBe('staging'); + expect(classifyEnvironment(['pre_prod'])).toBe('staging'); + expect(classifyEnvironment(['app-pre-prod'])).toBe('staging'); + expect(classifyEnvironment(['preprod'])).toBe('staging'); + }); + + it('still classifies plain production (negation needs an ADJACENT qualifier)', () => { + expect(classifyEnvironment(['prod'])).toBe('production'); + expect(classifyEnvironment(['myapp-prod'])).toBe('production'); + // a "non" that does not immediately precede a prod token must NOT negate + expect(classifyEnvironment(['prod-non-critical'])).toBe('production'); + }); + + it('end-to-end: prod + non-prod now CONFIRMS separation (was a false fail)', () => { + // Pre-fix, "non-prod" classified as production, so detected={production} + // and separation failed despite a real prod/non-prod split. 
+ const detected = [ + ...new Set(['prod-vpc', 'non-prod-vpc'].map((n) => classifyEnvironment([n]))), + ].filter((e): e is string => e !== null); + expect(detected).toContain('production'); + expect(detected).toContain('non-production'); + expect(confirmsEnvironmentSeparation(detected)).toBe(true); + }); + + it('end-to-end: a non-prod-only footprint does NOT fabricate production (was a false pass)', () => { + // Pre-fix, "non-prod-staging" classified as production, so dev + that string + // passed as if production existed. + const detected = [ + ...new Set(['dev', 'non-prod-staging'].map((n) => classifyEnvironment([n]))), + ].filter((e): e is string => e !== null); + expect(detected).not.toContain('production'); + expect(confirmsEnvironmentSeparation(detected)).toBe(false); + }); +}); + +describe('envTagValues — only env-key tags, case-insensitive', () => { + it('reads environment-indicating keys regardless of case', () => { + expect(envTagValues({ Environment: 'production' })).toEqual(['production']); + }); + + it('returns values in env-key PRIORITY order, not tag insertion order', () => { + // `environment` outranks `stage` even though `stage` is inserted first. 
+ expect(envTagValues({ stage: 'dev', environment: 'prod' })).toEqual(['prod', 'dev']); + // so the authoritative key wins classification + expect(classifyEnvironment(envTagValues({ stage: 'dev', environment: 'prod' }))).toBe( + 'production', + ); + }); + + it('ignores non-environment tags (false-positive guard)', () => { + expect(envTagValues({ team: 'dev-team', costCenter: 'prod-123' })).toEqual([]); + }); + + it('returns [] for undefined tags', () => { + expect(envTagValues(undefined)).toEqual([]); + }); +}); diff --git a/packages/integration-platform/src/manifests/aws/checks/__tests__/aws-checks.test.ts b/packages/integration-platform/src/manifests/aws/checks/__tests__/aws-checks.test.ts index fddbfcf7df..481cf4dc94 100644 --- a/packages/integration-platform/src/manifests/aws/checks/__tests__/aws-checks.test.ts +++ b/packages/integration-platform/src/manifests/aws/checks/__tests__/aws-checks.test.ts @@ -1,6 +1,11 @@ import { describe, expect, it } from 'bun:test'; import { evaluateCloudTrail } from '../cloudtrail'; import { evaluateSecurityGroups } from '../ec2'; +import { + buildEnvironmentSeparationOutcomes, + classifyVpcEnv, + evaluateEnvironmentSeparation, +} from '../environment-separation'; import { evaluateAccountSummary, evaluateIamAccount, @@ -953,3 +958,109 @@ describe('account-level findings carry AWS account attribution (cubic finding on expect(failed[0]!.description).toContain('AWS account 123456789012'); }); }); + +describe('AWS environment separation', () => { + it('classifyVpcEnv: env tag wins, then Name tag, incl. 
underscore', () => { + expect(classifyVpcEnv([{ Key: 'Environment', Value: 'production' }])).toBe('production'); + expect(classifyVpcEnv([{ Key: 'Name', Value: 'prod-vpc' }])).toBe('production'); + expect(classifyVpcEnv([{ Key: 'Name', Value: 'vpc_dev' }])).toBe('development'); + }); + + it('classifyVpcEnv: ignores non-env tags (no fabricated environment)', () => { + expect(classifyVpcEnv([{ Key: 'team', Value: 'dev-team' }])).toBeNull(); + expect(classifyVpcEnv(undefined)).toBeNull(); + }); + + it('passes on production + non-production, without claiming cross-account isolation', () => { + const out = evaluateEnvironmentSeparation([ + { vpcId: 'vpc-1', region: 'us-east-1', environment: 'production' }, + { vpcId: 'vpc-2', region: 'us-east-1', environment: 'development' }, + ]); + expect(out).toHaveLength(1); + expect(out[0]!.kind).toBe('pass'); + expect(out[0]!.description).toMatch(/not cross-account isolation/); + }); + + it('fails when only non-production environments are present (no production)', () => { + const out = evaluateEnvironmentSeparation([ + { vpcId: 'vpc-1', region: 'us-east-1', environment: 'development' }, + { vpcId: 'vpc-2', region: 'us-east-1', environment: 'staging' }, + ]); + expect(out).toHaveLength(1); + expect(out[0]!.kind).toBe('fail'); + }); + + it('fails (low) with guidance when only one environment is detected', () => { + const out = evaluateEnvironmentSeparation([ + { vpcId: 'vpc-1', region: 'us-east-1', environment: 'production' }, + ]); + expect(out).toHaveLength(1); + expect(out[0]!.kind).toBe('fail'); + expect(out[0]!.severity).toBe('low'); + expect(out[0]!.remediation).toMatch(/separate AWS account per environment/); + }); + + it('fails when no VPC can be classified', () => { + const out = evaluateEnvironmentSeparation([ + { vpcId: 'vpc-1', region: 'us-east-1', environment: null }, + { vpcId: 'vpc-2', region: 'us-east-1', environment: null }, + ]); + expect(out[0]!.kind).toBe('fail'); + }); + + it('fails (low) with guidance when 
there are no non-default VPCs', () => { + const out = evaluateEnvironmentSeparation([]); + expect(out).toHaveLength(1); + expect(out[0]!.kind).toBe('fail'); + expect(out[0]!.severity).toBe('low'); + expect(out[0]!.evidence).toMatchObject({ vpcCount: 0 }); + }); +}); + +describe('buildEnvironmentSeparationOutcomes — region failures vs verdict (cubic finding)', () => { + const failure = { error: 'AccessDenied: ec2:DescribeVpcs', denied: true }; + const regionFailures = [{ region: 'eu-west-1', failure }]; + + it('does NOT pair a region-failure fail with a confirmed pass', () => { + const out = buildEnvironmentSeparationOutcomes( + [ + { vpcId: 'vpc-1', region: 'us-east-1', environment: 'production' }, + { vpcId: 'vpc-2', region: 'us-east-1', environment: 'development' }, + ], + regionFailures, + ); + // A confirmed pass stands alone — more regions can only ADD environments, so + // the unread region can't un-confirm it; emitting a fail too would be a + // contradictory pass+fail in one run. 
+ expect(out).toHaveLength(1); + expect(out[0]!.kind).toBe('pass'); + }); + + it('surfaces the region failure alongside an UNconfirmed verdict (both negative)', () => { + const out = buildEnvironmentSeparationOutcomes( + [{ vpcId: 'vpc-1', region: 'us-east-1', environment: 'production' }], + regionFailures, + ); + expect(out.length).toBeGreaterThanOrEqual(2); + expect(out.every((o) => o.kind === 'fail')).toBe(true); + expect(out.some((o) => /Could not verify VPCs in some regions/.test(o.title))).toBe(true); + }); + + it('returns only the region-failure finding when zero VPCs were read', () => { + const out = buildEnvironmentSeparationOutcomes([], regionFailures); + expect(out).toHaveLength(1); + expect(out[0]!.title).toMatch(/Could not verify VPCs in some regions/); + }); + + it('with no region failures, returns the separation verdict unchanged', () => { + const out = buildEnvironmentSeparationOutcomes( + [ + { vpcId: 'vpc-1', region: 'us-east-1', environment: 'production' }, + { vpcId: 'vpc-2', region: 'us-east-1', environment: 'staging' }, + ], + [], + ); + expect(out).toHaveLength(1); + expect(out[0]!.kind).toBe('pass'); + }); +}); diff --git a/packages/integration-platform/src/manifests/aws/checks/environment-separation.ts b/packages/integration-platform/src/manifests/aws/checks/environment-separation.ts new file mode 100644 index 0000000000..89021947bd --- /dev/null +++ b/packages/integration-platform/src/manifests/aws/checks/environment-separation.ts @@ -0,0 +1,243 @@ +import { DescribeVpcsCommand, EC2Client } from '@aws-sdk/client-ec2'; +import { TASK_TEMPLATES } from '../../../task-mappings'; +import type { CheckContext, IntegrationCheck } from '../../../types'; +import { + classifyEnvironment, + confirmsEnvironmentSeparation, + envTagValues, +} from '../../environment-classification'; +import { + combineReadFailures, + emitOutcomes, + remediationForReadFailure, + resolveAwsSessionOrFail, + toReadFailure, + type CheckOutcome, + type ReadFailure, +} from 
'./shared'; + +export interface VpcInfo { + vpcId: string; + region: string; + environment: string | null; +} + +// Shown on every "could not confirm" outcome. AWS's recommended separation is a +// separate ACCOUNT per environment, which is invisible from one connection (one +// account's role), so a single-account result is the EXPECTED shape for those +// customers — guide, never accuse. +const ACCOUNT_GUIDANCE = + 'If you separate environments using a separate AWS account per environment (the recommended pattern), connect each environment account as its own connection — this check evaluates one account at a time. Otherwise separate prod/non-prod into distinct VPCs and tag each (Environment=production / Environment=staging), or upload an architecture diagram as evidence.'; + +/** + * Classify a VPC into an environment from its tags: an explicit `environment` + * tag value first, then the `Name` tag value. Only env-key tag values and the + * Name tag are considered — arbitrary tag values are NOT scanned, so a stray + * `team=dev-team` tag can't fabricate a second environment. + */ +export function classifyVpcEnv( + tags: ReadonlyArray<{ Key?: string; Value?: string }> | undefined, +): string | null { + const tagMap: Record<string, string> = {}; + for (const t of tags ?? []) { + if (typeof t.Key === 'string' && typeof t.Value === 'string') { + tagMap[t.Key] = t.Value; + } + } + const nameTag = Object.entries(tagMap).find( + ([k]) => k.toLowerCase() === 'name', + )?.[1]; + return classifyEnvironment([...envTagValues(tagMap), nameTag]); +} + +/** + * Pure verdict from the account's non-default VPCs. PASS only when a PRODUCTION + * environment is positively observed alongside at least one NON-PRODUCTION + * environment; otherwise fail-with-guidance (never a silent pass). The PASS + * wording is deliberately scoped to "within a single AWS account" — environment- + * labeled VPCs prove network labeling, not cross-account isolation (they can be + * peered / share the account boundary). 
+ */ +export function evaluateEnvironmentSeparation(vpcs: VpcInfo[]): CheckOutcome[] { + const sample = vpcs.slice(0, 50).map((v) => ({ + vpcId: v.vpcId, + region: v.region, + environment: v.environment ?? 'unclassified', + })); + + if (vpcs.length === 0) { + return [ + { + kind: 'fail', + title: 'Could not confirm environment separation', + description: + 'No non-default VPCs were found in this AWS account, so environment separation could not be evaluated here.', + resourceType: 'aws-environment-separation', + resourceId: 'vpcs', + severity: 'low', + remediation: ACCOUNT_GUIDANCE, + evidence: { vpcCount: 0 }, + }, + ]; + } + + const detected = [ + ...new Set( + vpcs.map((v) => v.environment).filter((e): e is string => e !== null), + ), + ]; + + // A confirmed pass requires a production environment separated from a + // non-production one — two non-production VPCs alone do not demonstrate that + // production is segregated. + if (confirmsEnvironmentSeparation(detected)) { + return [ + { + kind: 'pass', + title: 'Distinct environment-labeled VPCs found', + description: `Detected production separated from non-production across non-default VPCs in this AWS account: ${detected.join(', ')}. This evidences environment-labeled network separation within a single account (not cross-account isolation).`, + resourceType: 'aws-environment-separation', + resourceId: 'vpcs', + evidence: { + detectedEnvironments: detected, + vpcCount: vpcs.length, + vpcs: sample, + }, + }, + ]; + } + + return [ + { + kind: 'fail', + title: 'Could not confirm environment separation', + description: + detected.length === 0 + ? "No VPC in this account could be classified by environment, so environment separation could not be confirmed." 
+ : `Detected environment(s) ${detected.join(', ')} among this account's VPCs, but could not confirm a production environment separated from a non-production one; this connection evaluates a single AWS account.`, + resourceType: 'aws-environment-separation', + resourceId: 'vpcs', + severity: 'low', + remediation: ACCOUNT_GUIDANCE, + evidence: { + detectedEnvironments: detected, + vpcCount: vpcs.length, + vpcs: sample, + }, + }, + ]; +} + +/** + * Combine the VPCs we read with any per-region read failures into the outcomes + * to emit. A region we couldn't read leaves coverage incomplete and is surfaced + * as its own "could not verify" finding — UNLESS the VPCs we DID read already + * confirm separation. Reading more regions can only ADD environments, so it can + * never un-confirm a positive result; pairing a confirmed pass with a + * verification-failure would only emit a contradictory pass+fail in one run. + * When zero VPCs were read AND a region failed, only the region-failure finding + * is returned — a "no VPCs" verdict layered on unread data would mislead. + */ +export function buildEnvironmentSeparationOutcomes( + vpcs: VpcInfo[], + regionFailures: ReadonlyArray<{ region: string; failure: ReadFailure }>, +): CheckOutcome[] { + const separation = evaluateEnvironmentSeparation(vpcs); + const confirmed = separation.some((o) => o.kind === 'pass'); + + // No coverage gap, or separation already proven → the verdict stands alone. 
+ if (regionFailures.length === 0 || confirmed) return separation; + + const regions = regionFailures.map((r) => r.region); + const regionFailure: CheckOutcome = { + kind: 'fail', + title: 'Could not verify VPCs in some regions', + description: `VPCs could not be listed in: ${regions.join(', ')}, so environment separation is unverified in those regions.`, + resourceType: 'aws-environment-separation', + resourceId: `regions:${regions.join(',')}`, + severity: 'medium', + remediation: remediationForReadFailure( + combineReadFailures(regionFailures.map((r) => r.failure)), + 'Grant ec2:DescribeVpcs to the integration role in all enabled regions, then re-run the check.', + ), + evidence: { + failedRegions: regionFailures.map((r) => ({ + region: r.region, + error: r.failure.error, + })), + }, + }; + + // Zero VPCs read + a region failure: the unverified-region finding already + // tells the story on its own. + if (vpcs.length === 0) return [regionFailure]; + + // Coverage gap AND we couldn't confirm from what we read: surface both — + // they're consistent (both negative). + return [regionFailure, ...separation]; +} + +/** + * Separation of Environments check (heuristic, within-account). Lists every + * non-default VPC across the account's regions, classifies each by its + * Environment/Name tag, and passes when a production environment is found + * alongside at least one non-production environment. + * Account-per-environment separation is invisible from one connection (no + * Organizations access), so a single-environment account fails with guidance to + * connect each env account or upload a diagram — the task accepts manual + * evidence either way. 
+ */ +export const environmentSeparationCheck: IntegrationCheck = { + id: 'aws-environment-separation', + name: 'Separation of environments — production isolated from non-production', + description: + 'Verify production and non-production workloads run in distinct VPCs within the AWS account.', + service: 'ec2-vpc', + taskMapping: TASK_TEMPLATES.separationOfEnvironments, + run: async (ctx: CheckContext) => { + const session = await resolveAwsSessionOrFail(ctx); + if (!session) { + ctx.log( + 'AWS environment-separation check: connection not configured — skipping', + ); + return; + } + + const vpcs: VpcInfo[] = []; + const regionFailures: Array<{ region: string; failure: ReadFailure }> = []; + + for (const region of session.regions) { + try { + const ec2 = new EC2Client({ + region, + credentials: session.credentials, + maxAttempts: 5, + }); + let token: string | undefined; + do { + const resp = await ec2.send( + new DescribeVpcsCommand({ NextToken: token, MaxResults: 1000 }), + ); + for (const v of resp.Vpcs ?? []) { + // Default VPCs ship in every region (usually untagged) and would + // pollute the signal; only evaluate available, non-default VPCs. + if (v.IsDefault === true) continue; + if (v.State && v.State !== 'available') continue; + vpcs.push({ + vpcId: v.VpcId ?? 
'unknown', + region, + environment: classifyVpcEnv(v.Tags), + }); + } + token = resp.NextToken; + } while (token); + } catch (err) { + const failure = toReadFailure(err); + regionFailures.push({ region, failure }); + ctx.log(`VPC: could not list VPCs in ${region}: ${failure.error}`); + } + } + + emitOutcomes(ctx, buildEnvironmentSeparationOutcomes(vpcs, regionFailures)); + }, +}; diff --git a/packages/integration-platform/src/manifests/aws/checks/index.ts b/packages/integration-platform/src/manifests/aws/checks/index.ts index ac43946a32..10e3b9934e 100644 --- a/packages/integration-platform/src/manifests/aws/checks/index.ts +++ b/packages/integration-platform/src/manifests/aws/checks/index.ts @@ -4,3 +4,4 @@ export { ec2SecurityGroupsCheck } from './ec2'; export { rdsEncryptionCheck, rdsBackupsCheck } from './rds'; export { kmsKeyRotationCheck } from './kms'; export { cloudTrailEnabledCheck } from './cloudtrail'; +export { environmentSeparationCheck } from './environment-separation'; diff --git a/packages/integration-platform/src/manifests/aws/index.ts b/packages/integration-platform/src/manifests/aws/index.ts index b84f4a84fe..cf6716e3d0 100644 --- a/packages/integration-platform/src/manifests/aws/index.ts +++ b/packages/integration-platform/src/manifests/aws/index.ts @@ -2,6 +2,7 @@ import type { IntegrationManifest } from '../../types'; import { cloudTrailEnabledCheck, ec2SecurityGroupsCheck, + environmentSeparationCheck, iamAccountSecurityCheck, kmsKeyRotationCheck, rdsBackupsCheck, @@ -100,5 +101,6 @@ export const awsManifest: IntegrationManifest = { rdsBackupsCheck, kmsKeyRotationCheck, cloudTrailEnabledCheck, + environmentSeparationCheck, ], }; diff --git a/packages/integration-platform/src/manifests/azure/checks/__tests__/azure-checks.test.ts b/packages/integration-platform/src/manifests/azure/checks/__tests__/azure-checks.test.ts index 6a3c2fc6c3..d763601ea7 100644 --- 
a/packages/integration-platform/src/manifests/azure/checks/__tests__/azure-checks.test.ts +++ b/packages/integration-platform/src/manifests/azure/checks/__tests__/azure-checks.test.ts @@ -6,6 +6,7 @@ import type { } from '../../../../types'; import { azureManifest } from '../../index'; import { rbacLeastPrivilegeCheck } from '../entra-id'; +import { environmentSeparationCheck } from '../environment-separation'; import { keyVaultProtectionCheck, keyVaultRbacCheck } from '../key-vault'; import { monitorLoggingAlertingCheck } from '../monitor'; import { @@ -896,3 +897,158 @@ describe('azure subscription picker fetchOptions', () => { expect(options).toEqual([]); }); }); + +describe('Azure environment separation', () => { + // Mocks the IN-SCOPE per-subscription name GET and the per-subscription + // resource-group list. Scope is driven by the `variables` arg (subscription_id + // / subscription_ids) via resolveAzureSubscriptionIds — there is NO list-all. + const azFetch = + (opts: { names?: Record<string, string>; rgs?: Record<string, unknown[]> }) => + (url: string) => { + const subM = url.match(/\/subscriptions\/([^/?]+)\?api-version/); + if (subM) return { displayName: opts.names?.[subM[1]!] ?? subM[1]! }; + const rgM = url.match(/\/subscriptions\/([^/]+)\/resourcegroups/); + if (rgM) return { value: opts.rgs?.[rgM[1]!] ?? 
[] }; + return {}; + }; + + it('passes (strong) when scoped subscriptions classify to prod + non-prod', async () => { + const { passed, failed } = await run( + environmentSeparationCheck, + azFetch({ names: { s1: 'Production', s2: 'Development' } }), + { subscription_ids: ['s1', 's2'] }, + ); + expect(failed).toHaveLength(0); + expect(passed).toContain('Environments separated across subscriptions'); + }); + + it('passes (weak) on resource-group separation, disclosed as logical', async () => { + const { passed, failed } = await run( + environmentSeparationCheck, + azFetch({ + names: { 'sub-1': 'MyCompany' }, + rgs: { 'sub-1': [{ id: 'a', name: 'rg-prod' }, { id: 'b', name: 'rg-dev' }] }, + }), + ); + expect(failed).toHaveLength(0); + expect(passed).toContain('Environments separated across resource groups'); + }); + + it('passes on resource-group tags (case-insensitive key)', async () => { + const { passed } = await run( + environmentSeparationCheck, + azFetch({ + names: { 'sub-1': 'Company' }, + rgs: { + 'sub-1': [ + { id: 'a', name: 'a', tags: { environment: 'production' } }, + { id: 'b', name: 'b', tags: { Environment: 'staging' } }, + ], + }, + }), + ); + expect(passed).toContain('Environments separated across resource groups'); + }); + + it('fails on two non-production environments (no production)', async () => { + const { passed, failed } = await run( + environmentSeparationCheck, + azFetch({ + names: { 'sub-1': 'Company' }, + rgs: { 'sub-1': [{ id: 'a', name: 'rg-dev' }, { id: 'b', name: 'rg-staging' }] }, + }), + ); + expect(passed).toHaveLength(0); + expect(failed.some((f) => /Could not confirm environment separation/.test(f.title))).toBe(true); + }); + + it('does NOT union tiers: prod subscription + an rg-dev inside fails', async () => { + const { passed, failed } = await run( + environmentSeparationCheck, + azFetch({ names: { s1: 'Production' }, rgs: { s1: [{ id: 'a', name: 'rg-dev' }] } }), + { subscription_ids: ['s1'] }, + ); + 
expect(passed).toHaveLength(0); + expect(failed.some((f) => /Could not confirm environment separation/.test(f.title))).toBe(true); + }); + + it('only scans the configured subscription scope', async () => { + // Scope is ['s1']; touching any other subscription must throw. + const { passed, failed } = await run( + environmentSeparationCheck, + (url) => { + if (!url.includes('/subscriptions/s1')) { + throw new Error(`out-of-scope access: ${url}`); + } + const subM = url.match(/\/subscriptions\/([^/?]+)\?api-version/); + if (subM) return { displayName: 'Company' }; + if (url.includes('/resourcegroups')) { + return { value: [{ id: 'a', name: 'rg-prod' }, { id: 'b', name: 'rg-dev' }] }; + } + return {}; + }, + { subscription_ids: ['s1'] }, + ); + expect(failed).toHaveLength(0); + expect(passed).toContain('Environments separated across resource groups'); + }); + + it('fails with guidance when nothing classifies', async () => { + const { passed, failed } = await run( + environmentSeparationCheck, + azFetch({ names: { 'sub-1': 'Company' }, rgs: { 'sub-1': [{ id: 'a', name: 'backend' }] } }), + ); + expect(passed).toHaveLength(0); + expect(failed).toHaveLength(1); + expect(failed[0]!.remediation).toMatch(/distinct subscriptions/); + }); + + it('fails "could not verify" when a resource-group read fails', async () => { + const { passed, failed } = await run(environmentSeparationCheck, (url) => { + if (url.includes('/resourcegroups')) throw new Error('HTTP 403: Forbidden'); + const subM = url.match(/\/subscriptions\/([^/?]+)\?api-version/); + if (subM) return { displayName: 'Company' }; + return {}; + }); + expect(passed).toHaveLength(0); + expect(failed.some((f) => /Could not verify environment separation/.test(f.title))).toBe(true); + }); + + it('fails "could not verify" when a SUBSCRIPTION name read fails (cubic finding)', async () => { + // Tier-1 displayName read fails while resource-group listing succeeds but + // classifies nothing. 
Coverage is incomplete, so the verdict must be the + // retry-signalling "could not verify", not the confident "could not confirm". + const { passed, failed } = await run( + environmentSeparationCheck, + (url) => { + const subM = url.match(/\/subscriptions\/([^/?]+)\?api-version/); + if (subM) throw new Error('HTTP 403: Forbidden'); + if (url.includes('/resourcegroups')) { + return { value: [{ id: 'a', name: 'backend' }] }; + } + return {}; + }, + { subscription_ids: ['s1'] }, + ); + expect(passed).toHaveLength(0); + expect(failed.some((f) => /Could not verify environment separation/.test(f.title))).toBe(true); + expect(failed.some((f) => /Could not confirm environment separation/.test(f.title))).toBe(false); + }); + + it('defers to the scope resolver when no subscription is in scope', async () => { + // variables {} → discovery; no enabled subscription → resolveAzureSubscriptionIds + // emits its own scope finding and the check early-returns (no double fail). + const { passed, failed } = await run( + environmentSeparationCheck, + (url) => { + if (url.includes('/subscriptions?api-version')) { + return { value: [{ subscriptionId: 's1', state: 'Disabled' }] }; + } + return {}; + }, + {}, + ); + expect(passed).toHaveLength(0); + expect(failed.some((f) => /Could not verify Azure subscription scope/.test(f.title))).toBe(true); + }); +}); diff --git a/packages/integration-platform/src/manifests/azure/checks/__tests__/environment-separation.test.ts b/packages/integration-platform/src/manifests/azure/checks/__tests__/environment-separation.test.ts new file mode 100644 index 0000000000..a9c421175d --- /dev/null +++ b/packages/integration-platform/src/manifests/azure/checks/__tests__/environment-separation.test.ts @@ -0,0 +1,89 @@ +import { describe, expect, it } from 'bun:test'; +import type { + CheckContext, + CheckFindingResult, + CheckPassingResult, + CheckVariableValues, +} from '../../../../types'; +import { environmentSeparationCheck } from 
'../environment-separation'; + +interface CapturedResults { + passed: CheckPassingResult[]; + failed: CheckFindingResult[]; +} + +async function runEnvironmentSeparation({ + fetch, + variables = { subscription_id: 'sub-1' }, +}: { + fetch: (url: string) => unknown; + variables?: CheckVariableValues; +}): Promise<CapturedResults> { + const passed: CheckPassingResult[] = []; + const failed: CheckFindingResult[] = []; + const ctx = { + accessToken: 'tok', + credentials: {}, + variables, + connectionId: 'connection-id', + organizationId: 'organization-id', + metadata: {}, + log: () => {}, + warn: () => {}, + error: () => {}, + pass: (result) => passed.push(result), + fail: (finding) => failed.push(finding), + addPassingResult: () => {}, + addFinding: () => {}, + fetch: async (url: string) => fetch(url), + post: async () => ({}), + put: async () => ({}), + patch: async () => ({}), + delete: async () => ({}), + graphql: async () => ({}), + fetchAllPages: async () => [], + fetchWithCursor: async () => [], + fetchWithLinkHeader: async () => [], + getState: async () => null, + setState: async () => {}, + } as CheckContext; + + await environmentSeparationCheck.run(ctx); + return { passed, failed }; +} + +describe('Azure environment separation pagination coverage', () => { + it('does not emit a resource-group pass when ARM pagination hits the page cap', async () => { + const out = await runEnvironmentSeparation({ + fetch: (url) => { + if (url.match(/\/subscriptions\/sub-1\?api-version/)) { + return { displayName: 'Company' }; + } + + if (url.includes('/resourcegroups')) { + const pageMatch = url.match(/[?&]page=(\d+)/); + const page = pageMatch ? Number(pageMatch[1]) : 0; + const name = page === 0 ? 
'rg-prod' : 'rg-dev'; + + return { + value: [{ id: `rg-${page}`, name }], + nextLink: `https://management.azure.com/subscriptions/sub-1/resourcegroups?page=${ + page + 1 + }`, + }; + } + + return {}; + }, + }); + + expect(out.passed).toHaveLength(0); + expect(out.failed).toHaveLength(1); + expect(out.failed[0]!.title).toMatch(/Could not verify environment separation/); + expect(out.failed[0]!.evidence).toMatchObject({ + coverageIncomplete: true, + resourceGroupCoverageGaps: ['page-cap'], + resourceGroupCoverageGapSubscriptions: ['sub-1'], + }); + }); +}); diff --git a/packages/integration-platform/src/manifests/azure/checks/environment-separation.ts b/packages/integration-platform/src/manifests/azure/checks/environment-separation.ts new file mode 100644 index 0000000000..d48c1ebce5 --- /dev/null +++ b/packages/integration-platform/src/manifests/azure/checks/environment-separation.ts @@ -0,0 +1,194 @@ +import { TASK_TEMPLATES } from '../../../task-mappings'; +import type { CheckContext, IntegrationCheck } from '../../../types'; +import { + classifyEnvironment, + confirmsEnvironmentSeparation, + envTagValues, +} from '../../environment-classification'; +import { toHttpReadFailure } from '../../http-read-failure'; +import { ARM_BASE, armListAllWithCoverage, resolveAzureSubscriptionIds } from './shared'; + +const SUBSCRIPTION_API_VERSION = '2020-01-01'; +const RESOURCE_GROUPS_API_VERSION = '2021-04-01'; + +interface ResourceGroup { + id: string; + name: string; + location?: string; + tags?: Record; +} + +/** + * Classify a resource group into an environment: an explicit `environment` tag + * value first, then the RG name. Only env-key tag values and the name are + * considered (never arbitrary tag values). 
/**
 * Classify a resource group into an environment bucket.
 *
 * Candidates are tried in priority order: the values of the
 * environment-indicating tags selected by `envTagValues`, then the
 * resource-group name. Other tag values are never passed to the classifier.
 *
 * @param rg - Resource-group name plus its optional tag map.
 * @returns Whatever `classifyEnvironment` reports for the first matching
 *   candidate, or `null` when none matches.
 */
export function classifyResourceGroupEnv(rg: {
  name: string;
  tags?: Record<string, string>;
}): string | null {
  return classifyEnvironment([...envTagValues(rg.tags), rg.name]);
}

/** Remediation text attached to the failing finding of this check. */
const GUIDANCE =
  'Separate production and non-production into distinct subscriptions (strongest), or tag each resource group with an `environment` tag (e.g. environment=production / environment=staging). If you separate environments another way, upload a console screenshot or architecture diagram as evidence.';

/** Maximum number of classified resource groups echoed back as pass evidence. */
const RESOURCE_GROUP_SAMPLE_LIMIT = 50;

/**
 * Separation of Environments check (heuristic). Evaluates ONLY the subscriptions
 * the connection is scoped to via `resolveAzureSubscriptionIds` — the same
 * opt-in scope every Azure check honors (selection → legacy single → first
 * enabled). That helper also bounds the fan-out and surfaces an over-limit
 * selection or an unresolvable scope as its own finding, and returns [] when
 * nothing is in scope. Two tiers, in safety order:
 *   1) Subscriptions — a real isolation/RBAC/billing boundary.
 *   2) Resource groups — the most commonly env-named primitive, but only a
 *      LOGICAL container (shares the subscription's access/network).
 * A pass requires a PRODUCTION environment separated from a NON-PRODUCTION one
 * (not merely two non-production environments). Tiers are not unioned; anything
 * else fails with guidance and the task accepts manual evidence.
 *
 * Outcomes emitted through `ctx`:
 *   - `pass` when Tier 1 confirms separation (returns immediately);
 *   - `pass` when Tier 2 confirms separation AND resource-group pagination
 *     reported no coverage gap;
 *   - otherwise a single `fail`, titled "Could not verify…" when any read
 *     failed or pagination was incomplete, and "Could not confirm…" when the
 *     scan was complete but found no prod/non-prod split.
 */
export const environmentSeparationCheck: IntegrationCheck = {
  id: 'azure-environment-separation',
  name: 'Separation of environments — production isolated from non-production',
  description:
    'Verify production and non-production are separated across Azure subscriptions or resource groups.',
  service: 'policy',
  taskMapping: TASK_TEMPLATES.separationOfEnvironments,
  run: async (ctx: CheckContext) => {
    const subscriptionIds = await resolveAzureSubscriptionIds(ctx);
    // resolveAzureSubscriptionIds already emitted a finding when scope is empty.
    if (subscriptionIds.length === 0) return;

    // Tier 1 (strong): subscription-level separation. Read each IN-SCOPE
    // subscription's display name only — we never touch subscriptions outside
    // the configured selection.
    const subscriptionEnvSet = new Set<string>();
    let anySubscriptionReadFailed = false;
    for (const id of subscriptionIds) {
      try {
        const sub = await ctx.fetch<{ displayName?: string }>(
          `${ARM_BASE}/subscriptions/${id}?api-version=${SUBSCRIPTION_API_VERSION}`,
        );
        const env = classifyEnvironment([sub.displayName]);
        if (env) subscriptionEnvSet.add(env);
      } catch (err) {
        // Best-effort: remember the gap so the final verdict can say
        // "could not verify" instead of a confident "could not confirm".
        anySubscriptionReadFailed = true;
        ctx.log(
          `Azure env-separation: could not read subscription ${id} — ${toHttpReadFailure(err).error}`,
        );
      }
    }
    const subscriptionEnvs = [...subscriptionEnvSet];
    if (confirmsEnvironmentSeparation(subscriptionEnvs)) {
      ctx.pass({
        title: 'Environments separated across subscriptions',
        description: `Detected production separated from non-production across ${subscriptionIds.length} in-scope Azure subscription(s): ${subscriptionEnvs.join(', ')} (subscription-level boundary).`,
        resourceType: 'azure-environment-separation',
        resourceId: 'subscriptions',
        evidence: {
          boundary: 'subscription',
          detectedEnvironments: subscriptionEnvs,
          subscriptionsScanned: subscriptionIds.length,
        },
      });
      return;
    }

    // Tier 2 (weak, logical): resource-group-level separation within the
    // IN-SCOPE subscriptions only.
    const rgEnvSet = new Set<string>();
    const rgSamples: Array<{ name: string; environment: string }> = [];
    let anyRgReadFailed = false;
    let resourceGroupsClassified = 0;
    const rgCoverageGaps = new Set<string>();
    const rgCoverageGapSubscriptions = new Set<string>();
    for (const id of subscriptionIds) {
      let resourceGroups: ResourceGroup[];
      try {
        const result = await armListAllWithCoverage<ResourceGroup>({
          ctx,
          url: `${ARM_BASE}/subscriptions/${id}/resourcegroups?api-version=${RESOURCE_GROUPS_API_VERSION}`,
        });
        resourceGroups = result.items;
        // Record WHICH gap occurred and WHERE, so the finding's evidence can
        // point at the subscription whose listing was cut short.
        for (const gap of result.coverageGaps) {
          rgCoverageGaps.add(gap);
          rgCoverageGapSubscriptions.add(id);
        }
      } catch (err) {
        anyRgReadFailed = true;
        ctx.log(
          `Azure env-separation: could not list resource groups in ${id} — ${toHttpReadFailure(err).error}`,
        );
        continue;
      }
      for (const rg of resourceGroups) {
        const env = classifyResourceGroupEnv(rg);
        if (!env) continue;
        rgEnvSet.add(env);
        resourceGroupsClassified++;
        if (rgSamples.length < RESOURCE_GROUP_SAMPLE_LIMIT) {
          rgSamples.push({ name: rg.name, environment: env });
        }
      }
    }
    const resourceGroupEnvs = [...rgEnvSet];
    const resourceGroupCoverageIncomplete = rgCoverageGaps.size > 0;
    const resourceGroupSeparationDetected = confirmsEnvironmentSeparation(resourceGroupEnvs);
    if (!resourceGroupCoverageIncomplete && resourceGroupSeparationDetected) {
      ctx.pass({
        title: 'Environments separated across resource groups',
        description: `Detected production separated from non-production across resource groups in ${subscriptionIds.length} in-scope subscription(s): ${resourceGroupEnvs.join(', ')}. Resource-group separation is logical — RGs share the subscription's access and network boundary — not full isolation.`,
        resourceType: 'azure-environment-separation',
        resourceId: 'resource-groups',
        evidence: {
          boundary: 'resource-group',
          detectedEnvironments: resourceGroupEnvs,
          subscriptionsScanned: subscriptionIds.length,
          resourceGroups: rgSamples,
        },
      });
      return;
    }

    // Could not confirm. A read failure or pagination gap in EITHER tier leaves
    // coverage incomplete, so the verdict is "could not verify"
    // (retry/permissions/scope) — not the confident "could not confirm" (a
    // complete scan that found no split).
    const detectedAll = [...new Set([...subscriptionEnvs, ...resourceGroupEnvs])];
    const coverageGaps: string[] = [];
    if (anySubscriptionReadFailed) coverageGaps.push('subscriptions could not be read');
    if (anyRgReadFailed) coverageGaps.push('resource groups could not be listed');
    if (resourceGroupCoverageIncomplete) {
      coverageGaps.push('resource-group pagination stopped before all groups were evaluated');
    }
    const coverageIncomplete = coverageGaps.length > 0;

    let base: string;
    if (detectedAll.length === 0) {
      base = `No in-scope Azure subscription or resource group could be classified by environment across ${subscriptionIds.length} subscription(s)`;
    } else if (resourceGroupSeparationDetected && resourceGroupCoverageIncomplete) {
      base = `Detected production separated from non-production in the scanned resource groups (${resourceGroupEnvs.join(', ')}), but not all resource groups were evaluated across ${subscriptionIds.length} in-scope subscription(s)`;
    } else {
      base = `Detected environment(s) ${detectedAll.join(', ')}, but could not confirm a production environment separated from a non-production one across ${subscriptionIds.length} in-scope subscription(s)`;
    }

    ctx.fail({
      title: coverageIncomplete
        ? 'Could not verify environment separation'
        : 'Could not confirm environment separation',
      description: `${base}${coverageIncomplete ? ` (${coverageGaps.join('; ')})` : ''}.`,
      resourceType: 'azure-environment-separation',
      resourceId: 'subscriptions',
      severity: 'medium',
      remediation: GUIDANCE,
      evidence: {
        subscriptionEnvironments: subscriptionEnvs,
        resourceGroupEnvironments: resourceGroupEnvs,
        subscriptionsScanned: subscriptionIds.length,
        resourceGroupsClassified,
        ...(coverageIncomplete ? { coverageIncomplete: true } : {}),
        ...(resourceGroupCoverageIncomplete
          ? {
              resourceGroupCoverageGaps: [...rgCoverageGaps],
              resourceGroupCoverageGapSubscriptions: [...rgCoverageGapSubscriptions],
            }
          : {}),
      },
    });
  },
};
*/ const MAX_SUBSCRIPTIONS = 50; @@ -112,14 +113,25 @@ export async function resolveAzureSubscriptionIds( } /** Paginate an Azure ARM list endpoint (`{ value: T[], nextLink? }`). */ -export async function armListAll( - ctx: CheckContext, - url: string, -): Promise { +export type ArmListCoverageGap = 'page-cap' | 'unexpected-next-link-host'; + +export interface ArmListResult { + items: T[]; + coverageGaps: ArmListCoverageGap[]; +} + +export async function armListAllWithCoverage({ + ctx, + url, +}: { + ctx: CheckContext; + url: string; +}): Promise> { const out: T[] = []; + const coverageGaps: ArmListCoverageGap[] = []; let nextUrl: string | undefined = url; let pages = 0; - while (nextUrl && pages < 50) { + while (nextUrl && pages < ARM_PAGE_CAP) { const data: { value?: T[]; nextLink?: string } = await ctx.fetch(nextUrl); if (Array.isArray(data.value)) out.push(...data.value); nextUrl = data.nextLink; @@ -129,6 +141,7 @@ export async function armListAll( ctx.warn('Azure ARM nextLink pointed to an unexpected host; stopping pagination', { nextLink: nextUrl, }); + coverageGaps.push('unexpected-next-link-host'); nextUrl = undefined; } pages++; @@ -138,8 +151,15 @@ export async function armListAll( url, pages, }); + coverageGaps.push('page-cap'); } - return out; + return { items: out, coverageGaps }; +} + +/** Paginate an Azure ARM list endpoint (`{ value: T[], nextLink? }`). 
*/ +export async function armListAll(ctx: CheckContext, url: string): Promise { + const result = await armListAllWithCoverage({ ctx, url }); + return result.items; } /** diff --git a/packages/integration-platform/src/manifests/azure/index.ts b/packages/integration-platform/src/manifests/azure/index.ts index f8eb4255e7..8659becc32 100644 --- a/packages/integration-platform/src/manifests/azure/index.ts +++ b/packages/integration-platform/src/manifests/azure/index.ts @@ -1,5 +1,6 @@ import type { IntegrationManifest } from '../../types'; import { + environmentSeparationCheck, keyVaultProtectionCheck, keyVaultRbacCheck, monitorLoggingAlertingCheck, @@ -176,5 +177,6 @@ Our integration only makes read-only API calls for security scanning.`, nsgNoOpenPortsCheck, rbacLeastPrivilegeCheck, monitorLoggingAlertingCheck, + environmentSeparationCheck, ], }; diff --git a/packages/integration-platform/src/manifests/environment-classification.ts b/packages/integration-platform/src/manifests/environment-classification.ts new file mode 100644 index 0000000000..843df25d0d --- /dev/null +++ b/packages/integration-platform/src/manifests/environment-classification.ts @@ -0,0 +1,160 @@ +/** + * Shared environment classification for "Separation of Environments" checks + * across cloud manifests (GCP projects, AWS VPCs, Azure subscriptions/resource + * groups). Kept here — alongside `http-read-failure` — because all three cloud + * manifests need the identical, well-tested logic. + * + * Matching is TOKEN-EXACT, not substring: a candidate string is split on any + * run of non-alphanumeric characters (`-`, `_`, `.`, `/`, spaces) and each + * token is compared exactly to a set of environment keywords. This is why + * "production"/"product" and "dev"/"developer" never collide, and why separator + * style doesn't matter (`myapp-prod`, `myapp_prod`, `myapp.prod` all classify). 
/**
 * Qualifiers are honored: a production keyword that is negated ("non-prod",
 * "not-prod", or a joined spelling such as "nonprod") classifies as
 * NON-PRODUCTION, and a pre-production keyword ("pre-prod", or a joined
 * spelling such as "preprod") classifies as STAGING — never as plain
 * production. Without this a `non-prod` label would read as production and
 * corrupt the prod-vs-non-prod separation verdict.
 */

/** Production keywords — defined once so the qualifier pass below can reuse them. */
const PRODUCTION_TOKENS: ReadonlySet<string> = new Set([
  'prod',
  'production',
  'prd',
  'live',
]);

/**
 * Keyword sets per environment bucket, checked in array order.
 *
 * The staging set carries every JOINED pre-production spelling ("preprod",
 * "preprd", "preproduction") so they agree with their separated forms
 * ("pre-prod", "pre-prd", "pre-production"), which the qualifier pass in
 * `classifyTokens` already resolves to staging.
 */
const ENV_TOKEN_SETS: ReadonlyArray<{ env: string; tokens: ReadonlySet<string> }> = [
  // Production is first so it wins ties when a string carries multiple tokens.
  { env: 'production', tokens: PRODUCTION_TOKENS },
  {
    env: 'staging',
    tokens: new Set(['staging', 'stage', 'stg', 'preprod', 'preprd', 'preproduction', 'uat']),
  },
  { env: 'development', tokens: new Set(['dev', 'develop', 'development']) },
  { env: 'test', tokens: new Set(['test', 'testing', 'qa']) },
  { env: 'sandbox', tokens: new Set(['sandbox', 'sbx', 'demo']) },
];

/** Default tag/label keys that conventionally carry the environment. */
export const ENV_TAG_KEYS = ['environment', 'env', 'stage', 'tier'] as const;

/** The single production bucket; every other bucket is non-production. */
const PRODUCTION_ENV = 'production';

/** The staging bucket — also where pre-production ("pre-prod") classifies. */
const STAGING_ENV = 'staging';

/**
 * Canonical bucket for an explicitly non-production label ("non-prod",
 * "nonprod"). It says only "not production" — which is exactly what the
 * separation control needs: it counts as a non-production environment.
 */
const NON_PRODUCTION_ENV = 'non-production';

/**
 * Joined non-production spellings: the qualifier and production word run
 * together with no separator ("nonprod"), so they survive `tokenize` as a single
 * token and are matched whole. Separated forms ("non-prod") are caught by the
 * adjacency check in `classifyTokens`.
 */
const NON_PRODUCTION_TOKENS: ReadonlySet<string> = new Set([
  'nonprod',
  'nonprd',
  'nonproduction',
  'notprod',
  'notprd',
  'notproduction',
]);

/**
 * Qualifier tokens checked against the token IMMEDIATELY before a production
 * token. Only production is qualified — it's the one bucket where a missed
 * qualifier flips the prod-vs-non-prod verdict — so a stray "non"/"pre"
 * elsewhere in a name is ignored.
 */
const PRODUCTION_NEGATORS: ReadonlySet<string> = new Set(['non', 'not']);
const PREPROD_QUALIFIERS: ReadonlySet<string> = new Set(['pre']);

/**
 * Whether a set of detected environments confirms environment SEPARATION as the
 * control intends: production must be present AND at least one environment
 * that is not production (staging, development, test, sandbox or the explicit
 * non-production bucket). Two non-production environments alone (e.g. dev +
 * staging) do NOT demonstrate that production is segregated, so they must not
 * pass.
 *
 * @param envs - Environment buckets detected so far (duplicates are harmless).
 * @returns `true` only when `'production'` and some other bucket are both present.
 */
export function confirmsEnvironmentSeparation(envs: ReadonlyArray<string>): boolean {
  const hasProduction = envs.includes(PRODUCTION_ENV);
  const hasOther = envs.some((env) => env !== PRODUCTION_ENV);
  return hasProduction && hasOther;
}

/** Split on any run of non-alphanumeric chars; lowercased, empties removed. */
function tokenize(value: string): string[] {
  const pieces = value.toLowerCase().split(/[^a-z0-9]+/);
  // A leading/trailing separator yields '' at the edges — drop those.
  return pieces.filter((piece) => piece.length > 0);
}

/**
 * Classify a single tokenized candidate, or null. A qualified production token
 * is resolved FIRST — "non-prod"/"not-prod" (and the joined "nonprod") →
 * non-production, "pre-prod" → staging — so the bare production keyword below
 * can't win it back. Otherwise tokens are matched exactly against each
 * environment set (production first, so it wins when several tokens are present).
 *
 * @param tokens - Lowercased tokens of one candidate, in original order.
 * @returns The environment bucket, or `null` when no token is a known keyword.
 */
function classifyTokens(tokens: string[]): string | null {
  // Pass 1: qualifiers. Walk left to right so the FIRST qualified token decides.
  let previous: string | undefined;
  for (const current of tokens) {
    if (NON_PRODUCTION_TOKENS.has(current)) return NON_PRODUCTION_ENV;
    if (previous && PRODUCTION_TOKENS.has(current)) {
      if (PRODUCTION_NEGATORS.has(previous)) return NON_PRODUCTION_ENV;
      if (PREPROD_QUALIFIERS.has(previous)) return STAGING_ENV;
    }
    previous = current;
  }

  // Pass 2: plain keywords, in ENV_TOKEN_SETS priority order.
  for (const { env, tokens: keywords } of ENV_TOKEN_SETS) {
    if (tokens.some((token) => keywords.has(token))) return env;
  }
  return null;
}

/**
 * Classify a list of candidate strings (e.g. an environment tag/label value,
 * then a resource name) into a canonical environment, or null if none match.
 * Candidates are tried in order, so callers should pass the most authoritative
 * source (explicit env tag/label value) before the resource name.
 *
 * @param candidates - Strings to inspect; empty, `undefined` and `null`
 *   entries are skipped.
 * @returns The bucket of the first candidate that classifies, else `null`.
 */
export function classifyEnvironment(
  candidates: ReadonlyArray<string | null | undefined>,
): string | null {
  for (const candidate of candidates) {
    if (!candidate) continue;
    const env = classifyTokens(tokenize(candidate));
    if (env) return env;
  }
  return null;
}

/**
 * Extract the values of environment-indicating tag/label keys from a tag map.
 * Key matching is case-insensitive (Azure/AWS tag keys vary in casing). Only
 * the configured env keys are read — arbitrary tag values are deliberately NOT
 * scanned, so a stray `team=dev-team` tag can't fabricate an environment.
 *
 * @param tags - Tag/label map, or `undefined` when the resource has none.
 * @param keys - Keys to read, most authoritative first (defaults to `ENV_TAG_KEYS`).
 * @returns Non-empty string values, ordered by `keys` (not by the map's own order).
 */
export function envTagValues(
  tags: Readonly<Record<string, unknown>> | undefined,
  keys: ReadonlyArray<string> = ENV_TAG_KEYS,
): string[] {
  if (!tags) return [];
  // Iterate the configured keys in PRIORITY order (not the tag map's insertion
  // order) so a more authoritative key (`environment`) is returned before a
  // less authoritative one (`stage`) — `classifyEnvironment` trusts order.
  const byLowerKey = new Map<string, unknown>();
  for (const [key, value] of Object.entries(tags)) {
    byLowerKey.set(key.toLowerCase(), value);
  }
  return keys
    .map((key) => byLowerKey.get(key.toLowerCase()))
    .filter((value): value is string => typeof value === 'string' && value.length > 0);
}
underscore)', () => { + expect(classifyProjectEnv({ projectId: 'myapp-prod' })).toBe('production'); + expect(classifyProjectEnv({ projectId: 'myapp-dev-123' })).toBe('development'); + expect(classifyProjectEnv({ projectId: 'web-staging' })).toBe('staging'); + expect(classifyProjectEnv({ projectId: 'myapp_prod' })).toBe('production'); + }); + + it('prefers an explicit environment label over the name', () => { + expect( + classifyProjectEnv({ projectId: 'proj-001', labels: { environment: 'production' } }), + ).toBe('production'); + expect( + classifyProjectEnv({ projectId: 'proj-002', labels: { env: 'qa' } }), + ).toBe('test'); + }); + + it('does NOT false-match substrings like product/developer', () => { + expect(classifyProjectEnv({ projectId: 'product-catalog' })).toBeNull(); + expect(classifyProjectEnv({ projectId: 'developer-portal' })).toBeNull(); + expect(classifyProjectEnv({ projectId: 'data-warehouse' })).toBeNull(); + }); + + it('treats preprod as staging, not production', () => { + expect(classifyProjectEnv({ projectId: 'app-preprod' })).toBe('staging'); + }); +}); + +describe('GCP environment-separation check', () => { + const status = (err: Error, code: number) => { + (err as Error & { status: number }).status = code; + return err; + }; + // `variables: {}` forces unscoped discovery (the project_ids-less default), + // so the mock can return the `/v1/projects` list shape. 
+ const UNSCOPED = {}; + + it('passes when production is separated from a non-production env (by name)', async () => { + const out = await runCheck(environmentSeparationCheck, { + variables: UNSCOPED, + fetch: () => ({ + projects: [{ projectId: 'myapp-prod' }, { projectId: 'myapp-dev' }], + }), + }); + expect(out.failed).toHaveLength(0); + expect(out.passed).toHaveLength(1); + expect(out.passed[0]!.title).toMatch(/Environments separated/); + expect(out.passed[0]!.evidence).toMatchObject({ + detectedEnvironments: expect.arrayContaining(['production', 'development']), + }); + }); + + it('passes when environments are distinguished by labels (prod + staging)', async () => { + const out = await runCheck(environmentSeparationCheck, { + variables: UNSCOPED, + fetch: () => ({ + projects: [ + { projectId: 'a', labels: { environment: 'production' } }, + { projectId: 'b', labels: { environment: 'staging' } }, + ], + }), + }); + expect(out.failed).toHaveLength(0); + expect(out.passed).toHaveLength(1); + }); + + it('evaluates projects across multiple pages (unscoped discovery)', async () => { + const out = await runCheck(environmentSeparationCheck, { + variables: UNSCOPED, + fetch: (url) => { + if (url.includes('pageToken=tok2')) { + return { projects: [{ projectId: 'app-dev' }] }; + } + return { projects: [{ projectId: 'app-prod' }], nextPageToken: 'tok2' }; + }, + }); + expect(out.failed).toHaveLength(0); + expect(out.passed).toHaveLength(1); + }); + + it('honors project_ids scope: fetches selected projects, never lists all', async () => { + const out = await runCheck(environmentSeparationCheck, { + variables: { project_ids: ['p-prod', 'p-dev'] }, + fetch: (url) => { + if (url.includes('/v1/projects/p-prod')) { + return { projectId: 'p-prod', labels: { environment: 'production' } }; + } + if (url.includes('/v1/projects/p-dev')) { + return { projectId: 'p-dev', labels: { environment: 'development' } }; + } + // The list endpoint must NOT be called when projects are selected. 
+ throw new Error(`unexpected list call: ${url}`); + }, + }); + expect(out.failed).toHaveLength(0); + expect(out.passed).toHaveLength(1); + }); + + it('fails when only non-production environments are present (no production)', async () => { + const out = await runCheck(environmentSeparationCheck, { + variables: UNSCOPED, + fetch: () => ({ + projects: [{ projectId: 'app-dev' }, { projectId: 'app-staging' }], + }), + }); + expect(out.passed).toHaveLength(0); + expect(out.failed).toHaveLength(1); + expect(out.failed[0]!.title).toMatch(/Could not confirm environment separation/); + }); + + it('fails when only production is present (no non-production)', async () => { + const out = await runCheck(environmentSeparationCheck, { + variables: UNSCOPED, + fetch: () => ({ projects: [{ projectId: 'myapp-prod' }] }), + }); + expect(out.passed).toHaveLength(0); + expect(out.failed).toHaveLength(1); + expect(out.failed[0]!.title).toMatch(/Could not confirm environment separation/); + expect(out.failed[0]!.remediation).toMatch(/distinct GCP projects/); + }); + + it('fails when no project can be classified', async () => { + const out = await runCheck(environmentSeparationCheck, { + variables: UNSCOPED, + fetch: () => ({ + projects: [{ projectId: 'product-catalog' }, { projectId: 'backend' }], + }), + }); + expect(out.passed).toHaveLength(0); + expect(out.failed).toHaveLength(1); + expect(out.failed[0]!.title).toMatch(/Could not confirm environment separation/); + }); + + it('surfaces truncation as "could not verify" when discovery is capped', async () => { + // Every page returns more pages → the 20-page cap trips; classify only + // non-prod so the verdict is a fail that must disclose the partial scan. 
+ const out = await runCheck(environmentSeparationCheck, { + variables: UNSCOPED, + fetch: () => ({ + projects: [{ projectId: 'app-dev' }], + nextPageToken: 'next', + }), + }); + expect(out.passed).toHaveLength(0); + expect(out.failed).toHaveLength(1); + expect(out.failed[0]!.title).toMatch(/Could not verify environment separation/); + expect(out.failed[0]!.evidence).toMatchObject({ discoveryTruncated: true }); + }); + + it('fails when no projects are accessible (unscoped)', async () => { + const out = await runCheck(environmentSeparationCheck, { + variables: UNSCOPED, + fetch: () => ({ projects: [] }), + }); + expect(out.passed).toHaveLength(0); + expect(out.failed).toHaveLength(1); + expect(out.failed[0]!.title).toMatch(/No GCP projects detected/); + }); + + it('fails "could not verify" when unscoped discovery read fails', async () => { + const out = await runCheck(environmentSeparationCheck, { + variables: UNSCOPED, + fetch: () => { + throw status(new Error('HTTP 403: Forbidden'), 403); + }, + }); + expect(out.passed).toHaveLength(0); + expect(out.failed).toHaveLength(1); + expect(out.failed[0]!.title).toMatch(/Could not verify environment separation/); + }); + + it('fails "could not verify" when a selected (scoped) project cannot be read', async () => { + const out = await runCheck(environmentSeparationCheck, { + variables: { project_ids: ['p1'] }, + fetch: () => { + throw status(new Error('HTTP 403: Forbidden'), 403); + }, + }); + expect(out.passed).toHaveLength(0); + expect(out.failed).toHaveLength(1); + expect(out.failed[0]!.title).toMatch(/Could not verify environment separation/); + }); +}); diff --git a/packages/integration-platform/src/manifests/gcp/checks/environment-separation.ts b/packages/integration-platform/src/manifests/gcp/checks/environment-separation.ts new file mode 100644 index 0000000000..262dd53df3 --- /dev/null +++ b/packages/integration-platform/src/manifests/gcp/checks/environment-separation.ts @@ -0,0 +1,237 @@ +import { TASK_TEMPLATES 
} from '../../../task-mappings'; +import type { CheckContext, IntegrationCheck } from '../../../types'; +import { + classifyEnvironment, + confirmsEnvironmentSeparation, + envTagValues, +} from '../../environment-classification'; +import { + remediationForReadFailure, + toHttpReadFailure, +} from '../../http-read-failure'; + +interface GcpProject { + projectId: string; + name?: string; + labels?: Record; +} + +interface ResolvedProjects { + projects: GcpProject[]; + /** True when UNSCOPED discovery hit the page cap (a scoped fetch is exact). */ + truncated: boolean; + /** Set when a selected project could not be read (scoped path). */ + readError?: string; +} + +/** + * Classify a project into an environment bucket, or null if undetermined. + * Explicit `environment`/`env` label values are most authoritative, then the + * project id / display name. Token matching (shared classifier) means + * "product"/"developer" do NOT match "prod"/"dev" and separator style + * (`-`/`_`/`.`) doesn't matter. + */ +export function classifyProjectEnv(project: GcpProject): string | null { + return classifyEnvironment([ + ...envTagValues(project.labels), + project.projectId, + project.name, + ]); +} + +/** The user-selected project scope (`project_ids` variable), trimmed. */ +function selectedProjectIds(ctx: CheckContext): string[] { + const selected = ctx.variables.project_ids; + if (!Array.isArray(selected)) return []; + return selected + .filter((s): s is string => typeof s === 'string') + .map((s) => s.trim()) + .filter((s) => s.length > 0); +} + +/** + * Resolve the projects to evaluate, HONORING the `project_ids` opt-in scope: + * when projects are selected we fetch exactly those (no truncation possible); + * otherwise we discover all active projects with a bounded page walk and report + * `truncated` so a partial footprint is never presented as a complete verdict. 
/** `pageSize` requested from the project list endpoint during unscoped discovery. */
const DISCOVERY_PAGE_SIZE = 100;

/** Maximum number of list pages walked before discovery is reported as truncated. */
const MAX_DISCOVERY_PAGES = 20;

/**
 * Resolve the projects to evaluate, HONORING the `project_ids` opt-in scope:
 * when projects are selected we fetch exactly those (no truncation possible);
 * otherwise we discover all active projects with a bounded page walk and report
 * `truncated` so a partial footprint is never presented as a complete verdict.
 *
 * Scoped path: each selected id is fetched individually; a repeated id is
 * fetched (and counted) once. A failed fetch is logged and recorded in
 * `readError` (the most recent failure wins) without aborting the remaining
 * ids. A response without a string `projectId` is ignored.
 *
 * Unscoped path: errors from `ctx.fetch` are NOT caught here and propagate to
 * the caller.
 *
 * @param ctx - Check context supplying `variables`, `fetch` and `log`.
 * @returns The projects found, whether discovery stopped at the page cap, and
 *   the last scoped read error, if any.
 */
async function resolveProjects(ctx: CheckContext): Promise<ResolvedProjects> {
  // De-duplicate while preserving the user's order: the same id listed twice
  // must not cost a second request or inflate the reported project count.
  const selected = [...new Set(selectedProjectIds(ctx))];

  if (selected.length > 0) {
    const projects: GcpProject[] = [];
    let readError: string | undefined;
    for (const id of selected) {
      try {
        const project = await ctx.fetch<GcpProject>(
          `/v1/projects/${encodeURIComponent(id)}`,
        );
        if (project && typeof project.projectId === 'string') {
          projects.push(project);
        }
      } catch (err) {
        readError = toHttpReadFailure(err).error;
        ctx.log(`GCP env-separation: could not read project ${id} — ${readError}`);
      }
    }
    return { projects, truncated: false, readError };
  }

  const projects: GcpProject[] = [];
  const filter = encodeURIComponent('lifecycleState:ACTIVE');
  let pageToken: string | undefined;
  let pages = 0;
  do {
    const tokenParam = pageToken
      ? `&pageToken=${encodeURIComponent(pageToken)}`
      : '';
    const data = await ctx.fetch<{
      projects?: GcpProject[];
      nextPageToken?: string;
    }>(
      `/v1/projects?filter=${filter}&pageSize=${DISCOVERY_PAGE_SIZE}${tokenParam}`,
    );
    projects.push(...(data.projects ?? []));
    pageToken =
      typeof data.nextPageToken === 'string' ? data.nextPageToken : undefined;
    pages++;
  } while (pageToken && pages < MAX_DISCOVERY_PAGES);

  // A token still pending after the loop means the cap stopped us early.
  return { projects, truncated: Boolean(pageToken) };
}

/** Remediation text attached to the "could not confirm/verify" findings. */
const GUIDANCE =
  'Separate production and non-production workloads into distinct GCP projects and label each with an `environment` label (e.g. environment=production, environment=staging). If you separate environments another way (e.g. VPCs or folders), upload a console screenshot or architecture diagram as evidence.';
GCP's recommended pattern is a + * separate project per environment, so this infers separation from the project + * footprint: it classifies each in-scope project into an environment (by + * `environment`/`env` label, else name/id token) and passes only when it can + * confirm a PRODUCTION environment is separated from at least one + * NON-PRODUCTION environment. + * + * It honors the `project_ids` opt-in scope, never presents a truncated + * discovery as complete, and is evidence-first: when it cannot confirm + * separation it emits actionable guidance (label projects, or upload a diagram) + * rather than a silent pass. + */ +export const environmentSeparationCheck: IntegrationCheck = { + id: 'gcp-environment-separation', + name: 'Separation of environments — production isolated from non-production', + description: + 'Verify production and non-production workloads are separated across distinct GCP projects.', + service: 'iam', + taskMapping: TASK_TEMPLATES.separationOfEnvironments, + + run: async (ctx: CheckContext) => { + let resolved: ResolvedProjects; + try { + resolved = await resolveProjects(ctx); + } catch (err) { + const failure = toHttpReadFailure(err); + ctx.fail({ + title: 'Could not verify environment separation', + description: `GCP projects could not be listed (${failure.error}), so environment separation could not be evaluated.`, + resourceType: 'gcp-environment-separation', + resourceId: 'projects', + severity: 'medium', + remediation: remediationForReadFailure( + failure, + 'Grant resourcemanager.projects.list (e.g. roles/viewer) to the connection, then re-run the check.', + ), + evidence: { readError: failure.error }, + }); + return; + } + + const { projects, truncated, readError } = resolved; + + if (projects.length === 0) { + // A read failure (scoped projects unreadable) is "could not verify"; a + // genuinely empty footprint is "no projects". + ctx.fail({ + title: readError + ? 
'Could not verify environment separation' + : 'No GCP projects detected', + description: readError + ? `Selected GCP projects could not be read (${readError}), so environment separation could not be evaluated.` + : 'No GCP projects were in scope, so environment separation could not be evaluated.', + resourceType: 'gcp-environment-separation', + resourceId: 'projects', + severity: 'medium', + remediation: readError + ? `Grant resourcemanager.projects.get (e.g. roles/viewer) for the selected projects, then re-run. ${GUIDANCE}` + : `Grant resourcemanager.projects.list to the connection (or select projects in the integration settings), then re-run. ${GUIDANCE}`, + evidence: { projectCount: 0, ...(readError ? { readError } : {}) }, + }); + return; + } + + const classified = projects.map((p) => ({ + projectId: p.projectId, + environment: classifyProjectEnv(p), + })); + const detected = [ + ...new Set( + classified + .map((c) => c.environment) + .filter((e): e is string => e !== null), + ), + ]; + const sample = classified.slice(0, 50).map((c) => ({ + projectId: c.projectId, + environment: c.environment ?? 'unclassified', + })); + + // A confirmed pass requires production + a non-production environment. + // Truncation/read gaps cannot turn a confirmed pass into a wrong one + // (scanning more projects only ADDS environments), so a pass stands. + if (confirmsEnvironmentSeparation(detected)) { + ctx.pass({ + title: 'Environments separated across projects', + description: `Detected production separated from non-production across ${projects.length} GCP project(s): ${detected.join(', ')}.`, + resourceType: 'gcp-environment-separation', + resourceId: 'projects', + evidence: { + detectedEnvironments: detected, + projectCount: projects.length, + projects: sample, + }, + }); + return; + } + + // Could not confirm. Surface any incomplete coverage so a partial footprint + // is never presented as a complete "not separated" verdict. 
+ const coverageGaps: string[] = []; + if (truncated) { + coverageGaps.push( + 'project discovery hit the page cap, so not all projects were evaluated', + ); + } + if (readError) { + coverageGaps.push('some selected projects could not be read'); + } + const base = + detected.length === 0 + ? `No GCP project could be classified by environment across ${projects.length} project(s)` + : `Detected environment(s) ${detected.join(', ')}, but could not confirm a production environment separated from a non-production one across ${projects.length} project(s)`; + ctx.fail({ + title: + coverageGaps.length > 0 + ? 'Could not verify environment separation' + : 'Could not confirm environment separation', + description: `${base}${coverageGaps.length ? ` (${coverageGaps.join('; ')})` : ''}.`, + resourceType: 'gcp-environment-separation', + resourceId: 'projects', + severity: 'medium', + remediation: GUIDANCE, + evidence: { + detectedEnvironments: detected, + projectCount: projects.length, + ...(truncated ? 
{ discoveryTruncated: true } : {}), + projects: sample, + }, + }); + }, +}; diff --git a/packages/integration-platform/src/manifests/gcp/checks/index.ts b/packages/integration-platform/src/manifests/gcp/checks/index.ts index cce40f8388..efad32512b 100644 --- a/packages/integration-platform/src/manifests/gcp/checks/index.ts +++ b/packages/integration-platform/src/manifests/gcp/checks/index.ts @@ -6,3 +6,4 @@ export { cloudSqlBackupsCheck } from './cloud-sql-backups'; export { cloudMonitoringAlertingCheck } from './cloud-monitoring-alerting'; export { storageEncryptionCheck } from './storage-encryption'; export { cloudSqlEncryptionCheck } from './cloud-sql-encryption'; +export { environmentSeparationCheck } from './environment-separation'; diff --git a/packages/integration-platform/src/manifests/gcp/index.ts b/packages/integration-platform/src/manifests/gcp/index.ts index 2cd7394c06..d9c0b84f00 100644 --- a/packages/integration-platform/src/manifests/gcp/index.ts +++ b/packages/integration-platform/src/manifests/gcp/index.ts @@ -4,6 +4,7 @@ import { cloudSqlBackupsCheck, cloudSqlEncryptionCheck, cloudSqlSslCheck, + environmentSeparationCheck, iamPrimitiveRolesCheck, storageEncryptionCheck, storagePublicAccessCheck, @@ -168,5 +169,6 @@ This is industry standard - all GCP security monitoring tools use the same scope cloudMonitoringAlertingCheck, storageEncryptionCheck, cloudSqlEncryptionCheck, + environmentSeparationCheck, ], };