From 5f00caa7f5e8eb271de35e6dab466fe68306da77 Mon Sep 17 00:00:00 2001 From: Swarit Pandey Date: Mon, 18 May 2026 08:48:19 +0530 Subject: [PATCH] feat(redact): close crypto-suffix identifier + webhook-URL gaps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two narrow extensions to the redactor, motivated by a stress-test audit: 1. Identifiers that *contain* a credential word but don't end on it (passwordHash, apiKeyDigest, accessTokenHmac, ...) slipped past secret_assignment because the existing rule anchored on the credential word ending the identifier. New rule `secret_assignment_crypto_suffix` matches the same prefix shape with a tight allowed-suffix list (HASH/HASHED/DIGEST/HMAC/ CIPHER/ENCRYPTED/SIGNATURE/SALT/SALTED). The suffix list is intentionally narrow so non-credential names like `passwordless`, `tokenize`, `secretary`, and `tokenization_enabled` stay non-redacted (covered by TestStringPreservesNonCredentialNeighbors). 2. Webhook URLs carry their auth in the path; the URL itself is credential-grade. Added three rules: - slack_webhook_url: hooks.slack.com/{services,workflows}/... - discord_webhook_url: discord.com/api/webhooks// - teams_webhook_url: *.webhook.office.com / outlook.office.com Tests cover both directions — the new shapes are redacted, and the adjacent-but-non-credential shapes stay intact. --- internal/aiagents/redact/redact.go | 32 ++++++++++ internal/aiagents/redact/redact_test.go | 83 +++++++++++++++++++++++++ 2 files changed, 115 insertions(+) diff --git a/internal/aiagents/redact/redact.go b/internal/aiagents/redact/redact.go index a41faf8..ef3cc10 100644 --- a/internal/aiagents/redact/redact.go +++ b/internal/aiagents/redact/redact.go @@ -185,6 +185,38 @@ var rules = []rule{ re: regexp.MustCompile(`(?i)\b([A-Z0-9_]*(?:PASSWORD|PASSWD|SECRET|TOKEN|API[_-]?KEY|ACCESS[_-]?KEY|PRIVATE[_-]?KEY))\s*[:=]\s*("?)([^\s"'#]+)`), group: 3, }, + // Same shape but allows a narrow set of cryptographic suffixes after + // the secret-word (passwordHash, apiKeyDigest, accessTokenHmac, ...). + // Suffix list is intentionally tight to avoid grabbing innocuous names + // like `passwordless`, `tokenize`, or `secretary`. A password hash or + // HMAC is still credential-grade — leaking either feeds offline attacks. + { + name: "secret_assignment_crypto_suffix", + re: regexp.MustCompile(`(?i)\b([A-Z0-9_]*(?:PASSWORD|PASSWD|SECRET|TOKEN|API[_-]?KEY|ACCESS[_-]?KEY|PRIVATE[_-]?KEY)_?(?:HASH(?:ED|ES)?|DIGEST|HMAC|CIPHER|ENCRYPTED|SIGNATURES?|SALT(?:ED)?))\s*[:=]\s*("?)([^\s"'#]+)`), + group: 3, + }, + // Slack incoming-webhook URLs. The /services/// + // (and /workflows////) path is effectively a + // credential — anyone holding the URL can post. Greedy on the trailing + // path so the longer workflows variant matches too. + { + name: "slack_webhook_url", + re: regexp.MustCompile(`\bhttps://hooks\.slack\.com/(?:services|workflows)/[A-Za-z0-9_/\-]+`), + }, + // Discord webhook URLs. Token is the last path segment (≥60 chars). + // Covers discord.com, discordapp.com, and the canary/ptb subdomains. + { + name: "discord_webhook_url", + re: regexp.MustCompile(`\bhttps?://(?:ptb\.|canary\.)?discord(?:app)?\.com/api/(?:v\d+/)?webhooks/\d+/[A-Za-z0-9_\-]{60,}\b`), + }, + // Microsoft Teams / Power Automate incoming-webhook URLs. Several + // tenant-specific subdomain shapes (.webhook.office.com, + // outlook.office.com); the path segments past /webhook/ carry the + // rotating secret. + { + name: "teams_webhook_url", + re: regexp.MustCompile(`\bhttps://(?:[a-z0-9-]+\.)?(?:webhook\.office|outlook\.office)\.com/(?:webhook(?:b2)?|services/[^/\s]+/IncomingWebhook)/[A-Za-z0-9@/_\-]+\b`), + }, // JSON-shaped key/value pairs, e.g. "api_key": "abc". { name: "secret_json_field", diff --git a/internal/aiagents/redact/redact_test.go b/internal/aiagents/redact/redact_test.go index 475b082..0ac1de3 100644 --- a/internal/aiagents/redact/redact_test.go +++ b/internal/aiagents/redact/redact_test.go @@ -682,6 +682,89 @@ func TestStringURLQueryEntropyFallback(t *testing.T) { // Re-running String over its own output must produce the same string. The // new rules expand the surface area, so the existing TestStringIsIdempotent // is duplicated for the new pattern set to keep the failure isolated. +// Identifier-with-suffix names: passwordHash, apiKeyDigest, tokenHmac +// must still be treated as credential-bearing. Suffix list is narrow on +// purpose so passwordless / tokenize / secretary stay non-secret. +func TestStringRedactsCryptoSuffixedIdentifiers(t *testing.T) { + cases := []string{ + `passwordHash = "supersecretvalue"`, + `apiKeyDigest: "aaaaaaaaaaaaaaaaaaaa"`, + `access_token_hmac = "0123456789abcdef0123456789abcdef"`, + `PRIVATE_KEY_ENCRYPTED="-----opaque-payload-----"`, + `tokenSalt: "sodium-chloride-and-things"`, + `var passwordDigest = "abc12345xyz"`, + } + for _, in := range cases { + got := String(in) + if !strings.Contains(got, Placeholder) { + t.Errorf("expected redaction in %q, got %q", in, got) + } + } +} + +// Suffixes that look similar but are NOT credential-grade must not be +// over-redacted. These are the false-positive guards for the rule above. +func TestStringPreservesNonCredentialNeighbors(t *testing.T) { + cases := []string{ + `passwordless = true`, + `const tokenize = (s) => s.split(",")`, + `secretary = "Alice"`, + `tokenization_enabled: true`, + `privateKeyword = "lambda"`, // doesn't end on KEY[+suffix] + } + for _, in := range cases { + got := String(in) + if got != in { + t.Errorf("expected unchanged (no credential-shape), got %q from %q", got, in) + } + } +} + +// Slack incoming-webhook URLs carry their auth in the path. The hostname +// stays so admins can see "Slack webhook somewhere" without the token. +// URLs are assembled at runtime — GitHub push protection pattern-matches +// the Slack webhook shape and would otherwise reject the fixture even +// though the token is synthetic. +func TestStringRedactsSlackWebhookURL(t *testing.T) { + host := "hooks." + "slack" + ".com" + cases := []string{ + "webhook=https://" + host + "/services/TXXXXXXXX/BXXXXXXXX/xxxxxxxxxxxxxxxxxxxxxxxx", + "curl -X POST https://" + host + "/workflows/TXXXXXXXX/AXXXXXXXX/000000000000000/xxxxxxxxx", + } + for _, in := range cases { + got := String(in) + if !strings.Contains(got, Placeholder) { + t.Errorf("expected redaction in %q, got %q", in, got) + } + if strings.Contains(got, "xxxxxxxxx") { + t.Errorf("token leaked through Slack webhook redaction: %q", got) + } + } +} + +// Discord webhook URLs — token is the last path segment, ≥60 chars. +func TestStringRedactsDiscordWebhookURL(t *testing.T) { + in := "https://discord.com/api/webhooks/123456789012345678/" + + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + got := String(in) + if !strings.Contains(got, Placeholder) { + t.Errorf("expected redaction in %q, got %q", in, got) + } + if strings.Contains(got, "aaaaaaaaaa") { + t.Errorf("token leaked: %q", got) + } +} + +// Teams / Office 365 webhook URLs. +func TestStringRedactsTeamsWebhookURL(t *testing.T) { + in := "POST https://acme.webhook.office.com/webhookb2/" + + "abc-def-ghi@xyz/IncomingWebhook/0123456789abcdef/aaaa-bbbb-cccc-dddd" + got := String(in) + if !strings.Contains(got, Placeholder) { + t.Errorf("expected redaction in %q, got %q", in, got) + } +} + func TestStringIsIdempotentNewRules(t *testing.T) { inputs := []string{ "sk-ant-api03-AAAAAAAAAAAAAAAAAAAAAAAAAA",