From a3202d656f6812d366e155cf096fe5b63d6a2bcd Mon Sep 17 00:00:00 2001 From: PauloJeunon Date: Thu, 25 Jun 2026 11:27:37 -0300 Subject: [PATCH 1/4] Switch base URL to archive.sparkpost.com and noindex the site Moves the docs site from support.sparkpost.com to archive.sparkpost.com so the content can be archived (and later 301'd to bird.com) without competing with bird.com in search. Adds noindex/nofollow at three layers (robots.txt, robots meta tag, X-Robots-Tag header) so search engines drop the archive. Co-Authored-By: Claude Opus 4.7 (1M context) --- components/site/seo.tsx | 2 +- netlify.toml | 4 +++- next-sitemap.js | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/components/site/seo.tsx b/components/site/seo.tsx index 734fb5906..0482b7dc7 100644 --- a/components/site/seo.tsx +++ b/components/site/seo.tsx @@ -17,7 +17,7 @@ const SEO = (props: SeoProps): JSX.Element => { - + {title && ( <> {title} diff --git a/netlify.toml b/netlify.toml index c966f17c5..d891011c5 100644 --- a/netlify.toml +++ b/netlify.toml @@ -9,6 +9,8 @@ package = "@netlify/plugin-nextjs" for = "/*" [headers.values] + X-Robots-Tag = "noindex, nofollow" + Strict-Transport-Security = ''' max-age=31536000; includeSubDomains;''' @@ -47,7 +49,7 @@ package = "@netlify/plugin-nextjs" 'self' data: fonts.gstatic.com - support.sparkpost.com; + archive.sparkpost.com; connect-src * data: diff --git a/next-sitemap.js b/next-sitemap.js index 30272e766..73f28ed3d 100644 --- a/next-sitemap.js +++ b/next-sitemap.js @@ -1,11 +1,12 @@ module.exports = { - siteUrl: process.env.SITE_URL || 'https://support.sparkpost.com', + siteUrl: process.env.SITE_URL || 'https://archive.sparkpost.com', generateRobotsTxt: true, sitemapSize: 5000, robotsTxtOptions: { policies: [ { userAgent: '*', + disallow: ['/'], }, ], }, From 56fa499939702e1ce4ef2af5434d9c41c42ad84e Mon Sep 17 00:00:00 2001 From: PauloJeunon Date: Fri, 26 Jun 2026 16:35:51 -0300 Subject: [PATCH 2/4] Strip noindex from support.sparkpost.com via Netlify 
Edge Function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The base-URL + noindex change in this branch applies to every hostname this deploy serves. During the gap between this PR landing and the CloudFront redirect distribution going live for support.sparkpost.com, that's incorrect for SEO: archive.sparkpost.com SHOULD be noindex (it's the archived copy), but support.sparkpost.com SHOULD still be indexed so its existing link equity is preserved until CloudFront can 301 it to bird.com counterparts. If support is noindex'd during the gap, Google can't see the eventual 301s (robots.txt Disallow blocks re-crawl), so equity that would otherwise transfer to bird.com is lost instead. The longer the gap, the more equity decays. Add a Netlify Edge Function that runs on every request and, for requests with Host: support.sparkpost.com: - serves a permissive /robots.txt (no Disallow: /) - strips X-Robots-Tag from response headers - strips the noindex robots meta tag from HTML archive.sparkpost.com and all other hosts (deploy previews, *.netlify.app) pass through unmodified — noindex stays in place. REMOVE this function (and the netlify.toml comment pointing at it) once the CloudFront cutover is complete. After that, support.sparkpost.com no longer hits Netlify, so the hostname-conditional logic becomes dead code. Co-Authored-By: Claude Opus 4.7 (1M context) --- netlify.toml | 5 + .../host-conditional-noindex.ts | 91 +++++++++++++++++++ 2 files changed, 96 insertions(+) create mode 100644 netlify/edge-functions/host-conditional-noindex.ts diff --git a/netlify.toml b/netlify.toml index d891011c5..99a583815 100644 --- a/netlify.toml +++ b/netlify.toml @@ -9,6 +9,11 @@ package = "@netlify/plugin-nextjs" for = "/*" [headers.values] + # Applies to every host this deploy serves. 
The host-conditional-noindex + # edge function strips this header (and the noindex tag) for + # support.sparkpost.com requests during the CloudFront migration gap, so + # only archive.sparkpost.com (and deploy previews) actually receive it. + # See netlify/edge-functions/host-conditional-noindex.ts. X-Robots-Tag = "noindex, nofollow" Strict-Transport-Security = ''' diff --git a/netlify/edge-functions/host-conditional-noindex.ts b/netlify/edge-functions/host-conditional-noindex.ts new file mode 100644 index 000000000..877f3d694 --- /dev/null +++ b/netlify/edge-functions/host-conditional-noindex.ts @@ -0,0 +1,91 @@ +// Temporary edge function for the support.sparkpost.com → archive.sparkpost.com +// migration. Until the CloudFront redirect distribution is live (then DNS for +// support.sparkpost.com flips off Netlify), this single Netlify deploy serves +// both hostnames. The desired SEO behavior is asymmetric: +// +// archive.sparkpost.com: served as built — noindex , X-Robots-Tag, and +// Disallow: / in robots.txt all intact, so search engines drop the archive. +// support.sparkpost.com: keep search engines indexing as before, so existing +// link equity is preserved until the CloudFront layer can 301 it to bird.com. +// +// Without this function, the noindex signals (set in seo.tsx, netlify.toml, and +// next-sitemap.js) would apply to both hostnames equally — which would start +// deindexing support.sparkpost.com before the 301s exist, losing link equity +// that would otherwise transfer to bird.com via the CloudFront redirects. +// +// This function differentiates by Host header: +// - support.sparkpost.com: strip X-Robots-Tag, strip the noindex from +// HTML responses, and override /robots.txt with a permissive version. +// - archive.sparkpost.com or any other host (deploy previews, *.netlify.app): +// pass through unmodified. 
+// +// REMOVE THIS FUNCTION (and the netlify.toml registration) once the CloudFront +// cutover is complete — support.sparkpost.com will no longer hit Netlify, so the +// hostname-conditional logic becomes dead code. + +import type { Context } from '@netlify/edge-functions'; + +const INDEX_HOST = 'support.sparkpost.com'; + +// Match in either attribute order +// and with whitespace variations. Only the noindex/nofollow robots meta is +// targeted — any other meta tags (description, og:*, viewport, etc.) are left +// alone. +const NOINDEX_META_PATTERNS: RegExp[] = [ + /]*name=["']robots["'][^>]*content=["'][^"']*noindex[^"']*["'][^>]*\/?>/gi, + /]*content=["'][^"']*noindex[^"']*["'][^>]*name=["']robots["'][^>]*\/?>/gi, +]; + +export default async (request: Request, context: Context): Promise => { + const host = request.headers.get('host') ?? ''; + + // Only act on support.sparkpost.com. Every other host — including + // archive.sparkpost.com, *.netlify.app deploy URLs, and deploy previews — + // passes through with whatever the build emits (i.e. noindex stays on). + if (host !== INDEX_HOST) return; + + const url = new URL(request.url); + + // Override /robots.txt with a permissive version so Google can crawl + // support.sparkpost.com normally. Without this, the built robots.txt's + // `Disallow: /` would block Google from re-crawling, which would prevent it + // from seeing the 301s once CloudFront comes online. + if (url.pathname === '/robots.txt') { + return new Response('User-agent: *\n', { + status: 200, + headers: { 'Content-Type': 'text/plain; charset=utf-8' }, + }); + } + + // For everything else, fetch the response normally, then mutate it. + const response = await context.next(); + + // Strip the noindex HTTP header (set globally in netlify.toml). + response.headers.delete('X-Robots-Tag'); + + // Only HTML responses can carry the noindex tag. Skip non-HTML + // (images, JS, CSS, JSON) to avoid pointlessly buffering large bodies. 
+ const contentType = response.headers.get('content-type') ?? ''; + if (!contentType.includes('text/html')) { + return response; + } + + let body = await response.text(); + for (const pattern of NOINDEX_META_PATTERNS) { + body = body.replace(pattern, ''); + } + + // Rebuild the response with the mutated body. content-length is dropped so + // Netlify can recompute it for the modified body. + const headers = new Headers(response.headers); + headers.delete('content-length'); + return new Response(body, { + status: response.status, + statusText: response.statusText, + headers, + }); +}; + +export const config = { + path: '/*', +}; From 788e98f5148e2d61d07bc52a94656a29d10a707c Mon Sep 17 00:00:00 2001 From: PauloJeunon Date: Fri, 26 Jun 2026 16:45:14 -0300 Subject: [PATCH 3/4] Override X-Robots-Tag with "all" instead of deleting it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Discovered via local netlify dev testing: `response.headers.delete('X-Robots-Tag')` is silently ignored when the header is set in netlify.toml's [[headers]] block. Netlify re-applies the netlify.toml headers after the edge function returns, overriding any deletions — but it respects values the function explicitly sets. Switch from delete() to set('X-Robots-Tag', 'all'). Google treats X-Robots-Tag: all as equivalent to no header (canonical "ignore any prior noindex; index normally"), so the effect is identical to the intended deletion. Other header sets (X-Edge-Probe sentinel during testing) confirmed mutations survive — only deletes of netlify.toml-sourced headers are eaten. 
Verified locally with netlify dev across the full host × content-type matrix: - archive.sparkpost.com HTML: X-Robots-Tag: noindex, nofollow + meta intact ✓ - support.sparkpost.com HTML: X-Robots-Tag: all + meta stripped ✓ - archive.sparkpost.com /robots.txt: Disallow: / served as built ✓ - support.sparkpost.com /robots.txt: permissive (User-agent: *) ✓ - support.sparkpost.com JSON: X-Robots-Tag: all + body untouched ✓ - archive.sparkpost.com JSON: X-Robots-Tag: noindex + body untouched ✓ - Deploy preview host (deploy-preview-*.netlify.app): noindex preserved ✓ Co-Authored-By: Claude Opus 4.7 (1M context) --- netlify/edge-functions/host-conditional-noindex.ts | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/netlify/edge-functions/host-conditional-noindex.ts b/netlify/edge-functions/host-conditional-noindex.ts index 877f3d694..2f98c2b5e 100644 --- a/netlify/edge-functions/host-conditional-noindex.ts +++ b/netlify/edge-functions/host-conditional-noindex.ts @@ -14,8 +14,9 @@ // that would otherwise transfer to bird.com via the CloudFront redirects. // // This function differentiates by Host header: -// - support.sparkpost.com: strip X-Robots-Tag, strip the noindex from -// HTML responses, and override /robots.txt with a permissive version. +// - support.sparkpost.com: override X-Robots-Tag with "all", strip the noindex +// from HTML responses, and override /robots.txt with a permissive +// version. // - archive.sparkpost.com or any other host (deploy previews, *.netlify.app): // pass through unmodified. // @@ -60,8 +61,12 @@ export default async (request: Request, context: Context): Promise tag. Skip non-HTML // (images, JS, CSS, JSON) to avoid pointlessly buffering large bodies. From 54d49ea22a091b857ad05e235577f74f351d8cd7 Mon Sep 17 00:00:00 2001 From: PauloJeunon Date: Fri, 26 Jun 2026 17:45:03 -0300 Subject: [PATCH 4/4] Address PR review: strip Content-Encoding; exclude edge functions from tsc Two issues flagged in the PR: 1. 
Cursor Bugbot (high severity): after rewriting an HTML response body in the edge function for support.sparkpost.com, only `content-length` was removed from the copied headers. If the origin's response was gzip- or brotli-encoded (which Netlify's CDN does opportunistically), the `response.text()` call decoded the body to plain text but the stale `Content-Encoding` header survived. Clients would then try to decompress plain text and fail. Fix: also delete `content-encoding` when rebuilding the response. 2. Build failure under tsc: `import type { Context } from '@netlify/edge-functions'` fails Next.js's TypeScript check because the package is only present in the Netlify edge build environment, not in node_modules. Edge functions run on Deno with Netlify-provided globals, not Node, so they're a separate compilation unit. Add `netlify` to tsconfig.json's `exclude` so `next build` (and the Cypress CI's prebuild) stops scanning the directory. Netlify's edge function build still typechecks the files on the server side with the correct types. Co-Authored-By: Claude Opus 4.7 (1M context) --- netlify/edge-functions/host-conditional-noindex.ts | 5 ++++- tsconfig.json | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/netlify/edge-functions/host-conditional-noindex.ts b/netlify/edge-functions/host-conditional-noindex.ts index 2f98c2b5e..b1e1a3dbc 100644 --- a/netlify/edge-functions/host-conditional-noindex.ts +++ b/netlify/edge-functions/host-conditional-noindex.ts @@ -81,9 +81,12 @@ export default async (request: Request, context: Context): Promise