Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion components/site/seo.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ const SEO = (props: SeoProps): JSX.Element => {
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<link rel="canonical" href={router.asPath}></link>
<link rel="shortcut icon" type="image/png" href="/favicon.png" />
<meta name="robots" content="index, follow, max-image-preview:large, max-snippet:-1, max-video-preview:-1" />
<meta name="robots" content="noindex, nofollow" />
{title && (
<>
<title>{title}</title>
Expand Down
9 changes: 8 additions & 1 deletion netlify.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,13 @@ package = "@netlify/plugin-nextjs"
for = "/*"

[headers.values]
# Applies to every host this deploy serves. The host-conditional-noindex
# edge function overrides this header with "all" (and strips the noindex
# <meta> tag) for support.sparkpost.com requests during the CloudFront
# migration gap, so only archive.sparkpost.com (and deploy previews)
# actually receive the noindex value.
# See netlify/edge-functions/host-conditional-noindex.ts.
X-Robots-Tag = "noindex, nofollow"

Strict-Transport-Security = '''
max-age=31536000;
includeSubDomains;'''
Expand Down Expand Up @@ -47,7 +54,7 @@ package = "@netlify/plugin-nextjs"
'self'
data:
fonts.gstatic.com
support.sparkpost.com;
archive.sparkpost.com;
connect-src
*
data:
Expand Down
99 changes: 99 additions & 0 deletions netlify/edge-functions/host-conditional-noindex.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
// Temporary edge function for the support.sparkpost.com → archive.sparkpost.com
// migration. Until the CloudFront redirect distribution is live (then DNS for
// support.sparkpost.com flips off Netlify), this single Netlify deploy serves
// both hostnames. The desired SEO behavior is asymmetric:
//
// archive.sparkpost.com: served as built — noindex <meta>, X-Robots-Tag, and
// Disallow: / in robots.txt all intact, so search engines drop the archive.
// support.sparkpost.com: keep search engines indexing as before, so existing
// link equity is preserved until the CloudFront layer can 301 it to bird.com.
//
// Without this function, the noindex signals (set in seo.tsx, netlify.toml, and
// next-sitemap.js) would apply to both hostnames equally — which would start
// deindexing support.sparkpost.com before the 301s exist, losing link equity
// that would otherwise transfer to bird.com via the CloudFront redirects.
//
// This function differentiates by Host header:
// - support.sparkpost.com: override X-Robots-Tag with "all", strip the noindex
// <meta> from HTML responses, and override /robots.txt with a permissive
// version.
// - archive.sparkpost.com or any other host (deploy previews, *.netlify.app):
// pass through unmodified.
//
// REMOVE THIS FUNCTION (and the netlify.toml registration) once the CloudFront
// cutover is complete — support.sparkpost.com will no longer hit Netlify, so the
// hostname-conditional logic becomes dead code.

import type { Context } from '@netlify/edge-functions';

// The one hostname that must stay indexable while this deploy serves both
// sites. Compared against a normalized (lowercase, port-less) request host.
const INDEX_HOST = 'support.sparkpost.com';

// Match <meta name="robots" content="...noindex..."> in either attribute order
// and with whitespace variations. Only the noindex/nofollow robots meta is
// targeted — any other meta tags (description, og:*, viewport, etc.) are left
// alone.
const NOINDEX_META_PATTERNS: RegExp[] = [
  /<meta\s[^>]*name=["']robots["'][^>]*content=["'][^"']*noindex[^"']*["'][^>]*\/?>/gi,
  /<meta\s[^>]*content=["'][^"']*noindex[^"']*["'][^>]*name=["']robots["'][^>]*\/?>/gi,
];

// Statuses for which the Fetch spec forbids a body. `new Response(body, ...)`
// throws a TypeError for these when `body` is non-null (even an empty string),
// so such responses must never be rebuilt.
const NULL_BODY_STATUSES: ReadonlySet<number> = new Set([101, 204, 205, 304]);

// Canonicalize a Host header value for comparison: hostnames are
// case-insensitive, may carry an explicit ":port", and may be written as a
// fully-qualified name with a trailing dot.
const normalizeHost = (rawHost: string): string =>
  rawHost.trim().toLowerCase().replace(/:\d+$/, '').replace(/\.$/, '');

/**
 * Edge handler that keeps support.sparkpost.com indexable while every other
 * host served by this deploy keeps the build's noindex signals.
 *
 * @param request - Incoming request; its Host header (or, if that is absent,
 *   the hostname of `request.url`) selects the behavior.
 * @param context - Netlify edge context; `context.next()` yields the response
 *   the rest of the chain would have produced.
 * @returns `undefined` to pass the request through untouched (any host other
 *   than INDEX_HOST); otherwise a permissive robots.txt, or the downstream
 *   response with `X-Robots-Tag: all` and, for HTML bodies, the noindex robots
 *   <meta> tag removed.
 */
const handler = async (request: Request, context: Context): Promise<Response | void> => {
  const url = new URL(request.url);

  // `||` rather than `??` on purpose: an empty Host header is as useless as a
  // missing one, so both fall back to the hostname of the request URL.
  const host = normalizeHost(request.headers.get('host') || url.hostname);

  // Only act on support.sparkpost.com. Every other host — including
  // archive.sparkpost.com, *.netlify.app deploy URLs, and deploy previews —
  // passes through with whatever the build emits (i.e. noindex stays on).
  if (host !== INDEX_HOST) return;

  // Override /robots.txt with a permissive version so Google can crawl
  // support.sparkpost.com normally. Without this, the built robots.txt's
  // `Disallow: /` would block Google from re-crawling, which would prevent it
  // from seeing the 301s once CloudFront comes online.
  if (url.pathname === '/robots.txt') {
    return new Response('User-agent: *\n', {
      status: 200,
      headers: { 'Content-Type': 'text/plain; charset=utf-8' },
    });
  }

  // For everything else, fetch the response normally, then mutate it.
  const response = await context.next();

  // Override the X-Robots-Tag set in netlify.toml. `delete()` on this header is
  // silently ignored — Netlify re-applies the netlify.toml [[headers]] block
  // after the edge function returns, but it respects values WE set. So we
  // overwrite with "all" (canonical "ignore any prior noindex; index normally"),
  // which Google treats as equivalent to no header at all.
  response.headers.set('X-Robots-Tag', 'all');

  // Only HTML responses with an actual body can carry the noindex <meta> tag.
  // Skip non-HTML (images, JS, CSS, JSON) to avoid pointlessly buffering large
  // bodies, and skip bodiless responses (HEAD, 204, 304, ...) because
  // rebuilding them with a string body would throw for null-body statuses.
  const contentType = (response.headers.get('content-type') ?? '').toLowerCase();
  const hasBody = response.body !== null && !NULL_BODY_STATUSES.has(response.status);
  if (!hasBody || !contentType.includes('text/html')) {
    return response;
  }

  let body = await response.text();
  for (const pattern of NOINDEX_META_PATTERNS) {
    body = body.replace(pattern, '');
  }

  // Rebuild the response with the mutated body. content-length is dropped so
  // Netlify recomputes it. content-encoding is dropped because response.text()
  // already decoded any gzip/brotli the origin sent — leaving the header on
  // would tell the client to decompress a now-plain-text body and fail.
  const headers = new Headers(response.headers);
  headers.delete('content-length');
  headers.delete('content-encoding');
  return new Response(body, {
    status: response.status,
    statusText: response.statusText,
    headers,
  });
};

export default handler;

// Edge-function configuration export: the path pattern ('/*') this function
// is declared for.
export const config = { path: '/*' };
3 changes: 2 additions & 1 deletion next-sitemap.js
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
module.exports = {
siteUrl: process.env.SITE_URL || 'https://support.sparkpost.com',
siteUrl: process.env.SITE_URL || 'https://archive.sparkpost.com',
generateRobotsTxt: true,
sitemapSize: 5000,
robotsTxtOptions: {
policies: [
{
userAgent: '*',
disallow: ['/'],
},
],
},
Expand Down
2 changes: 1 addition & 1 deletion tsconfig.json
Original file line number Diff line number Diff line change
Expand Up @@ -24,5 +24,5 @@
"@context/*": ["context/*"]
},
"include": ["next-env.d.ts", "yaml.d.ts", "**/*.ts", "**/*.tsx"],
"exclude": ["node_modules"]
"exclude": ["node_modules", "netlify"]
}
Loading