diff --git a/packages/vinext/src/entries/pages-server-entry.ts b/packages/vinext/src/entries/pages-server-entry.ts
index 2477f2641..00eeb8a4f 100644
--- a/packages/vinext/src/entries/pages-server-entry.ts
+++ b/packages/vinext/src/entries/pages-server-entry.ts
@@ -114,6 +114,7 @@ export async function generateServerEntry(
headers: nextConfig?.headers ?? [],
expireTime: nextConfig?.expireTime,
cacheMaxMemorySize: nextConfig?.cacheMaxMemorySize,
+ htmlLimitedBots: nextConfig?.htmlLimitedBots,
i18n: nextConfig?.i18n ?? null,
// Mirrors Next.js `experimental.disableOptimizedLoading` — when false
// (the default), page scripts are emitted with `defer` in
// <head>. See
@@ -358,6 +359,7 @@ const _renderPage = __createPagesPageHandler({
assetPrefix: vinextConfig.assetPrefix,
trailingSlash: vinextConfig.trailingSlash,
expireTime: vinextConfig.expireTime,
+ htmlLimitedBots: vinextConfig.htmlLimitedBots,
clientTraceMetadata: vinextConfig.clientTraceMetadata,
disableOptimizedLoading: vinextConfig.disableOptimizedLoading,
},
diff --git a/packages/vinext/src/index.ts b/packages/vinext/src/index.ts
index 85fd8452d..393d7ab14 100644
--- a/packages/vinext/src/index.ts
+++ b/packages/vinext/src/index.ts
@@ -3793,6 +3793,7 @@ export default function vinext(options: VinextOptions = {}): PluginOption[] {
(nextConfig?.rewrites.afterFiles.length ?? 0) > 0 ||
(nextConfig?.rewrites.fallback.length ?? 0) > 0,
nextConfig?.clientTraceMetadata,
+ nextConfig?.htmlLimitedBots,
);
flushStagedHeaders();
flushRequestHeaders();
diff --git a/packages/vinext/src/server/dev-server.ts b/packages/vinext/src/server/dev-server.ts
index 2b86baeed..1fc2e29d1 100644
--- a/packages/vinext/src/server/dev-server.ts
+++ b/packages/vinext/src/server/dev-server.ts
@@ -66,6 +66,7 @@ import {
} from "./pages-document-initial-props.js";
import { callDocumentGetInitialProps } from "./document-initial-head.js";
import { loadPagesGetInitialProps } from "./pages-get-initial-props.js";
+import { isBotUserAgent } from "../utils/html-limited-bots.js";
/**
* Render a React element to a string using renderToReadableStream.
@@ -409,6 +410,7 @@ export function createSSRHandler(
* `next.config`. When undefined or empty, no meta tags are emitted.
*/
clientTraceMetadata?: readonly string[],
+ htmlLimitedBots?: string,
) {
const matcher = fileMatcher ?? createValidFileMatcher();
@@ -740,7 +742,10 @@ export function createSSRHandler(
// Render the loading shell for `fallback: true` when the path
// wasn't pre-rendered. Data requests still resolve real props so
// the client can swap in after the shell ships.
- if (fallback === true && !isValidPath && !isDataReq) {
+ const userAgentHeader = req.headers["user-agent"];
+ const userAgent = Array.isArray(userAgentHeader) ? userAgentHeader[0] : userAgentHeader;
+ const isBotRequest = !!userAgent && isBotUserAgent(userAgent, htmlLimitedBots);
+ if (fallback === true && !isValidPath && !isDataReq && !isBotRequest) {
isFallbackRender = true;
if (typeof routerShim.setSSRContext === "function") {
routerShim.setSSRContext({
diff --git a/packages/vinext/src/server/pages-page-data.ts b/packages/vinext/src/server/pages-page-data.ts
index 65700724c..689c6349d 100644
--- a/packages/vinext/src/server/pages-page-data.ts
+++ b/packages/vinext/src/server/pages-page-data.ts
@@ -25,6 +25,7 @@ import {
import { buildNextDataJsonResponse } from "./pages-data-route.js";
import { NEXTJS_DEPLOYMENT_ID_HEADER } from "./headers.js";
import { isSerializableProps } from "./pages-serializable-props.js";
+import { isBotUserAgent } from "../utils/html-limited-bots.js";
type PagesRedirectResult = {
destination: string;
@@ -183,6 +184,7 @@ export type ResolvePagesPageDataOptions = {
* Typically sourced from `process.env.__VINEXT_DEPLOYMENT_ID || process.env.NEXT_DEPLOYMENT_ID`.
*/
deploymentId?: string;
+ htmlLimitedBots?: string;
pageModule: PagesPageModule;
params: Record;
query: Record;
@@ -544,7 +546,9 @@ export async function resolvePagesPageData(
// Render the fallback shell for unlisted paths under `fallback: true`.
// Data requests resolve props normally so the client can fill in after
// the loading shell ships (`fallback: 'blocking'` keeps SSRing as before).
- if (fallback === true && !isValidPath && !options.isDataReq) {
+ const isBotRequest =
+ !!options.userAgent && isBotUserAgent(options.userAgent, options.htmlLimitedBots);
+ if (fallback === true && !isValidPath && !options.isDataReq && !isBotRequest) {
isFallback = true;
}
}
diff --git a/packages/vinext/src/server/pages-page-handler.ts b/packages/vinext/src/server/pages-page-handler.ts
index 2d51ef3cb..8103d76a4 100644
--- a/packages/vinext/src/server/pages-page-handler.ts
+++ b/packages/vinext/src/server/pages-page-handler.ts
@@ -83,6 +83,7 @@ type VinextConfigSubset = {
assetPrefix: string;
trailingSlash: boolean;
expireTime?: number;
+ htmlLimitedBots?: string;
clientTraceMetadata?: readonly string[];
disableOptimizedLoading: boolean;
};
@@ -511,6 +512,7 @@ export function createPagesPageHandler(
applyRequestContexts: applySSRContext,
buildId,
deploymentId: process.env.__VINEXT_DEPLOYMENT_ID || process.env.NEXT_DEPLOYMENT_ID,
+ htmlLimitedBots: vinextConfig.htmlLimitedBots,
createGsspReqRes() {
return createPagesReqRes({ body: undefined, query, request, url: routeUrl });
},
diff --git a/packages/vinext/src/utils/html-limited-bots.ts b/packages/vinext/src/utils/html-limited-bots.ts
index 3d6adcff0..90f57c838 100644
--- a/packages/vinext/src/utils/html-limited-bots.ts
+++ b/packages/vinext/src/utils/html-limited-bots.ts
@@ -2,6 +2,13 @@
// packages/next/src/shared/lib/router/utils/html-bots.ts
const HTML_LIMITED_BOT_UA_RE_STRING = String.raw`[\w-]+-Google|Google-[\w-]+|Chrome-Lighthouse|Slurp|DuckDuckBot|baiduspider|yandex|sogou|bitlybot|tumblr|vkShare|quora link preview|redditbot|ia_archiver|Bingbot|BingPreview|applebot|facebookexternalhit|facebookcatalog|Twitterbot|LinkedInBot|Slackbot|Discordbot|WhatsApp|SkypeUriPreview|Yeti|googleweblight`;
+// Headless browser bot (executes JS). Mirrors Next.js
+// `HEADLESS_BROWSER_BOT_UA_RE` in
+// `.nextjs-ref/packages/next/src/shared/lib/router/utils/is-bot.ts`.
+// Matches "Googlebot" unless "-" follows (e.g. "Googlebot-Image"); crawlers like
+// "Mediapartners-Google" / "AdsBot-Google" hit the default HTML-limited list.
+const HEADLESS_BROWSER_BOT_UA_RE = /Googlebot(?!-)|Googlebot$/i;
+
const htmlLimitedBotRegexCache = new Map();
export function getHtmlLimitedBotRegex(htmlLimitedBots: string | undefined): RegExp {
@@ -13,3 +20,25 @@ export function getHtmlLimitedBotRegex(htmlLimitedBots: string | undefined): Reg
htmlLimitedBotRegexCache.set(source, regex);
return regex;
}
+
+/**
+ * Reports whether a User-Agent string belongs to a known crawler/bot.
+ *
+ * Tests the headless-browser pattern (Googlebot proper) first, then the
+ * HTML-limited pattern from `getHtmlLimitedBotRegex` (Bingbot, DuckDuckBot,
+ * facebookexternalhit, …); either match counts. Mirrors Next.js's `isBot()`.
+ *
+ * The Pages Router fallback path relies on this so a bot requesting an
+ * unlisted `fallback: true` route is rendered synchronously with real
+ * content instead of receiving the loading shell.
+ * @param userAgent - Raw User-Agent value; a falsy value is never a bot.
+ * @param htmlLimitedBots - next.config override for the HTML-limited list.
+ * @returns `true` when either pattern matches.
+ */
+export function isBotUserAgent(userAgent: string, htmlLimitedBots?: string): boolean {
+  return (
+    !!userAgent &&
+    (HEADLESS_BROWSER_BOT_UA_RE.test(userAgent) ||
+      getHtmlLimitedBotRegex(htmlLimitedBots).test(userAgent))
+  );
+}
diff --git a/tests/pages-page-data.test.ts b/tests/pages-page-data.test.ts
index e0b112904..3dde4a39f 100644
--- a/tests/pages-page-data.test.ts
+++ b/tests/pages-page-data.test.ts
@@ -282,6 +282,70 @@ describe("pages page data", () => {
await expect(result.response.text()).resolves.toBe("{}");
});
+ // Refs #1543: a crawler/bot UA hitting an unlisted `fallback: true` path
+ // must NOT receive the loading shell — it should render synchronously so
+ // the bot indexes real content. Mirrors Next.js's bot check in
+ // `.nextjs-ref/packages/next/src/server/route-modules/pages/pages-handler.ts`.
+  it("does not set isFallback for bot User-Agent on unlisted fallback: true paths", async () => {
+    // Flipped inside getStaticProps so we can prove the bot request ran it.
+    let staticPropsRan = false;
+    const googlebotUA = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)";
+    const pageData = await resolvePagesPageData(
+      createOptions({
+        pageModule: {
+          getStaticPaths: async () => ({
+            fallback: true,
+            paths: [{ params: { slug: "hello-world" } }],
+          }),
+          getStaticProps: async ({ params }) => {
+            staticPropsRan = true;
+            return { props: { slug: params?.slug ?? null } };
+          },
+        },
+        params: { slug: "unknown" },
+        query: { slug: "unknown" },
+        route: { isDynamic: true },
+        routeUrl: "/posts/unknown",
+        userAgent: googlebotUA,
+      }),
+    );
+
+    expect(pageData.kind).toBe("render");
+    if (pageData.kind !== "render") throw new Error("expected render result");
+    expect(pageData.isFallback).toBe(false);
+    expect(staticPropsRan).toBe(true);
+    expect(pageData.pageProps).toMatchObject({ slug: "unknown" });
+  });
+
+  it("sets isFallback for normal browser User-Agent on unlisted fallback: true paths", async () => {
+    const chromeUA =
+      "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36";
+    const pageData = await resolvePagesPageData(
+      createOptions({
+        pageModule: {
+          getStaticPaths: async () => ({
+            fallback: true,
+            paths: [{ params: { slug: "hello-world" } }],
+          }),
+          // A fallback shell render must never reach getStaticProps.
+          getStaticProps: async () => {
+            throw new Error("getStaticProps should not run on a fallback shell render");
+          },
+        },
+        params: { slug: "unknown" },
+        query: { slug: "unknown" },
+        route: { isDynamic: true },
+        routeUrl: "/posts/unknown",
+        userAgent: chromeUA,
+      }),
+    );
+
+    expect(pageData.kind).toBe("render");
+    if (pageData.kind !== "render") throw new Error("expected render result");
+    expect(pageData.isFallback).toBe(true);
+    expect(pageData.pageProps).toEqual({});
+  });
+
it("short-circuits getServerSideProps responses after res.end()", async () => {
const responsePromise = Promise.resolve(
new Response('{"ok":true}', {
diff --git a/tests/pages-router.test.ts b/tests/pages-router.test.ts
index f7fe6450f..190ac706b 100644
--- a/tests/pages-router.test.ts
+++ b/tests/pages-router.test.ts
@@ -1601,6 +1601,56 @@ describe("Pages Router integration", () => {
expect(json.pageProps).toMatchObject({ pid: "unknown" });
});
+ // Refs #1543: bot/crawler requests must bypass the `fallback: true` loading
+ // shell and synchronously render real content so crawlers index the page,
+ // not `Loading...`. Mirrors Next.js's bot check in
+ // `.nextjs-ref/packages/next/src/server/route-modules/pages/pages-handler.ts`
+ // and the Next.js e2e regression test
+ // `.nextjs-ref/test/e2e/prerender-crawler.test.ts`.
+  it("renders synchronously (not the fallback shell) for crawler UAs on unlisted fallback: true paths", async () => {
+    const crawlerUserAgents = [
+      "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)",
+      "Mozilla/5.0 (compatible; Bingbot/2.0; +http://www.bing.com/bingbot.htm)",
+      "DuckDuckBot/1.0; (+http://duckduckgo.com/duckduckbot.html)",
+      "Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)",
+      "facebookexternalhit/1.0 (+http://www.facebook.com/externalhit_uatext.php)",
+    ];
+    for (const crawlerUA of crawlerUserAgents) {
+      const label = `UA: ${crawlerUA}`;
+      const slug = `bot-slug-${Math.random().toString(36).slice(2)}`;
+      const response = await fetch(`${baseUrl}/products/${slug}`, {
+        headers: { "user-agent": crawlerUA },
+      });
+      expect(response.status, label).toBe(200);
+      const body = await response.text(); // must be the real page, not the loading shell
+      expect(body, label).not.toContain("Loading product...");
+      expect(body, label).toMatch(new RegExp(`Product ID:.*${slug}`));
+      const embedded = body.match(/__NEXT_DATA__\s*=\s*(\{.*?\})\s*[;<]/);
+      expect(embedded, label).toBeTruthy();
+      const nextData = JSON.parse(embedded![1]);
+      expect(nextData.isFallback, label).toBe(false);
+      expect(nextData.props.pageProps).toMatchObject({ pid: slug });
+    }
+  });
+
+  it("still ships the fallback shell for normal browser UAs on unlisted fallback: true paths", async () => {
+    // Guards against over-matching: an ordinary Chrome UA is not a bot, so
+    // the unlisted path must still be answered with the loading shell.
+    const chromeUA =
+      "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36";
+    const response = await fetch(`${baseUrl}/products/non-bot-slug`, {
+      headers: { "user-agent": chromeUA },
+    });
+    expect(response.status).toBe(200);
+    const body = await response.text();
+    expect(body).toContain("Loading product...");
+    // The serialized page data must agree with the rendered shell.
+    const embedded = body.match(/__NEXT_DATA__\s*=\s*(\{.*?\})\s*[;<]/);
+    expect(embedded).toBeTruthy();
+    const nextData = JSON.parse(embedded![1]);
+    expect(nextData.isFallback).toBe(true);
+  });
+
it("includes isFallback: false in __NEXT_DATA__", async () => {
const res = await fetch(`${baseUrl}/products/widget`);
const html = await res.text();