diff --git a/packages/git/src/queries.test.ts b/packages/git/src/queries.test.ts index 6eab2e7b4..77d387ea6 100644 --- a/packages/git/src/queries.test.ts +++ b/packages/git/src/queries.test.ts @@ -6,6 +6,7 @@ import { createGitClient } from "./client"; import { detectDefaultBranch, getBranchDiffPatchesByPath, + getChangedFilesDetailed, splitUnifiedDiffByFile, } from "./queries"; @@ -242,3 +243,72 @@ describe("getBranchDiffPatchesByPath", () => { } }); }); + +// Picked to land well past the default 64KB highWaterMark of +// `createReadStream` so the regression test actually exercises the +// across-chunk path of the streaming line counter. +const LINE_COUNT_LARGER_THAN_READ_STREAM_CHUNK = 800_000; + +describe("getChangedFilesDetailed > untracked line counts", () => { + let repoDir: string; + + afterEach(async () => { + if (repoDir) { + await rm(repoDir, { recursive: true, force: true }); + repoDir = ""; + } + }); + + it.each([ + { name: "trailing newline", content: "a\nb\nc\n", expected: 3 }, + { name: "no trailing newline", content: "a\nb\nc", expected: 3 }, + { name: "single byte, no newline", content: "a", expected: 1 }, + { name: "lone newline", content: "\n", expected: 1 }, + { name: "consecutive newlines", content: "\n\n", expected: 2 }, + // CRLF: legacy `split("\n")` counted only `\n` separators, so + // `"a\r\nb\r\n"` -> 2 lines. Byte-counter matches. + { name: "CRLF endings", content: "a\r\nb\r\n", expected: 2 }, + ])("counts $name as $expected line(s)", async ({ content, expected }) => { + repoDir = await setupRepo(); + await writeFile(path.join(repoDir, "f.txt"), content); + + const files = await getChangedFilesDetailed(repoDir); + const f = files.find((file) => file.path === "f.txt"); + + expect(f).toMatchObject({ status: "untracked", linesAdded: expected }); + }); + + it("reports 0 lines for empty untracked files", async () => { + repoDir = await setupRepo(); + await writeFile(path.join(repoDir, "empty.txt"), ""); + + const files = await getChangedFilesDetailed(repoDir); + const empty = files.find((f) => f.path === "empty.txt"); + + expect(empty).toMatchObject({ status: "untracked", linesAdded: 0 }); + }); + + // Regression guard for the OOM in #2218. Before the fix `countFileLines` + // read each untracked file's full content into memory via + // `fs.readFile(..., "utf-8")`, 16-way concurrent against every untracked + // path returned by `streamGitStatus` (up to 50k). On a monorepo with + // multi-MB build artifacts this exhausted the main-process V8 heap + // (`16 * file_bytes * 2` for V8's UTF-16) and froze the renderer waiting + // on the dead tRPC call. The fix stream-counts via `createReadStream`, + // so peak per-stream memory is ~64KB regardless of file size — the + // multi-MB case below would have OOM'd pre-fix and must still report an + // accurate line count. + it("stream-counts untracked files larger than the streaming chunk size", async () => { + repoDir = await setupRepo(); + const content = "a\n".repeat(LINE_COUNT_LARGER_THAN_READ_STREAM_CHUNK); + await writeFile(path.join(repoDir, "huge.txt"), content); + + const files = await getChangedFilesDetailed(repoDir); + const huge = files.find((f) => f.path === "huge.txt"); + + expect(huge).toMatchObject({ + status: "untracked", + linesAdded: LINE_COUNT_LARGER_THAN_READ_STREAM_CHUNK, + }); + }); +}); diff --git a/packages/git/src/queries.ts b/packages/git/src/queries.ts index dc4cdcf82..56aa909cd 100644 --- a/packages/git/src/queries.ts +++ b/packages/git/src/queries.ts @@ -1,3 +1,4 @@ +import { createReadStream } from "node:fs"; import * as fs from "node:fs/promises"; import * as path from "node:path"; import type { CreateGitClientOptions } from "./client"; @@ -413,11 +414,55 @@ function matchesExcludePattern(filePath: string, patterns: string[]): boolean { }); } -async function countFileLines(filePath: string): Promise { +async function countFileLines( + filePath: string, + options?: { signal?: AbortSignal }, +): Promise { try { - const content = await fs.readFile(filePath, "utf-8"); - if (!content) return 0; - return content.split("\n").length - (content.endsWith("\n") ? 1 : 0); + // `lstat` instead of `stat` so an untracked symlink (rare, but legal) + // pointing at /dev/zero or a path outside the workdir doesn't stream + // forever — symlinks fail `isFile()` and short-circuit to 0. + const stat = await fs.lstat(filePath); + if (!stat.isFile() || stat.size === 0) return 0; + return await new Promise((resolve) => { + let newlines = 0; + let lastByte = -1; + const stream = createReadStream(filePath, { signal: options?.signal }); + stream.on("data", (rawChunk) => { + // Node types stream chunks as `string | Buffer`; without an + // `encoding` option `createReadStream` always emits `Buffer`, + // so the cast is for the type checker, not the runtime. + const chunk = rawChunk as Buffer; + // Native `Buffer.indexOf` — ~10x faster than a per-byte JS loop + // on multi-MB buffers, which is the workload this whole function + // exists to handle. + for ( + let idx = chunk.indexOf(0x0a); + idx !== -1; + idx = chunk.indexOf(0x0a, idx + 1) + ) { + newlines++; + } + if (chunk.length > 0) lastByte = chunk[chunk.length - 1]; + }); + stream.on("end", () => { + // Guards against TOCTOU truncation between lstat and read — + // size > 0 at stat time, zero bytes by the time we open. + if (lastByte === -1) { + resolve(0); + return; + } + resolve(lastByte === 0x0a ? newlines : newlines + 1); + }); + stream.on("error", (err) => { + // Don't propagate — caller already treats any failure as 0 lines. + // But log so the next time a "shows 0 lines" mystery shows up + // there's a breadcrumb (the original OOM in #2218 hid behind the + // same silent-zero return). + console.warn(`countFileLines failed for ${filePath}:`, err); + resolve(0); + }); + }); } catch { return 0; } @@ -427,12 +472,14 @@ async function mapWithConcurrency( items: readonly T[], concurrency: number, mapper: (item: T) => Promise, + options?: { signal?: AbortSignal }, ): Promise { if (items.length === 0) return []; const results = new Array(items.length); let index = 0; const worker = async () => { while (index < items.length) { + if (options?.signal?.aborted) return; const i = index++; results[i] = await mapper(items[i]); } @@ -521,7 +568,11 @@ export async function getChangedFilesDetailed( const untrackedLineCounts = await mapWithConcurrency( untrackedToCount, 16, - (file) => countFileLines(path.join(baseDir, file)), + (file) => + countFileLines(path.join(baseDir, file), { + signal: gitOptions?.abortSignal, + }), + { signal: gitOptions?.abortSignal }, ); for (let i = 0; i < untrackedToCount.length; i++) { files.push({