Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 70 additions & 0 deletions packages/git/src/queries.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import { createGitClient } from "./client";
import {
detectDefaultBranch,
getBranchDiffPatchesByPath,
getChangedFilesDetailed,
splitUnifiedDiffByFile,
} from "./queries";

Expand Down Expand Up @@ -242,3 +243,72 @@ describe("getBranchDiffPatchesByPath", () => {
}
});
});

// Picked to land well past the default 64KB highWaterMark of
// `createReadStream` so the regression test actually exercises the
// across-chunk path of the streaming line counter.
const LINE_COUNT_LARGER_THAN_READ_STREAM_CHUNK = 800_000;

describe("getChangedFilesDetailed > untracked line counts", () => {
let repoDir: string;

afterEach(async () => {
if (repoDir) {
await rm(repoDir, { recursive: true, force: true });
repoDir = "";
}
});

it.each([
{ name: "trailing newline", content: "a\nb\nc\n", expected: 3 },
{ name: "no trailing newline", content: "a\nb\nc", expected: 3 },
{ name: "single byte, no newline", content: "a", expected: 1 },
{ name: "lone newline", content: "\n", expected: 1 },
{ name: "consecutive newlines", content: "\n\n", expected: 2 },
// CRLF: legacy `split("\n")` counted only `\n` separators, so
// `"a\r\nb\r\n"` -> 2 lines. Byte-counter matches.
{ name: "CRLF endings", content: "a\r\nb\r\n", expected: 2 },
])("counts $name as $expected line(s)", async ({ content, expected }) => {
repoDir = await setupRepo();
await writeFile(path.join(repoDir, "f.txt"), content);

const files = await getChangedFilesDetailed(repoDir);
const f = files.find((file) => file.path === "f.txt");

expect(f).toMatchObject({ status: "untracked", linesAdded: expected });
});

it("reports 0 lines for empty untracked files", async () => {
repoDir = await setupRepo();
await writeFile(path.join(repoDir, "empty.txt"), "");

const files = await getChangedFilesDetailed(repoDir);
const empty = files.find((f) => f.path === "empty.txt");

expect(empty).toMatchObject({ status: "untracked", linesAdded: 0 });
});

// Regression guard for the OOM in #2218. Before the fix `countFileLines`
// read each untracked file's full content into memory via
// `fs.readFile(..., "utf-8")`, 16-way concurrent against every untracked
// path returned by `streamGitStatus` (up to 50k). On a monorepo with
// multi-MB build artifacts this exhausted the main-process V8 heap
// (`16 * file_bytes * 2` for V8's UTF-16) and froze the renderer waiting
// on the dead tRPC call. The fix stream-counts via `createReadStream`,
// so peak per-stream memory is ~64KB regardless of file size — the
// multi-MB case below would have OOM'd pre-fix and must still report an
// accurate line count.
it("stream-counts untracked files larger than the streaming chunk size", async () => {
repoDir = await setupRepo();
const content = "a\n".repeat(LINE_COUNT_LARGER_THAN_READ_STREAM_CHUNK);
await writeFile(path.join(repoDir, "huge.txt"), content);

const files = await getChangedFilesDetailed(repoDir);
const huge = files.find((f) => f.path === "huge.txt");

expect(huge).toMatchObject({
status: "untracked",
linesAdded: LINE_COUNT_LARGER_THAN_READ_STREAM_CHUNK,
});
});
});
61 changes: 56 additions & 5 deletions packages/git/src/queries.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import { createReadStream } from "node:fs";
import * as fs from "node:fs/promises";
import * as path from "node:path";
import type { CreateGitClientOptions } from "./client";
Expand Down Expand Up @@ -413,11 +414,55 @@ function matchesExcludePattern(filePath: string, patterns: string[]): boolean {
});
}

async function countFileLines(filePath: string): Promise<number> {
async function countFileLines(
filePath: string,
options?: { signal?: AbortSignal },
): Promise<number> {
try {
const content = await fs.readFile(filePath, "utf-8");
if (!content) return 0;
return content.split("\n").length - (content.endsWith("\n") ? 1 : 0);
// `lstat` instead of `stat` so an untracked symlink (rare, but legal)
// pointing at /dev/zero or a path outside the workdir doesn't stream
// forever — symlinks fail `isFile()` and short-circuit to 0.
const stat = await fs.lstat(filePath);
if (!stat.isFile() || stat.size === 0) return 0;
return await new Promise<number>((resolve) => {
let newlines = 0;
let lastByte = -1;
const stream = createReadStream(filePath, { signal: options?.signal });
stream.on("data", (rawChunk) => {
// Node types stream chunks as `string | Buffer`; without an
// `encoding` option `createReadStream` always emits `Buffer`,
// so the cast is for the type checker, not the runtime.
const chunk = rawChunk as Buffer;
// Native `Buffer.indexOf` — ~10x faster than a per-byte JS loop
// on multi-MB buffers, which is the workload this whole function
// exists to handle.
for (
let idx = chunk.indexOf(0x0a);
idx !== -1;
idx = chunk.indexOf(0x0a, idx + 1)
) {
newlines++;
}
if (chunk.length > 0) lastByte = chunk[chunk.length - 1];
});
stream.on("end", () => {
// Guards against TOCTOU truncation between lstat and read —
// size > 0 at stat time, zero bytes by the time we open.
if (lastByte === -1) {
resolve(0);
return;
}
resolve(lastByte === 0x0a ? newlines : newlines + 1);
});
stream.on("error", (err) => {
// Don't propagate — caller already treats any failure as 0 lines.
// But log so the next time a "shows 0 lines" mystery shows up
// there's a breadcrumb (the original OOM in #2218 hid behind the
// same silent-zero return).
console.warn(`countFileLines failed for ${filePath}:`, err);
resolve(0);
});
});
} catch {
return 0;
}
Expand All @@ -427,12 +472,14 @@ async function mapWithConcurrency<T, R>(
items: readonly T[],
concurrency: number,
mapper: (item: T) => Promise<R>,
options?: { signal?: AbortSignal },
): Promise<R[]> {
if (items.length === 0) return [];
const results = new Array<R>(items.length);
let index = 0;
const worker = async () => {
while (index < items.length) {
if (options?.signal?.aborted) return;
const i = index++;
results[i] = await mapper(items[i]);
}
Expand Down Expand Up @@ -521,7 +568,11 @@ export async function getChangedFilesDetailed(
const untrackedLineCounts = await mapWithConcurrency(
untrackedToCount,
16,
(file) => countFileLines(path.join(baseDir, file)),
(file) =>
countFileLines(path.join(baseDir, file), {
signal: gitOptions?.abortSignal,
}),
{ signal: gitOptions?.abortSignal },
);
for (let i = 0; i < untrackedToCount.length; i++) {
files.push({
Expand Down
Loading