From f45c269c88816df707ce36d8b4c1bd6542914bc4 Mon Sep 17 00:00:00 2001 From: Dustin Date: Sun, 29 Mar 2026 11:56:08 -0700 Subject: [PATCH] fix: respect nested .gitignore files during indexing (#178) When walking subdirectories, load the .gitignore found in the current directory (unless an ancestor subdirectory has already supplied one) and apply its patterns against paths relative to that directory. That outermost nested gitignore is propagated to all descendant directories so that patterns like `.output` in `webapp/.gitignore` exclude `webapp/.output/` and everything inside it; a .gitignore located deeper inside an already-covered subtree is not loaded. The root .gitignore continues to be handled separately (via the existing `gitignore` parameter with repo-root-relative paths). walk_dir skips re-loading it by only checking for a nested gitignore when `rel_prefix` is non-empty (i.e. inside a subdirectory). Adds three integration tests covering: basic nested exclusion, deep descendant exclusion (gitignore two levels above), and stacking of root and nested gitignores. --- src/discover/discover.c | 77 ++++++++++++++++++++-- tests/test_discover.c | 143 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 214 insertions(+), 6 deletions(-) diff --git a/src/discover/discover.c b/src/discover/discover.c index a3aa007b..e13c2a5d 100644 --- a/src/discover/discover.c +++ b/src/discover/discover.c @@ -228,12 +228,56 @@ static void fl_add(file_list_t *fl, const char *abs_path, const char *rel_path, /* ── Recursive walk ──────────────────────────────────────────────── */ +/* + * Compute the path relative to a nested .gitignore's directory. + * + * rel_path — path relative to repo root, e.g. "webapp/src/foo.js" + * local_prefix — rel_prefix at the time the local gitignore was loaded, + * e.g. "webapp" + * + * Returns a pointer into rel_path past the local_prefix component, e.g. + * "src/foo.js", or rel_path itself when local_prefix is empty.
+ */ +static const char *local_rel_path(const char *rel_path, const char *local_prefix) { + if (!local_prefix || local_prefix[0] == '\0') { + return rel_path; + } + size_t prefix_len = strlen(local_prefix); + /* rel_path must start with local_prefix followed by '/' */ + if (strncmp(rel_path, local_prefix, prefix_len) == 0 && rel_path[prefix_len] == '/') { + return rel_path + prefix_len + 1; + } + return rel_path; +} + // NOLINTNEXTLINE(bugprone-easily-swappable-parameters,misc-no-recursion) static void walk_dir(const char *dir_path, const char *rel_prefix, const cbm_discover_opts_t *opts, const cbm_gitignore_t *gitignore, const cbm_gitignore_t *cbmignore, + const cbm_gitignore_t *local_gi, const char *local_gi_prefix, file_list_t *out) { + /* Load a nested .gitignore from this directory if: + * - we are inside a subdirectory (rel_prefix is non-empty), AND + * - no ancestor directory has already provided a local gitignore. + * The root .gitignore (rel_prefix == "") is already handled by the + * caller via the separate `gitignore` parameter, so we skip it here + * to avoid redundant matching. 
*/ + cbm_gitignore_t *owned_local_gi = NULL; + if (!local_gi && rel_prefix[0] != '\0') { + char gi_path[4096]; + snprintf(gi_path, sizeof(gi_path), "%s/.gitignore", dir_path); + struct stat gi_st; + if (stat(gi_path, &gi_st) == 0 && S_ISREG(gi_st.st_mode)) { + owned_local_gi = cbm_gitignore_load(gi_path); + if (owned_local_gi) { + local_gi = owned_local_gi; + local_gi_prefix = rel_prefix; + } + } + } + cbm_dir_t *d = cbm_opendir(dir_path); if (!d) { + cbm_gitignore_free(owned_local_gi); return; } @@ -272,16 +316,24 @@ static void walk_dir(const char *dir_path, const char *rel_prefix, const cbm_dis continue; } - /* Check gitignore */ + /* Check root gitignore (repo-relative path) */ if (gitignore && cbm_gitignore_matches(gitignore, rel_path, true)) { continue; } + /* Check nested gitignore (path relative to gitignore's directory) */ + if (local_gi) { + const char *lrel = local_rel_path(rel_path, local_gi_prefix); + if (cbm_gitignore_matches(local_gi, lrel, true)) { + continue; + } + } if (cbmignore && cbm_gitignore_matches(cbmignore, rel_path, true)) { continue; } - /* Recurse */ - walk_dir(abs_path, rel_path, opts, gitignore, cbmignore, out); + /* Recurse — pass local_gi so it applies to all descendants */ + walk_dir(abs_path, rel_path, opts, gitignore, cbmignore, local_gi, local_gi_prefix, + out); } else if (S_ISREG(st.st_mode)) { cbm_index_mode_t mode = opts ? 
opts->mode : CBM_MODE_FULL; @@ -300,10 +352,17 @@ static void walk_dir(const char *dir_path, const char *rel_prefix, const cbm_dis continue; } - /* Check gitignore */ + /* Check root gitignore (repo-relative path) */ if (gitignore && cbm_gitignore_matches(gitignore, rel_path, false)) { continue; } + /* Check nested gitignore (path relative to gitignore's directory) */ + if (local_gi) { + const char *lrel = local_rel_path(rel_path, local_gi_prefix); + if (cbm_gitignore_matches(local_gi, lrel, false)) { + continue; + } + } if (cbmignore && cbm_gitignore_matches(cbmignore, rel_path, false)) { continue; } @@ -338,6 +397,7 @@ static void walk_dir(const char *dir_path, const char *rel_prefix, const cbm_dis } cbm_closedir(d); + cbm_gitignore_free(owned_local_gi); } /* ── Public API ──────────────────────────────────────────────────── */ @@ -376,9 +436,14 @@ int cbm_discover(const char *repo_path, const cbm_discover_opts_t *opts, cbm_fil cbmignore = cbm_gitignore_load(gi_path); } - /* Walk */ + /* Walk — pass NULL for local_gi/local_gi_prefix initially; walk_dir will + * discover any nested .gitignores on-the-fly as it descends into + * subdirectories. Note: the root .gitignore is checked via the separate + * `gitignore` parameter (repo-relative paths); walk_dir does not load it + * again, because it only looks for a nested .gitignore when rel_prefix is + * non-empty (i.e. inside a subdirectory).
*/ file_list_t fl = {0}; - walk_dir(repo_path, "", opts, gitignore, cbmignore, &fl); + walk_dir(repo_path, "", opts, gitignore, cbmignore, NULL, NULL, &fl); /* Cleanup */ cbm_gitignore_free(gitignore); diff --git a/tests/test_discover.c b/tests/test_discover.c index 9770f5ae..c70d480a 100644 --- a/tests/test_discover.c +++ b/tests/test_discover.c @@ -659,6 +659,144 @@ TEST(discover_cbmignore_no_git) { PASS(); } +/* --- Issue #178: nested .gitignore files should be respected --- */ + +/* Basic nested .gitignore: webapp/.gitignore with ".output" should exclude + * webapp/.output/ and its contents from indexing. */ +TEST(discover_nested_gitignore) { + SKIP_UNIX_SHELL; + const char *base = "/tmp/test_discover_nested_gi"; + char cmd[1024]; + snprintf(cmd, sizeof(cmd), + "rm -rf %s && mkdir -p %s/.git && " + "mkdir -p %s/webapp/.output/chunks && " + "mkdir -p %s/webapp/src && " + "printf '.output\\n' > %s/webapp/.gitignore && " + "echo 'package main' > %s/main.go && " + "echo 'const x = 1' > %s/webapp/src/app.js && " + "echo 'const chunk = 1' > %s/webapp/.output/chunks/app.js", + base, base, base, base, base, base, base, base); + system(cmd); + + cbm_discover_opts_t opts = {0}; + cbm_file_info_t *files = NULL; + int count = 0; + + int rc = cbm_discover(base, &opts, &files, &count); + ASSERT_EQ(rc, 0); + + bool found_output = false; + for (int i = 0; i < count; i++) { + if (strstr(files[i].rel_path, ".output")) + found_output = true; + } + ASSERT_FALSE(found_output); /* .output excluded by webapp/.gitignore */ + + /* webapp/src/app.js and main.go should be present */ + bool found_app = false, found_main = false; + for (int i = 0; i < count; i++) { + if (strstr(files[i].rel_path, "webapp/src/app.js")) + found_app = true; + if (strstr(files[i].rel_path, "main.go")) + found_main = true; + } + ASSERT_TRUE(found_app); + ASSERT_TRUE(found_main); + + cbm_discover_free(files, count); + snprintf(cmd, sizeof(cmd), "rm -rf %s", base); + system(cmd); + PASS(); +} + +/* Nested 
.gitignore patterns apply to deeper descendants, not just immediate + * children: webapp/.gitignore with "generated/" should exclude + * webapp/src/generated/schema.js even though it is two levels deep. */ +TEST(discover_nested_gitignore_deep) { + SKIP_UNIX_SHELL; + const char *base = "/tmp/test_discover_nested_gi_deep"; + char cmd[1024]; + snprintf(cmd, sizeof(cmd), + "rm -rf %s && mkdir -p %s/.git && " + "mkdir -p %s/webapp/src/generated && " + "mkdir -p %s/webapp/src/api && " + "printf 'generated/\\n' > %s/webapp/.gitignore && " + "echo 'const x = 1' > %s/webapp/src/api/routes.js && " + "echo 'const g = 1' > %s/webapp/src/generated/schema.js", + base, base, base, base, base, base, base); + system(cmd); + + cbm_discover_opts_t opts = {0}; + cbm_file_info_t *files = NULL; + int count = 0; + + int rc = cbm_discover(base, &opts, &files, &count); + ASSERT_EQ(rc, 0); + + bool found_generated = false, found_routes = false; + for (int i = 0; i < count; i++) { + if (strstr(files[i].rel_path, "generated")) + found_generated = true; + if (strstr(files[i].rel_path, "routes.js")) + found_routes = true; + } + ASSERT_FALSE(found_generated); /* excluded by nested gitignore */ + ASSERT_TRUE(found_routes); /* not excluded */ + + cbm_discover_free(files, count); + snprintf(cmd, sizeof(cmd), "rm -rf %s", base); + system(cmd); + PASS(); +} + +/* Root .gitignore and nested .gitignore both apply independently.
*/ +TEST(discover_nested_gitignore_stacks_with_root) { + SKIP_UNIX_SHELL; + const char *base = "/tmp/test_discover_nested_gi_stack"; + char cmd[1024]; + snprintf(cmd, sizeof(cmd), + "rm -rf %s && mkdir -p %s/.git && " + "mkdir -p %s/webapp/.output && " + "mkdir -p %s/webapp/src && " + "printf '*.log\\n' > %s/.gitignore && " + "printf '.output\\n' > %s/webapp/.gitignore && " + "echo 'package main' > %s/main.go && " + "echo 'error log' > %s/error.log && " + "echo 'const x = 1' > %s/webapp/src/app.js && " + "echo 'output data' > %s/webapp/.output/data.js", + base, base, base, base, base, base, base, base, base, base); + system(cmd); + + cbm_discover_opts_t opts = {0}; + cbm_file_info_t *files = NULL; + int count = 0; + + int rc = cbm_discover(base, &opts, &files, &count); + ASSERT_EQ(rc, 0); + + bool found_log = false, found_output = false; + bool found_main = false, found_app = false; + for (int i = 0; i < count; i++) { + if (strstr(files[i].rel_path, ".log")) + found_log = true; + if (strstr(files[i].rel_path, ".output")) + found_output = true; + if (strstr(files[i].rel_path, "main.go")) + found_main = true; + if (strstr(files[i].rel_path, "app.js")) + found_app = true; + } + ASSERT_FALSE(found_log); /* excluded by root .gitignore */ + ASSERT_FALSE(found_output); /* excluded by nested .gitignore */ + ASSERT_TRUE(found_main); + ASSERT_TRUE(found_app); + + cbm_discover_free(files, count); + snprintf(cmd, sizeof(cmd), "rm -rf %s", base); + system(cmd); + PASS(); +} + /* ── Suite ─────────────────────────────────────────────────────── */ SUITE(discover) { @@ -747,4 +885,9 @@ SUITE(discover) { RUN_TEST(discover_generic_dirs_full_mode); RUN_TEST(discover_generic_dirs_fast_mode); RUN_TEST(discover_cbmignore_no_git); + + /* Nested .gitignore tests (issue #178) */ + RUN_TEST(discover_nested_gitignore); + RUN_TEST(discover_nested_gitignore_deep); + RUN_TEST(discover_nested_gitignore_stacks_with_root); }