From 0840110116a967d39d8b63dbfe822ea9b2500088 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 5 May 2026 12:19:51 -0400 Subject: [PATCH 01/13] t5620: make test work with path-walk var The GIT_TEST_PACK_PATH_WALK test variable allows enabling the --path-walk option to 'git pack-objects' by default. This sometimes engages the warning that --path-walk is incompatible with the --filter option. When that happens, these tests in t5620 fail because the warning is written to stderr. Disable this variable in these tests for now, until these options work together. Signed-off-by: Derrick Stolee --- t/t5620-backfill.sh | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/t/t5620-backfill.sh b/t/t5620-backfill.sh index 94f35ce1901671..e1742907871ba3 100755 --- a/t/t5620-backfill.sh +++ b/t/t5620-backfill.sh @@ -298,6 +298,9 @@ test_expect_success 'backfill with prefix pathspec' ' git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing && test_line_count = 48 missing && + # If we enable --path-walk here, we will get a warning overs stderr + # due to incompatibilities with --filter. + GIT_TEST_PACK_PATH_WALK=0 \ git -C backfill-path backfill HEAD -- d/f 2>err && test_must_be_empty err && @@ -315,6 +318,9 @@ test_expect_success 'backfill with multiple pathspecs' ' git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing && test_line_count = 48 missing && + # If we enable --path-walk here, we will get a warning overs stderr + # due to incompatibilities with --filter. + GIT_TEST_PACK_PATH_WALK=0 \ git -C backfill-path backfill HEAD -- d/f a 2>err && test_must_be_empty err && @@ -332,6 +338,9 @@ test_expect_success 'backfill with wildcard pathspec' ' git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing && test_line_count = 48 missing && + # If we enable --path-walk here, we will get a warning overs stderr + # due to incompatibilities with --filter. 
+ GIT_TEST_PACK_PATH_WALK=0 \ git -C backfill-path backfill HEAD -- "d/file.*.txt" 2>err && test_must_be_empty err && From d7c87545f38a2457031bb2e75a51b464a1525ed6 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 27 Apr 2026 11:36:34 -0400 Subject: [PATCH 02/13] pack-objects: pass --objects with --path-walk When 'git pack-objects' has the --path-walk option enabled, it uses a different set of revision walk parameters than normal. For one, --objects was previously assumed by the path-walk API and could be omitted. We also needed --boundary to allow discovering UNINTERESTING objects to use as delta bases. We will be updating the path-walk API soon to work with some filter options. However, the revision machinery will trigger a fatal error: fatal: object filtering requires --objects The fix is easy: add the --objects option as an argument. This has no effect on the path-walk API but does simplify the revision option parsing for the objects filter. We can remove the comment about "removing" the options because they were never removed and instead not added. We still need to disable using bitmaps. Signed-off-by: Derrick Stolee --- builtin/pack-objects.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index dd2480a73d2edf..4338962904bc94 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -5190,10 +5190,7 @@ int cmd_pack_objects(int argc, } if (path_walk) { strvec_push(&rp, "--boundary"); - /* - * We must disable the bitmaps because we are removing - * the --objects / --objects-edge[-aggressive] options. 
- */ + strvec_push(&rp, "--objects"); use_bitmap_index = 0; } else if (thin) { use_internal_rev_list = 1; From fb8a0f9c43d4e41712839a93c4db6a294a7b5285 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 27 Apr 2026 11:36:48 -0400 Subject: [PATCH 03/13] t/perf: add pack-objects filter and path-walk benchmark Add p5315-pack-objects-filter.sh to measure the performance of 'git pack-objects --revs --all' under different filter and traversal combinations: * no filter (baseline) * --filter=blob:none (blobless) * --filter=sparse:oid= (cone-mode sparse) Each filter scenario is tested both with and without --path-walk, producing paired measurements that show the impact of the path-walk traversal for each filter type as we integrate the --path-walk feature with different --filter options. It currently has no integration so falls back to the standard revision walk. Thus, there are no significant differences in the current results other than a full repack (and even then, the --path-walk feature is not incredibly different for the default Git repository): Test HEAD ----------------------------------------------------- 5315.2: repack (no filter) 27.91 5315.3: repack size (no filter) 250.7M 5315.4: repack (no filter, --path-walk) 34.92 5315.5: repack size (no filter, --path-walk) 220.0M 5315.6: repack (blob:none) 13.63 5315.7: repack size (blob:none) 137.6M 5315.8: repack (blob:none, --path-walk) 13.48 5315.9: repack size (blob:none, --path-walk) 137.7M 5315.10: repack (sparse:oid) 72.67 5315.11: repack size (sparse:oid) 187.4M 5315.12: repack (sparse:oid, --path-walk) 72.47 5315.13: repack size (sparse:oid, --path-walk) 187.4M The sparse filter definition is built automatically by sampling depth-2 directories from the test repository, making the test work on any repo passed via GIT_PERF_LARGE_REPO. For repos that lack depth-2 directories, a single top-level directory is used; for flat repos, the sparse tests are skipped via prerequisite. 
Signed-off-by: Derrick Stolee --- t/perf/p5315-pack-objects-filter.sh | 131 ++++++++++++++++++++++++++++ 1 file changed, 131 insertions(+) create mode 100755 t/perf/p5315-pack-objects-filter.sh diff --git a/t/perf/p5315-pack-objects-filter.sh b/t/perf/p5315-pack-objects-filter.sh new file mode 100755 index 00000000000000..21056abfc03b4b --- /dev/null +++ b/t/perf/p5315-pack-objects-filter.sh @@ -0,0 +1,131 @@ +#!/bin/sh + +test_description='Tests pack-objects performance with filters and --path-walk' +. ./perf-lib.sh + +test_perf_large_repo + +test_expect_success 'setup filter inputs' ' + # Sample a few depth-2 directories from the test repo to build + # a cone-mode sparse-checkout definition. The sampling picks + # directories at evenly-spaced positions so the choice is stable + # and scales to repos of any shape. + + git ls-tree -d HEAD >top-entries && + grep "^040000" top-entries | + awk "{print \$4;}" >top-dirs && + top_nr=$(wc -l depth2-dirs && + while read tdir + do + git ls-tree -d --name-only "HEAD:$tdir" 2>/dev/null || return 1 + done depth2-dirs.raw && + sed "s|^|$tdir/|" depth2-dirs && + + d2_nr=$(wc -l sparse-patterns && + + git hash-object -w sparse-patterns >sparse-oid && + echo "Sparse cone: $first $mid" && + cat sparse-patterns && + test_set_prereq SPARSE_OID + elif test "$top_nr" -ge 1 + then + # Fallback: use a single top-level directory. 
+ first=$(sed -n "1p" top-dirs) && + { + echo "/*" && + echo "!/*/" && + echo "/$first/" + } >sparse-patterns && + + git hash-object -w sparse-patterns >sparse-oid && + echo "Sparse cone: $first" && + cat sparse-patterns && + test_set_prereq SPARSE_OID + fi +' + +test_perf 'repack (no filter)' ' + git pack-objects --stdout --no-reuse-delta --revs --all pk +' + +test_size 'repack size (no filter)' ' + test_file_size pk +' + +test_perf 'repack (no filter, --path-walk)' ' + git pack-objects --stdout --no-reuse-delta --revs --all --path-walk pk +' + +test_size 'repack size (no filter, --path-walk)' ' + test_file_size pk +' + +test_perf 'repack (blob:none)' ' + git pack-objects --stdout --no-reuse-delta --revs --all --filter=blob:none pk +' + +test_size 'repack size (blob:none)' ' + test_file_size pk +' + +test_perf 'repack (blob:none, --path-walk)' ' + git pack-objects --stdout --no-reuse-delta --revs --all --path-walk \ + --filter=blob:none pk +' + +test_size 'repack size (blob:none, --path-walk)' ' + test_file_size pk +' + +test_perf 'repack (sparse:oid)' \ + --prereq SPARSE_OID ' + git pack-objects --stdout --no-reuse-delta --revs --all \ + --filter=sparse:oid=$(cat sparse-oid) pk +' + +test_size 'repack size (sparse:oid)' \ + --prereq SPARSE_OID ' + test_file_size pk +' + +test_perf 'repack (sparse:oid, --path-walk)' \ + --prereq SPARSE_OID ' + git pack-objects --stdout --no-reuse-delta --revs --all --path-walk \ + --filter=sparse:oid=$(cat sparse-oid) pk +' + +test_size 'repack size (sparse:oid, --path-walk)' \ + --prereq SPARSE_OID ' + test_file_size pk +' + +test_done From e77c8a6bbc22da3428751f81ff5ee79aa5364237 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 6 May 2026 18:50:04 -0400 Subject: [PATCH 04/13] path-walk: always emit directly-requested objects We are preparing to integrate the path-walk API with some --filter options in 'git pack-objects', but there is a subtle issue that is revealed when those are put together and the test suite is run 
with GIT_TEST_PACK_PATH_WALK=1. When a filter reduces the set of requested objects, this results in filtering out directly-requested objects, such as in the download of needed blobs in a blobless partial clone. The root cause is that the scan of pending objects in the path-walk API respects the filters set in the path_walk_info instead of overriding them for pending objects. We can tell that a path is part of the directly-referenced objects if its path name starts with '/' (other paths, including root trees, never have this starting character). Create a path_is_for_direct_objects() helper to make this meaning clear, especially as we add more references in the future as we integrate the path-walk API with partial clone filter options. Signed-off-by: Derrick Stolee --- Documentation/technical/api-path-walk.adoc | 7 ++++ path-walk.c | 42 ++++++++++++++-------- path-walk.h | 5 +++ 3 files changed, 39 insertions(+), 15 deletions(-) diff --git a/Documentation/technical/api-path-walk.adoc b/Documentation/technical/api-path-walk.adoc index a67de1b143ab5b..6e17b13d61b969 100644 --- a/Documentation/technical/api-path-walk.adoc +++ b/Documentation/technical/api-path-walk.adoc @@ -48,6 +48,13 @@ commits. applications could disable some options to make it simpler to walk the objects or to have fewer calls to `path_fn`. + +Note that objects directly requested as pending objects (such as targets +of lightweight tags or other ref tips) are always emitted to `path_fn`, +even when the corresponding type flag is disabled. Only objects +discovered during the tree walk are subject to these type filters. This +ensures that objects specifically requested through the revision input +are never silently dropped. ++ While it is possible to walk only commits in this way, consumers would be better off using the revision walk API instead. 
diff --git a/path-walk.c b/path-walk.c index 6e426af4330893..05bfc1c1142ae2 100644 --- a/path-walk.c +++ b/path-walk.c @@ -248,6 +248,17 @@ static int add_tree_entries(struct path_walk_context *ctx, return 0; } +/* + * Paths starting with '/' (e.g., "/tags", "/tagged-blobs") hold objects that + * were directly requested by 'pending' objects rather than discovered during + * tree traversal. + */ +static int path_is_for_direct_objects(const char *path) +{ + ASSERT(path); + return path[0] == '/'; +} + /* * For each path in paths_to_explore, walk the trees another level * and add any found blobs to the batch (but only if they exist and @@ -306,14 +317,19 @@ static int walk_path(struct path_walk_context *ctx, if (list->type == OBJ_BLOB && ctx->revs->prune_data.nr && + !path_is_for_direct_objects(path) && !match_pathspec(ctx->repo->index, &ctx->revs->prune_data, path, strlen(path), 0, NULL, 0)) return 0; - /* Evaluate function pointer on this data, if requested. */ - if ((list->type == OBJ_TREE && ctx->info->trees) || - (list->type == OBJ_BLOB && ctx->info->blobs) || + /* + * Evaluate function pointer on this data, if requested. + * Ignore object type filters for tagged objects (path starts + * with `/`). 
+ */ + if ((list->type == OBJ_TREE && (ctx->info->trees || path_is_for_direct_objects(path))) || + (list->type == OBJ_BLOB && (ctx->info->blobs || path_is_for_direct_objects(path))) || (list->type == OBJ_TAG && ctx->info->tags)) ret = ctx->info->path_fn(path, &list->oids, list->type, ctx->info->path_fn_data); @@ -374,10 +390,8 @@ static int setup_pending_objects(struct path_walk_info *info, if (info->tags) CALLOC_ARRAY(tags, 1); - if (info->blobs) - CALLOC_ARRAY(tagged_blobs, 1); - if (info->trees) - root_tree_list = strmap_get(&ctx->paths_to_lists, root_path); + CALLOC_ARRAY(tagged_blobs, 1); + root_tree_list = strmap_get(&ctx->paths_to_lists, root_path); /* * Pending objects include: @@ -421,8 +435,6 @@ static int setup_pending_objects(struct path_walk_info *info, switch (obj->type) { case OBJ_TREE: - if (!info->trees) - continue; if (pending->path) { char *path = *pending->path ? xstrfmt("%s/", pending->path) : xstrdup(""); @@ -435,8 +447,6 @@ static int setup_pending_objects(struct path_walk_info *info, break; case OBJ_BLOB: - if (!info->blobs) - continue; if (pending->path) add_path_to_list(ctx, pending->path, OBJ_BLOB, &obj->oid, 1); else @@ -532,15 +542,17 @@ int walk_objects_by_path(struct path_walk_info *info) push_to_stack(&ctx, root_path); /* - * Set these values before preparing the walk to catch - * lightweight tags pointing to non-commits and indexed objects. + * Ensure that prepare_revision_walk() keeps all pending objects + * even through an object type filter. */ - info->revs->blob_objects = info->blobs; - info->revs->tree_objects = info->trees; + info->revs->blob_objects = info->revs->tree_objects = 1; if (prepare_revision_walk(info->revs)) die(_("failed to setup revision walk")); + info->revs->blob_objects = info->blobs; + info->revs->tree_objects = info->trees; + /* * Walk trees to mark them as UNINTERESTING. * This is particularly important when 'edge_aggressive' is set. 
diff --git a/path-walk.h b/path-walk.h index 5ef5a8440e6b5e..657eeda8ec00e7 100644 --- a/path-walk.h +++ b/path-walk.h @@ -36,6 +36,11 @@ struct path_walk_info { /** * Initialize which object types the path_fn should be called on. This * could also limit the walk to skip blobs if not set. + * + * Note: even when 'blobs' or 'trees' is disabled, objects that are + * directly requested as pending objects will still be emitted to + * path_fn. Only objects discovered during the tree walk are filtered by + * these flags. */ int commits; int trees; From f4904f81e0caae12ac1cb8bb30ed58fdb8889c50 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 27 Apr 2026 09:02:45 -0400 Subject: [PATCH 05/13] path-walk: support blobless filter The 'git pack-objects' command can opt-in to using the path-walk API for scanning the objects. Currently, this option is dynamically disabled if combined with '--filter=', even when using a simple filter such as 'blob:none' to signal a blobless packfile. This is a common scenario for repos at scale, so is worth integrating. Also, users can opt-in to the '--path-walk' option by default through the pack.usePathWalk=true config option. When using that in a blobless partial clone, the following warning can appear even though the user did not specify either option directly: warning: cannot use --filter with --path-walk Teach the path-walk API to handle the 'blob:none' object filter natively. When revs->filter.choice is LOFC_BLOB_NONE, the path-walk sets info->blobs to 0 (skipping all blob objects) and clears the filter from revs so that prepare_revision_walk() does not reject the configuration. This check is implemented in the static prepare_filters() method, which will simultaneously check if the input filters are compatible and will make the appropriate mutations to the path_walk_info and filters if the path_walk_info is non-NULL. 
This allows us to use this logic both in the API method path_walk_filter_compatible() for use in builtin/pack-objects.c and as a prep step in walk_objects_by_path(). Update the test helper (test-path-walk) to accept --filter= as a test-tool option (before '--'), applying it to revs after setup_revisions() to avoid the --objects requirement check. We can also revert recent GIT_TEST_PACK_PATH_WALK overrides in t5620. Also switch test-path-walk from REV_INFO_INIT with manual repo assignment to repo_init_revisions(), which properly initializes the filter_spec strbuf needed for filter parsing. Add tests for blob:none with --all and with a single branch. The performance test p5315 shows the impact of this change when using blobless filters: Test HEAD~1 HEAD --------------------------------------------------------------------- 5315.6: repack (blob:none) 13.53 13.87 +2.5% 5315.7: repack size (blob:none) 137.7M 137.8M +0.1% 5315.8: repack (blob:none, --path-walk) 13.51 23.43 +73.4% 5315.9: repack size (blob:none, --path-walk) 137.7M 115.2M -16.3% These performance tests were run on the Git repository. The --path-walk feature shows meaningful space savings (16% smaller for blobless packs) at the cost of increased computation time due to the two compression passes. This data demonstrates that the feature is engaged and provides real compression benefits when --no-reuse-delta forces fresh deltas. 
Co-Authored-by: Taylor Blau Signed-off-by: Taylor Blau Signed-off-by: Derrick Stolee --- Documentation/git-pack-objects.adoc | 6 +-- builtin/pack-objects.c | 2 +- path-walk.c | 30 ++++++++++++++ path-walk.h | 7 ++++ t/helper/test-path-walk.c | 11 ++++- t/t5620-backfill.sh | 9 ----- t/t6601-path-walk.sh | 62 +++++++++++++++++++++++++++++ 7 files changed, 113 insertions(+), 14 deletions(-) diff --git a/Documentation/git-pack-objects.adoc b/Documentation/git-pack-objects.adoc index b78175fbe1b97b..2994faf988505f 100644 --- a/Documentation/git-pack-objects.adoc +++ b/Documentation/git-pack-objects.adoc @@ -402,9 +402,9 @@ will be automatically changed to version `1`. of filenames that cause collisions in Git's default name-hash algorithm. + -Incompatible with `--delta-islands`, `--shallow`, or `--filter`. The -`--use-bitmap-index` option will be ignored in the presence of -`--path-walk.` +Incompatible with `--delta-islands`. The `--use-bitmap-index` option is +ignored in the presence of `--path-walk`. The `--path-walk` option +supports the `--filter=` form `blob:none`. 
DELTA ISLANDS diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 4338962904bc94..bc9fb5b45737a3 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -5177,7 +5177,7 @@ int cmd_pack_objects(int argc, if (path_walk) { const char *option = NULL; - if (filter_options.choice) + if (!path_walk_filter_compatible(&filter_options)) option = "--filter"; else if (use_delta_islands) option = "--delta-islands"; diff --git a/path-walk.c b/path-walk.c index 05bfc1c1142ae2..bd81508163f97c 100644 --- a/path-walk.c +++ b/path-walk.c @@ -9,6 +9,7 @@ #include "hashmap.h" #include "hex.h" #include "list-objects.h" +#include "list-objects-filter-options.h" #include "object.h" #include "oid-array.h" #include "path.h" @@ -495,6 +496,32 @@ static int setup_pending_objects(struct path_walk_info *info, return 0; } +static int prepare_filters(struct path_walk_info *info, + struct list_objects_filter_options *options) +{ + switch (options->choice) { + case LOFC_DISABLED: + return 1; + + case LOFC_BLOB_NONE: + if (info) { + info->blobs = 0; + list_objects_filter_release(options); + } + return 1; + + default: + error(_("object filter '%s' not supported by the path-walk API"), + list_objects_filter_spec(options)); + return 0; + } +} + +int path_walk_filter_compatible(struct list_objects_filter_options *options) +{ + return prepare_filters(NULL, options); +} + /** * Given the configuration of 'info', walk the commits based on 'info->revs' and * call 'info->path_fn' on each discovered path. 
@@ -522,6 +549,9 @@ int walk_objects_by_path(struct path_walk_info *info) trace2_region_enter("path-walk", "commit-walk", info->revs->repo); + if (!prepare_filters(info, &info->revs->filter)) + return -1; + CALLOC_ARRAY(commit_list, 1); commit_list->type = OBJ_COMMIT; diff --git a/path-walk.h b/path-walk.h index 657eeda8ec00e7..a1736ecb2b923e 100644 --- a/path-walk.h +++ b/path-walk.h @@ -90,3 +90,10 @@ void path_walk_info_clear(struct path_walk_info *info); * Returns nonzero on an error. */ int walk_objects_by_path(struct path_walk_info *info); + +struct list_objects_filter_options; +/** + * Given a set of options for filtering objects, return 1 if the options + * are compatible with the path-walk API and 0 otherwise. + */ +int path_walk_filter_compatible(struct list_objects_filter_options *options); diff --git a/t/helper/test-path-walk.c b/t/helper/test-path-walk.c index fe63002c2be27d..88f86ae0dc1157 100644 --- a/t/helper/test-path-walk.c +++ b/t/helper/test-path-walk.c @@ -4,6 +4,7 @@ #include "dir.h" #include "environment.h" #include "hex.h" +#include "list-objects-filter-options.h" #include "object-name.h" #include "object.h" #include "pretty.h" @@ -71,6 +72,8 @@ int cmd__path_walk(int argc, const char **argv) struct rev_info revs = REV_INFO_INIT; struct path_walk_info info = PATH_WALK_INFO_INIT; struct path_walk_test_data data = { 0 }; + struct list_objects_filter_options filter_options = + LIST_OBJECTS_FILTER_INIT; struct option options[] = { OPT_BOOL(0, "blobs", &info.blobs, N_("toggle inclusion of blob objects")), @@ -86,11 +89,12 @@ int cmd__path_walk(int argc, const char **argv) N_("toggle aggressive edge walk")), OPT_BOOL(0, "stdin-pl", &stdin_pl, N_("read a pattern list over stdin")), + OPT_PARSE_LIST_OBJECTS_FILTER(&filter_options), OPT_END(), }; setup_git_directory(); - revs.repo = the_repository; + repo_init_revisions(the_repository, &revs, NULL); argc = parse_options(argc, argv, NULL, options, path_walk_usage, @@ -101,6 +105,10 @@ int 
cmd__path_walk(int argc, const char **argv) else usage(path_walk_usage[0]); + /* Apply the filter after setup_revisions to avoid the --objects check. */ + if (filter_options.choice) + list_objects_filter_copy(&revs.filter, &filter_options); + info.revs = &revs; info.path_fn = emit_block; info.path_fn_data = &data; @@ -129,6 +137,7 @@ int cmd__path_walk(int argc, const char **argv) free(info.pl); } + list_objects_filter_release(&filter_options); release_revisions(&revs); return res; } diff --git a/t/t5620-backfill.sh b/t/t5620-backfill.sh index e1742907871ba3..94f35ce1901671 100755 --- a/t/t5620-backfill.sh +++ b/t/t5620-backfill.sh @@ -298,9 +298,6 @@ test_expect_success 'backfill with prefix pathspec' ' git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing && test_line_count = 48 missing && - # If we enable --path-walk here, we will get a warning overs stderr - # due to incompatibilities with --filter. - GIT_TEST_PACK_PATH_WALK=0 \ git -C backfill-path backfill HEAD -- d/f 2>err && test_must_be_empty err && @@ -318,9 +315,6 @@ test_expect_success 'backfill with multiple pathspecs' ' git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing && test_line_count = 48 missing && - # If we enable --path-walk here, we will get a warning overs stderr - # due to incompatibilities with --filter. - GIT_TEST_PACK_PATH_WALK=0 \ git -C backfill-path backfill HEAD -- d/f a 2>err && test_must_be_empty err && @@ -338,9 +332,6 @@ test_expect_success 'backfill with wildcard pathspec' ' git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing && test_line_count = 48 missing && - # If we enable --path-walk here, we will get a warning overs stderr - # due to incompatibilities with --filter. 
- GIT_TEST_PACK_PATH_WALK=0 \ git -C backfill-path backfill HEAD -- "d/file.*.txt" 2>err && test_must_be_empty err && diff --git a/t/t6601-path-walk.sh b/t/t6601-path-walk.sh index 56bd1e3c5bec97..b0ee31ee2dadf6 100755 --- a/t/t6601-path-walk.sh +++ b/t/t6601-path-walk.sh @@ -415,4 +415,66 @@ test_expect_success 'trees are reported exactly once' ' test_line_count = 1 out-filtered ' +test_expect_success 'all, blob:none filter' ' + test-tool path-walk --filter=blob:none -- --all >out && + + cat >expect <<-EOF && + 0:commit::$(git rev-parse topic) + 0:commit::$(git rev-parse base) + 0:commit::$(git rev-parse base~1) + 0:commit::$(git rev-parse base~2) + 1:tag:/tags:$(git rev-parse refs/tags/first) + 1:tag:/tags:$(git rev-parse refs/tags/second.1) + 1:tag:/tags:$(git rev-parse refs/tags/second.2) + 1:tag:/tags:$(git rev-parse refs/tags/third) + 1:tag:/tags:$(git rev-parse refs/tags/fourth) + 1:tag:/tags:$(git rev-parse refs/tags/tree-tag) + 1:tag:/tags:$(git rev-parse refs/tags/blob-tag) + 2:blob:/tagged-blobs:$(git rev-parse refs/tags/blob-tag^{}) + 2:blob:/tagged-blobs:$(git rev-parse refs/tags/blob-tag2^{}) + 3:tree::$(git rev-parse topic^{tree}) + 3:tree::$(git rev-parse base^{tree}) + 3:tree::$(git rev-parse base~1^{tree}) + 3:tree::$(git rev-parse base~2^{tree}) + 3:tree::$(git rev-parse refs/tags/tree-tag^{}) + 3:tree::$(git rev-parse refs/tags/tree-tag2^{}) + 4:tree:a/:$(git rev-parse base:a) + 5:tree:child/:$(git rev-parse refs/tags/tree-tag:child) + 6:tree:left/:$(git rev-parse base:left) + 6:tree:left/:$(git rev-parse base~2:left) + 7:tree:right/:$(git rev-parse topic:right) + 7:tree:right/:$(git rev-parse base~1:right) + 7:tree:right/:$(git rev-parse base~2:right) + blobs:2 + commits:4 + tags:7 + trees:13 + EOF + + test_cmp_sorted expect out +' + +test_expect_success 'topic only, blob:none filter' ' + test-tool path-walk --filter=blob:none -- topic >out && + + cat >expect <<-EOF && + 0:commit::$(git rev-parse topic) + 0:commit::$(git rev-parse base~1) + 
0:commit::$(git rev-parse base~2) + 1:tree::$(git rev-parse topic^{tree}) + 1:tree::$(git rev-parse base~1^{tree}) + 1:tree::$(git rev-parse base~2^{tree}) + 2:tree:left/:$(git rev-parse base~2:left) + 3:tree:right/:$(git rev-parse topic:right) + 3:tree:right/:$(git rev-parse base~1:right) + 3:tree:right/:$(git rev-parse base~2:right) + blobs:0 + commits:3 + tags:0 + trees:7 + EOF + + test_cmp_sorted expect out +' + test_done From f37467e46f3c23b90fc5c772ae24164aafab58c5 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 27 Apr 2026 11:00:59 -0400 Subject: [PATCH 06/13] backfill: die on incompatible filter options The 'git backfill' command uses the path-walk API in a critical way: it uses the objects output from the command to find the batches of missing objects that should be requested from the server. Unlike 'git pack-objects', we cannot fall back to another mechanism. The previous change added the path_walk_filter_compatible() method that we can reuse here. Use it during argument validation in cmd_backfill(). 
Signed-off-by: Derrick Stolee --- builtin/backfill.c | 5 ++--- t/t5620-backfill.sh | 8 ++++++++ 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/builtin/backfill.c b/builtin/backfill.c index 7ffab2ea74f5cc..b80f9ebe691080 100644 --- a/builtin/backfill.c +++ b/builtin/backfill.c @@ -96,9 +96,8 @@ static void reject_unsupported_rev_list_options(struct rev_info *revs) if (revs->explicit_diff_merges) die(_("'%s' cannot be used with 'git backfill'"), "--diff-merges"); - if (revs->filter.choice) - die(_("'%s' cannot be used with 'git backfill'"), - "--filter"); + if (!path_walk_filter_compatible(&revs->filter)) + die(_("cannot backfill with these filter options")); } static int do_backfill(struct backfill_context *ctx) diff --git a/t/t5620-backfill.sh b/t/t5620-backfill.sh index 94f35ce1901671..ede89f8c333ca3 100755 --- a/t/t5620-backfill.sh +++ b/t/t5620-backfill.sh @@ -15,6 +15,14 @@ test_expect_success 'backfill rejects unexpected arguments' ' test_grep "unrecognized argument: --unexpected-arg" err ' +test_expect_success 'backfill rejects incompatible filter options' ' + test_must_fail git backfill --objects --filter=tree:1 2>err && + test_grep "cannot backfill with these filter options" err && + + test_must_fail git backfill --objects --filter=blob:limit=10m 2>err && + test_grep "cannot backfill with these filter options" err +' + # We create objects in the 'src' repo. test_expect_success 'setup repo for object creation' ' echo "{print \$1}" >print_1.awk && From 133c1b156cc8b2e5d0c46b3b44066ebcddaffe64 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 27 Apr 2026 10:15:33 -0400 Subject: [PATCH 07/13] path-walk: support blob size limit filter Extend the path-walk API to handle the 'blob:limit=' object filter natively. This filter omits blobs whose size is equal to or greater than the given limit, matching the semantics used by the list-objects-filter machinery. 
When revs->filter.choice is LOFC_BLOB_LIMIT, the prepare_filters() method stores the limit value in info->blob_limit and clears the filter from revs. If the limit is zero, this degenerates to blob:none (all blobs excluded), so info->blobs is set to 0 instead. During walk_path(), blob batches are filtered before being delivered to the callback: each blob's size is checked via odb_read_object_info(), and only blobs strictly smaller than the limit are included. Blobs whose size cannot be determined (e.g. missing in a partial clone) are conservatively included, matching the existing filter behavior. Empty batches after filtering are skipped entirely. The check for inclusion in the path batch looks a little strange at first glance. We use odb_read_object_info() to read the object's size. Based on all of the assumptions to this point, this _should_ return OBJ_BLOB. Since we are focused on the size filter, we use a short-circuited OR (||) to skip the size check if that method returns a different object type. Notice that this inspection of object sizes requires the content to be present in the repository. The odb_read_object_info() call will download a missing blob on-demand. This means that the use of the path-walk API within 'git backfill' would not operate nicely with this filter type. The intention of that command is to download missing blobs in batches. Downloading objects one-by-one would go against the point. Update the validation in 'git backfill' to add its own compatibility check on top of path_walk_filter_compatible(). Add tests for blob:limit=0 (equivalent to blob:none) and blob:limit=3 (which exercises partial filtering within a batch where some blobs are kept and others are excluded). 
Co-authored-by: Taylor Blau Signed-off-by: Taylor Blau Signed-off-by: Derrick Stolee --- Documentation/git-pack-objects.adoc | 2 +- builtin/backfill.c | 2 + path-walk.c | 41 +++++++++++++-- path-walk.h | 7 +++ t/t5620-backfill.sh | 2 +- t/t6601-path-walk.sh | 82 +++++++++++++++++++++++++++++ 6 files changed, 130 insertions(+), 6 deletions(-) diff --git a/Documentation/git-pack-objects.adoc b/Documentation/git-pack-objects.adoc index 2994faf988505f..85ae48b699fd72 100644 --- a/Documentation/git-pack-objects.adoc +++ b/Documentation/git-pack-objects.adoc @@ -404,7 +404,7 @@ will be automatically changed to version `1`. + Incompatible with `--delta-islands`. The `--use-bitmap-index` option is ignored in the presence of `--path-walk`. The `--path-walk` option -supports the `--filter=` form `blob:none`. +supports the `--filter=` forms `blob:none` and `blob:limit=`. DELTA ISLANDS diff --git a/builtin/backfill.c b/builtin/backfill.c index b80f9ebe691080..5254a427111bbd 100644 --- a/builtin/backfill.c +++ b/builtin/backfill.c @@ -98,6 +98,8 @@ static void reject_unsupported_rev_list_options(struct rev_info *revs) "--diff-merges"); if (!path_walk_filter_compatible(&revs->filter)) die(_("cannot backfill with these filter options")); + if (revs->filter.blob_limit_value) + die(_("cannot backfill with blob size limits")); } static int do_backfill(struct backfill_context *ctx) diff --git a/path-walk.c b/path-walk.c index bd81508163f97c..04b924d4deccad 100644 --- a/path-walk.c +++ b/path-walk.c @@ -10,6 +10,7 @@ #include "hex.h" #include "list-objects.h" #include "list-objects-filter-options.h" +#include "odb.h" #include "object.h" #include "oid-array.h" #include "path.h" @@ -327,13 +328,35 @@ static int walk_path(struct path_walk_context *ctx, /* * Evaluate function pointer on this data, if requested. * Ignore object type filters for tagged objects (path starts - * with `/`). + * with `/`), first for blobs and then other types. 
*/ - if ((list->type == OBJ_TREE && (ctx->info->trees || path_is_for_direct_objects(path))) || - (list->type == OBJ_BLOB && (ctx->info->blobs || path_is_for_direct_objects(path))) || - (list->type == OBJ_TAG && ctx->info->tags)) + if (list->type == OBJ_BLOB && + ctx->info->blob_limit && + !path_is_for_direct_objects(path)) { + struct oid_array filtered = OID_ARRAY_INIT; + + for (size_t i = 0; i < list->oids.nr; i++) { + unsigned long size; + + if (odb_read_object_info(ctx->repo->objects, + &list->oids.oid[i], + &size) != OBJ_BLOB || + size < ctx->info->blob_limit) + oid_array_append(&filtered, + &list->oids.oid[i]); + } + + if (filtered.nr) + ret = ctx->info->path_fn(path, &filtered, list->type, + ctx->info->path_fn_data); + oid_array_clear(&filtered); + } else if (path_is_for_direct_objects(path) || + (list->type == OBJ_TREE && ctx->info->trees) || + (list->type == OBJ_BLOB && ctx->info->blobs) || + (list->type == OBJ_TAG && ctx->info->tags)) { ret = ctx->info->path_fn(path, &list->oids, list->type, ctx->info->path_fn_data); + } /* Expand data for children. */ if (list->type == OBJ_TREE) { @@ -510,6 +533,16 @@ static int prepare_filters(struct path_walk_info *info, } return 1; + case LOFC_BLOB_LIMIT: + if (info) { + if (!options->blob_limit_value) + info->blobs = 0; + else + info->blob_limit = options->blob_limit_value; + list_objects_filter_release(options); + } + return 1; + default: error(_("object filter '%s' not supported by the path-walk API"), list_objects_filter_spec(options)); diff --git a/path-walk.h b/path-walk.h index a1736ecb2b923e..60ceb6543389c3 100644 --- a/path-walk.h +++ b/path-walk.h @@ -47,6 +47,13 @@ struct path_walk_info { int blobs; int tags; + /** + * If non-zero, specifies a maximum blob size. Blobs with a + * size equal to or greater than this limit will not be + * emitted unless included in 'pending'. 
+ */ + unsigned long blob_limit; + /** * When 'prune_all_uninteresting' is set and a path has all objects * marked as UNINTERESTING, then the path-walk will not visit those diff --git a/t/t5620-backfill.sh b/t/t5620-backfill.sh index ede89f8c333ca3..d2ea68e065304d 100755 --- a/t/t5620-backfill.sh +++ b/t/t5620-backfill.sh @@ -20,7 +20,7 @@ test_expect_success 'backfill rejects incompatible filter options' ' test_grep "cannot backfill with these filter options" err && test_must_fail git backfill --objects --filter=blob:limit=10m 2>err && - test_grep "cannot backfill with these filter options" err + test_grep "cannot backfill with blob size limits" err ' # We create objects in the 'src' repo. diff --git a/t/t6601-path-walk.sh b/t/t6601-path-walk.sh index b0ee31ee2dadf6..45f366d738efac 100755 --- a/t/t6601-path-walk.sh +++ b/t/t6601-path-walk.sh @@ -477,4 +477,86 @@ test_expect_success 'topic only, blob:none filter' ' test_cmp_sorted expect out ' +test_expect_success 'all, blob:limit=0 filter' ' + test-tool path-walk --filter=blob:limit=0 -- --all >out && + + cat >expect <<-EOF && + 0:commit::$(git rev-parse topic) + 0:commit::$(git rev-parse base) + 0:commit::$(git rev-parse base~1) + 0:commit::$(git rev-parse base~2) + 1:tag:/tags:$(git rev-parse refs/tags/first) + 1:tag:/tags:$(git rev-parse refs/tags/second.1) + 1:tag:/tags:$(git rev-parse refs/tags/second.2) + 1:tag:/tags:$(git rev-parse refs/tags/third) + 1:tag:/tags:$(git rev-parse refs/tags/fourth) + 1:tag:/tags:$(git rev-parse refs/tags/tree-tag) + 1:tag:/tags:$(git rev-parse refs/tags/blob-tag) + 2:blob:/tagged-blobs:$(git rev-parse refs/tags/blob-tag^{}) + 2:blob:/tagged-blobs:$(git rev-parse refs/tags/blob-tag2^{}) + 3:tree::$(git rev-parse topic^{tree}) + 3:tree::$(git rev-parse base^{tree}) + 3:tree::$(git rev-parse base~1^{tree}) + 3:tree::$(git rev-parse base~2^{tree}) + 3:tree::$(git rev-parse refs/tags/tree-tag^{}) + 3:tree::$(git rev-parse refs/tags/tree-tag2^{}) + 4:tree:a/:$(git rev-parse base:a) 
+ 5:tree:child/:$(git rev-parse refs/tags/tree-tag:child) + 6:tree:left/:$(git rev-parse base:left) + 6:tree:left/:$(git rev-parse base~2:left) + 7:tree:right/:$(git rev-parse topic:right) + 7:tree:right/:$(git rev-parse base~1:right) + 7:tree:right/:$(git rev-parse base~2:right) + blobs:2 + commits:4 + tags:7 + trees:13 + EOF + + test_cmp_sorted expect out +' + +test_expect_success 'all, blob:limit=3 filter' ' + test-tool path-walk --filter=blob:limit=3 -- --all >out && + + cat >expect <<-EOF && + 0:commit::$(git rev-parse topic) + 0:commit::$(git rev-parse base) + 0:commit::$(git rev-parse base~1) + 0:commit::$(git rev-parse base~2) + 1:tag:/tags:$(git rev-parse refs/tags/first) + 1:tag:/tags:$(git rev-parse refs/tags/second.1) + 1:tag:/tags:$(git rev-parse refs/tags/second.2) + 1:tag:/tags:$(git rev-parse refs/tags/third) + 1:tag:/tags:$(git rev-parse refs/tags/fourth) + 1:tag:/tags:$(git rev-parse refs/tags/tree-tag) + 1:tag:/tags:$(git rev-parse refs/tags/blob-tag) + 2:blob:/tagged-blobs:$(git rev-parse refs/tags/blob-tag^{}) + 2:blob:/tagged-blobs:$(git rev-parse refs/tags/blob-tag2^{}) + 3:tree::$(git rev-parse topic^{tree}) + 3:tree::$(git rev-parse base^{tree}) + 3:tree::$(git rev-parse base~1^{tree}) + 3:tree::$(git rev-parse base~2^{tree}) + 3:tree::$(git rev-parse refs/tags/tree-tag^{}) + 3:tree::$(git rev-parse refs/tags/tree-tag2^{}) + 4:blob:a:$(git rev-parse base~2:a) + 5:tree:a/:$(git rev-parse base:a) + 6:tree:child/:$(git rev-parse refs/tags/tree-tag:child) + 7:tree:left/:$(git rev-parse base:left) + 7:tree:left/:$(git rev-parse base~2:left) + 8:blob:left/b:$(git rev-parse base~2:left/b) + 9:tree:right/:$(git rev-parse topic:right) + 9:tree:right/:$(git rev-parse base~1:right) + 9:tree:right/:$(git rev-parse base~2:right) + 10:blob:right/c:$(git rev-parse base~2:right/c) + 11:blob:right/d:$(git rev-parse base~1:right/d) + blobs:6 + commits:4 + tags:7 + trees:13 + EOF + + test_cmp_sorted expect out +' + test_done From 
0f517be8e3853e05d313258133db4eab229d3835 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 30 Apr 2026 12:18:03 -0400 Subject: [PATCH 08/13] path-walk: add pl_sparse_trees to control tree pruning The path-walk API prunes trees and blobs when a sparse-checkout pattern list is provided, which is the correct behavior for 'git backfill --sparse' since it only needs to fill in objects at paths within the sparse cone. However, a future change will use the path-walk API with a sparse: filter that restricts only blobs while retaining all reachable trees. To support both behaviors, add a 'pl_sparse_trees' flag to path_walk_info. When set (as in 'git backfill --sparse' and the --stdin-pl test helper mode), the sparse patterns prune both trees and blobs. When unset, only blobs are filtered and all trees are walked and reported. Additionally, move the SEEN flag assignment in add_tree_entries() to after the sparse pattern and pathspec checks. Previously, SEEN was set immediately upon discovering an object, before checking whether its path matched the sparse patterns. When the same object ID appeared at multiple paths (e.g. sibling directories with identical contents), the first path to be visited would mark the object as SEEN. If that path was outside the sparse cone, the object would be skipped there but also never discovered at its in-cone path. By deferring the SEEN flag until after the checks pass, objects that are skipped due to sparse filtering remain discoverable at other paths where they may be in scope. 
Signed-off-by: Derrick Stolee --- builtin/backfill.c | 1 + path-walk.c | 5 +++-- path-walk.h | 6 ++++++ t/helper/test-path-walk.c | 6 +++++- t/t6601-path-walk.sh | 37 +++++++++++++++++++++++++++++++++++++ 5 files changed, 52 insertions(+), 3 deletions(-) diff --git a/builtin/backfill.c b/builtin/backfill.c index 5254a427111bbd..e71e0f4742c506 100644 --- a/builtin/backfill.c +++ b/builtin/backfill.c @@ -109,6 +109,7 @@ static int do_backfill(struct backfill_context *ctx) if (ctx->sparse) { CALLOC_ARRAY(info.pl, 1); + info.pl_sparse_trees = 1; if (get_sparse_checkout_patterns(info.pl)) { path_walk_info_clear(&info); return error(_("problem loading sparse-checkout")); diff --git a/path-walk.c b/path-walk.c index 04b924d4deccad..225857bbc8dde6 100644 --- a/path-walk.c +++ b/path-walk.c @@ -183,7 +183,6 @@ static int add_tree_entries(struct path_walk_context *ctx, /* Skip this object if already seen. */ if (o->flags & SEEN) continue; - o->flags |= SEEN; strbuf_setlen(&path, base_len); strbuf_add(&path, entry.path, entry.pathlen); @@ -204,7 +203,8 @@ static int add_tree_entries(struct path_walk_context *ctx, ctx->repo->index); if (ctx->info->pl->use_cone_patterns && - match == NOT_MATCHED) + match == NOT_MATCHED && + (type == OBJ_BLOB || ctx->info->pl_sparse_trees)) continue; else if (!ctx->info->pl->use_cone_patterns && type == OBJ_BLOB && @@ -239,6 +239,7 @@ static int add_tree_entries(struct path_walk_context *ctx, continue; } + o->flags |= SEEN; add_path_to_list(ctx, path.buf, type, &entry.oid, !(o->flags & UNINTERESTING)); diff --git a/path-walk.h b/path-walk.h index 60ceb6543389c3..7e57ae5f65dd98 100644 --- a/path-walk.h +++ b/path-walk.h @@ -76,8 +76,14 @@ struct path_walk_info { * of the cone. If not in cone mode, then all tree paths will be * explored but the path_fn will only be called when the path matches * the sparse-checkout patterns. 
+ * + * When 'pl_sparse_trees' is zero, the sparse patterns only restrict + * blobs and all trees are included in the walk output. This matches + * the behavior of the sparse:oid object filter. When nonzero, trees + * are also pruned by the sparse patterns (as used by backfill). */ struct pattern_list *pl; + int pl_sparse_trees; }; #define PATH_WALK_INFO_INIT { \ diff --git a/t/helper/test-path-walk.c b/t/helper/test-path-walk.c index 88f86ae0dc1157..3f2b50a9aa16bd 100644 --- a/t/helper/test-path-walk.c +++ b/t/helper/test-path-walk.c @@ -68,7 +68,7 @@ static int emit_block(const char *path, struct oid_array *oids, int cmd__path_walk(int argc, const char **argv) { - int res, stdin_pl = 0; + int res, stdin_pl = 0, pl_sparse_trees = -1; struct rev_info revs = REV_INFO_INIT; struct path_walk_info info = PATH_WALK_INFO_INIT; struct path_walk_test_data data = { 0 }; @@ -89,6 +89,8 @@ int cmd__path_walk(int argc, const char **argv) N_("toggle aggressive edge walk")), OPT_BOOL(0, "stdin-pl", &stdin_pl, N_("read a pattern list over stdin")), + OPT_BOOL(0, "pl-sparse-trees", &pl_sparse_trees, + N_("toggle pruning of trees by sparse patterns")), OPT_PARSE_LIST_OBJECTS_FILTER(&filter_options), OPT_END(), }; @@ -116,6 +118,8 @@ int cmd__path_walk(int argc, const char **argv) if (stdin_pl) { struct strbuf in = STRBUF_INIT; CALLOC_ARRAY(info.pl, 1); + info.pl_sparse_trees = (pl_sparse_trees >= 0) ? 
+ pl_sparse_trees : 1; info.pl->use_cone_patterns = 1; diff --git a/t/t6601-path-walk.sh b/t/t6601-path-walk.sh index 45f366d738efac..02ad83dfb0368e 100755 --- a/t/t6601-path-walk.sh +++ b/t/t6601-path-walk.sh @@ -206,6 +206,43 @@ test_expect_success 'base & topic, sparse' ' test_cmp_sorted expect out ' +test_expect_success 'base & topic, sparse, no tree pruning' ' + cat >patterns <<-EOF && + /* + !/*/ + /left/ + EOF + + test-tool path-walk --stdin-pl --no-pl-sparse-trees \ + -- base topic out && + + cat >expect <<-EOF && + 0:commit::$(git rev-parse topic) + 0:commit::$(git rev-parse base) + 0:commit::$(git rev-parse base~1) + 0:commit::$(git rev-parse base~2) + 1:tree::$(git rev-parse topic^{tree}) + 1:tree::$(git rev-parse base^{tree}) + 1:tree::$(git rev-parse base~1^{tree}) + 1:tree::$(git rev-parse base~2^{tree}) + 2:blob:a:$(git rev-parse base~2:a) + 3:tree:a/:$(git rev-parse base:a) + 4:tree:left/:$(git rev-parse base:left) + 4:tree:left/:$(git rev-parse base~2:left) + 5:blob:left/b:$(git rev-parse base~2:left/b) + 5:blob:left/b:$(git rev-parse base:left/b) + 6:tree:right/:$(git rev-parse topic:right) + 6:tree:right/:$(git rev-parse base~1:right) + 6:tree:right/:$(git rev-parse base~2:right) + blobs:3 + commits:4 + tags:0 + trees:10 + EOF + + test_cmp_sorted expect out +' + test_expect_success 'topic only' ' test-tool path-walk -- topic >out && From b4dc09ab6903251164f85ea2827448564cca2f7c Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 30 Apr 2026 12:19:13 -0400 Subject: [PATCH 09/13] pack-objects: support sparse:oid filter with path-walk The --filter=sparse: option to 'git pack-objects' allows focusing an object set to a sparse-checkout definition. This reduces the set of matching blobs while retaining all reachable trees. 
No server currently supports fetching with this filter because it is expensive to compute and reachability bitmaps do not help without a significant effort to extend the bitmap feature to store bitmaps for each supported sparse-checkout definition. Without focusing on serving fetches and clones with these filters, there are still benefits that could be realized by making this faster. With the sparse index, it's more realistic now than ever to be able to operate a local clone that was bootstrapped by a packfile created with a sparse filter, because the missing trees are not needed to move a sparse-checkout from one commit to another or to view the history of any path in scope. Such clones could perhaps be bootstrapped by partial bundles. Previously, constructing these sparse packs has been incredibly computationally inefficient. The revision walk that explores which objects are in scope spends a lot of time checking each object to see if it matches the sparse-checkout patterns, causing quadratic behavior (number of objects times number of sparse-checkout patterns). This improves somewhat when using cone-mode sparse-checkout patterns that can use hashtables and prefix matches to determine containment. However, the check per object is still too expensive for most cases. This is where the path-walk feature comes in. We can proceed as normal by placing objects in bins by path and _then_ check a group of objects all at once. Since sparse: only restricts blobs, the path-walk must include all reachable trees while using the cone-mode patterns to skip blobs at paths outside the sparse scope. This establishes a baseline for a potential future "treesparse:" filter that would also restrict trees, but introducing such a new filter is deferred to a later change. The implementation here is focused around loading the sparse-checkout patterns from the provided object ID and checking that the patterns are indeed cone-mode patterns.
We can then load the correct pattern list into the path walk context and use the logic that already exists from bff45557675 (backfill: add --sparse option, 2025-02-03), though that feature loads sparse-checkout patterns from the worktree's local settings and also restricts tree objects. We use a combination of errors and warnings to signal problems during this load. The difference is that errors are likely fatal for the non-path-walk version while the warnings are probably just implementation details for the path-walk version and the 'git pack-objects' command can fall back to the revision walk version. Now that the SEEN flag is deferred until after pattern checks (from the previous commit), handle the case where a tree with a shared OID appears at both an out-of-cone and in-cone path. When trees are not being pruned (pl_sparse_trees == 0), the path-walk re-walks the tree at the in-cone path so that in-cone blobs within it are discovered. The new tests in t5317 and t6601 demonstrate this behavior and would fail without these changes. The performance test p5315 shows the impact of this change when using sparse filters: Test HEAD~1 HEAD ---------------------------------------------------------------------- 5315.10: repack (sparse:oid) 77.98 77.47 -0.7% 5315.11: repack size (sparse:oid) 187.5M 187.4M -0.0% 5315.12: repack (sparse:oid, --path-walk) 77.91 31.41 -59.7% 5315.13: repack size (sparse:oid, --path-walk) 187.5M 161.1M -14.1% These performance tests were run on the Git repository. The --path-walk feature shows meaningful space savings (14% smaller for sparse packs) and dramatic time savings (60% faster) by leveraging the path-walk's ability to skip blobs outside the sparse scope. 
Co-authored-by: Taylor Blau Signed-off-by: Taylor Blau Signed-off-by: Derrick Stolee --- Documentation/git-backfill.adoc | 4 + Documentation/git-pack-objects.adoc | 3 +- builtin/pack-objects.c | 16 ++- path-walk.c | 81 ++++++++++++++- t/t5317-pack-objects-filter-objects.sh | 125 +++++++++++++++++++++++ t/t6601-path-walk.sh | 131 +++++++++++++++++++++++++ 6 files changed, 350 insertions(+), 10 deletions(-) diff --git a/Documentation/git-backfill.adoc b/Documentation/git-backfill.adoc index c0a3b80615e034..82d6a1969d0542 100644 --- a/Documentation/git-backfill.adoc +++ b/Documentation/git-backfill.adoc @@ -80,6 +80,10 @@ OPTIONS + You may also use commit-limiting options understood by linkgit:git-rev-list[1] such as `--first-parent`, `--since`, or pathspecs. ++ +Most `--filter=` options don't work with the purpose of +`git backfill`, but the `sparse:` filter is integrated to provide a +focused set of paths to download, distinct from the `--sparse` option. SEE ALSO -------- diff --git a/Documentation/git-pack-objects.adoc b/Documentation/git-pack-objects.adoc index 85ae48b699fd72..e38853391bb589 100644 --- a/Documentation/git-pack-objects.adoc +++ b/Documentation/git-pack-objects.adoc @@ -404,7 +404,8 @@ will be automatically changed to version `1`. + Incompatible with `--delta-islands`. The `--use-bitmap-index` option is ignored in the presence of `--path-walk`. The `--path-walk` option -supports the `--filter=` forms `blob:none` and `blob:limit=`. +supports the `--filter=` forms `blob:none`, `blob:limit=`, and +`sparse:`.
DELTA ISLANDS diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index bc9fb5b45737a3..b783dc62bc9b77 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -4754,7 +4754,7 @@ static int add_objects_by_path(const char *path, return 0; } -static void get_object_list_path_walk(struct rev_info *revs) +static int get_object_list_path_walk(struct rev_info *revs) { struct path_walk_info info = PATH_WALK_INFO_INIT; unsigned int processed = 0; @@ -4777,8 +4777,9 @@ static void get_object_list_path_walk(struct rev_info *revs) result = walk_objects_by_path(&info); trace2_region_leave("pack-objects", "path-walk", revs->repo); - if (result) - die(_("failed to pack objects via path-walk")); + path_walk_info_clear(&info); + + return result; } static void get_object_list(struct rev_info *revs, struct strvec *argv) @@ -4841,8 +4842,13 @@ static void get_object_list(struct rev_info *revs, struct strvec *argv) fn_show_object = show_object; if (path_walk) { - get_object_list_path_walk(revs); - } else { + if (get_object_list_path_walk(revs)) { + warning(_("failed to pack objects via path-walk")); + path_walk = 0; + } + } + + if (!path_walk) { if (prepare_revision_walk(revs)) die(_("revision walk setup failed")); mark_edges_uninteresting(revs, show_edge, sparse); diff --git a/path-walk.c b/path-walk.c index 225857bbc8dde6..ce38dcf1e94728 100644 --- a/path-walk.c +++ b/path-walk.c @@ -10,6 +10,7 @@ #include "hex.h" #include "list-objects.h" #include "list-objects-filter-options.h" +#include "object-name.h" #include "odb.h" #include "object.h" #include "oid-array.h" @@ -180,10 +181,6 @@ static int add_tree_entries(struct path_walk_context *ctx, return -1; } - /* Skip this object if already seen. 
*/ - if (o->flags & SEEN) - continue; - strbuf_setlen(&path, base_len); strbuf_add(&path, entry.path, entry.pathlen); @@ -194,6 +191,40 @@ static int add_tree_entries(struct path_walk_context *ctx, if (type == OBJ_TREE) strbuf_addch(&path, '/'); + if (o->flags & SEEN) { + /* + * A tree with a shared OID may appear at multiple + * paths. Even though we already added this tree to + * the output at some other path, we still need to + * walk into it at this in-cone path to discover + * blobs that were not found at the earlier + * out-of-cone path. + * + * Only do this for paths not yet in our map, to + * avoid duplicate entries when the same tree OID + * appears at the same path across multiple commits. + */ + if (type == OBJ_TREE && ctx->info->pl && + ctx->info->pl->use_cone_patterns && + !ctx->info->pl_sparse_trees && + !strmap_contains(&ctx->paths_to_lists, path.buf)) { + int dtype; + enum pattern_match_result m; + m = path_matches_pattern_list(path.buf, path.len, + path.buf + base_len, + &dtype, + ctx->info->pl, + ctx->repo->index); + if (m != NOT_MATCHED) { + add_path_to_list(ctx, path.buf, type, + &entry.oid, + !(o->flags & UNINTERESTING)); + push_to_stack(ctx, path.buf); + } + } + continue; + } + if (ctx->info->pl) { int dtype; enum pattern_match_result match; @@ -544,6 +575,48 @@ static int prepare_filters(struct path_walk_info *info, } return 1; + case LOFC_SPARSE_OID: + if (info) { + struct object_id sparse_oid; + struct repository *repo = info->revs->repo; + + if (info->pl) { + warning(_("sparse filter cannot be combined with existing sparse patterns")); + return 0; + } + + if (repo_get_oid_with_flags(repo, + options->sparse_oid_name, + &sparse_oid, + GET_OID_BLOB)) { + error(_("unable to access sparse blob in '%s'"), + options->sparse_oid_name); + return 0; + } + + CALLOC_ARRAY(info->pl, 1); + info->pl->use_cone_patterns = 1; + + if (add_patterns_from_blob_to_list(&sparse_oid, "", 0, + info->pl) < 0) { + clear_pattern_list(info->pl); + 
FREE_AND_NULL(info->pl); + error(_("unable to parse sparse filter data in '%s'"), + oid_to_hex(&sparse_oid)); + return 0; + } + + if (!info->pl->use_cone_patterns) { + clear_pattern_list(info->pl); + FREE_AND_NULL(info->pl); + warning(_("sparse filter is not cone-mode compatible")); + return 0; + } + + list_objects_filter_release(options); + } + return 1; + default: error(_("object filter '%s' not supported by the path-walk API"), list_objects_filter_spec(options)); diff --git a/t/t5317-pack-objects-filter-objects.sh b/t/t5317-pack-objects-filter-objects.sh index 501d715b9a16b7..dddb79ba627036 100755 --- a/t/t5317-pack-objects-filter-objects.sh +++ b/t/t5317-pack-objects-filter-objects.sh @@ -478,4 +478,129 @@ test_expect_success 'verify pack-objects w/ --missing=allow-any' ' EOF ' +# Test that --path-walk produces the same object set as standard traversal +# when using sparse:oid filters with cone-mode patterns. +# +# The sparse:oid filter restricts only blobs, not trees. Both standard +# and path-walk should produce identical sets of blobs, commits, and trees. + +test_expect_success 'setup pw_sparse for path-walk comparison' ' + git init pw_sparse && + mkdir -p pw_sparse/inc/sub pw_sparse/exc/sub && + + for n in 1 2 + do + echo "inc $n" >pw_sparse/inc/file$n && + echo "inc sub $n" >pw_sparse/inc/sub/file$n && + echo "exc $n" >pw_sparse/exc/file$n && + echo "exc sub $n" >pw_sparse/exc/sub/file$n && + echo "root $n" >pw_sparse/root$n || return 1 + done && + + git -C pw_sparse add . && + git -C pw_sparse commit -m "first" && + + echo "inc 1 modified" >pw_sparse/inc/file1 && + echo "exc 1 modified" >pw_sparse/exc/file1 && + echo "root 1 modified" >pw_sparse/root1 && + git -C pw_sparse add . 
&& + git -C pw_sparse commit -m "second" && + + # Cone-mode sparse pattern: include root + inc/ + printf "/*\n!/*/\n/inc/\n" | + git -C pw_sparse hash-object -w --stdin >sparse_oid +' + +test_expect_success 'sparse:oid with --path-walk produces same blobs' ' + oid=$(cat sparse_oid) && + + git -C pw_sparse pack-objects --revs --stdout \ + --filter=sparse:oid=$oid >standard.pack <<-EOF && + HEAD + EOF + git -C pw_sparse index-pack ../standard.pack && + git -C pw_sparse verify-pack -v ../standard.pack >standard_verify && + + git -C pw_sparse pack-objects --revs --stdout \ + --path-walk --filter=sparse:oid=$oid >pathwalk.pack <<-EOF && + HEAD + EOF + git -C pw_sparse index-pack ../pathwalk.pack && + git -C pw_sparse verify-pack -v ../pathwalk.pack >pathwalk_verify && + + # Blobs must match exactly + grep -E "^[0-9a-f]{40} blob" standard_verify | + awk "{print \$1}" | sort >standard_blobs && + grep -E "^[0-9a-f]{40} blob" pathwalk_verify | + awk "{print \$1}" | sort >pathwalk_blobs && + test_cmp standard_blobs pathwalk_blobs && + + # Commits must match exactly + grep -E "^[0-9a-f]{40} commit" standard_verify | + awk "{print \$1}" | sort >standard_commits && + grep -E "^[0-9a-f]{40} commit" pathwalk_verify | + awk "{print \$1}" | sort >pathwalk_commits && + test_cmp standard_commits pathwalk_commits +' + +test_expect_success 'sparse:oid with --path-walk includes all trees' ' + # The sparse:oid filter restricts only blobs, not trees. + # Both standard and path-walk should include the same trees. + grep -E "^[0-9a-f]{40} tree" standard_verify | + awk "{print \$1}" | sort >standard_trees && + grep -E "^[0-9a-f]{40} tree" pathwalk_verify | + awk "{print \$1}" | sort >pathwalk_trees && + + test_cmp standard_trees pathwalk_trees +' + +# Test the edge case where the same tree/blob OID appears at both an +# in-cone and out-of-cone path. When sibling directories have identical +# contents, they share a tree OID. 
The path-walk defers marking objects +# SEEN until after checking sparse patterns, so an object at an out-of-cone +# path can still be discovered at an in-cone path. + +test_expect_success 'setup pw_shared for shared OID across cone boundary' ' + git init pw_shared && + mkdir pw_shared/aaa pw_shared/zzz && + echo "shared content" >pw_shared/aaa/file && + echo "shared content" >pw_shared/zzz/file && + echo "root file" >pw_shared/rootfile && + git -C pw_shared add . && + git -C pw_shared commit -m "aaa and zzz share tree OID" && + + # Verify they share a tree OID + aaa_tree=$(git -C pw_shared rev-parse HEAD:aaa) && + zzz_tree=$(git -C pw_shared rev-parse HEAD:zzz) && + test "$aaa_tree" = "$zzz_tree" && + + # Cone pattern: include root + zzz/ (not aaa/) + printf "/*\n!/*/\n/zzz/\n" | + git -C pw_shared hash-object -w --stdin >shared_sparse_oid +' + +test_expect_success 'shared tree OID: --path-walk blobs match standard' ' + oid=$(cat shared_sparse_oid) && + + git -C pw_shared pack-objects --revs --stdout \ + --filter=sparse:oid=$oid >shared_std.pack <<-EOF && + HEAD + EOF + git -C pw_shared index-pack ../shared_std.pack && + git -C pw_shared verify-pack -v ../shared_std.pack >shared_std_verify && + + git -C pw_shared pack-objects --revs --stdout \ + --path-walk --filter=sparse:oid=$oid >shared_pw.pack <<-EOF && + HEAD + EOF + git -C pw_shared index-pack ../shared_pw.pack && + git -C pw_shared verify-pack -v ../shared_pw.pack >shared_pw_verify && + + grep -E "^[0-9a-f]{40} blob" shared_std_verify | + awk "{print \$1}" | sort >shared_std_blobs && + grep -E "^[0-9a-f]{40} blob" shared_pw_verify | + awk "{print \$1}" | sort >shared_pw_blobs && + test_cmp shared_std_blobs shared_pw_blobs +' + test_done diff --git a/t/t6601-path-walk.sh b/t/t6601-path-walk.sh index 02ad83dfb0368e..ac294867a509b6 100755 --- a/t/t6601-path-walk.sh +++ b/t/t6601-path-walk.sh @@ -596,4 +596,135 @@ test_expect_success 'all, blob:limit=3 filter' ' test_cmp_sorted expect out ' +test_expect_success 
'setup sparse filter blob' ' + # Cone-mode patterns: include root, exclude all dirs, include left/ + cat >patterns <<-\EOF && + /* + !/*/ + /left/ + EOF + sparse_oid=$(git hash-object -w -t blob patterns) +' + +test_expect_success 'all, sparse:oid filter' ' + test-tool path-walk --filter=sparse:oid=$sparse_oid -- --all >out && + + cat >expect <<-EOF && + 0:commit::$(git rev-parse topic) + 0:commit::$(git rev-parse base) + 0:commit::$(git rev-parse base~1) + 0:commit::$(git rev-parse base~2) + 1:tag:/tags:$(git rev-parse refs/tags/first) + 1:tag:/tags:$(git rev-parse refs/tags/second.1) + 1:tag:/tags:$(git rev-parse refs/tags/second.2) + 1:tag:/tags:$(git rev-parse refs/tags/third) + 1:tag:/tags:$(git rev-parse refs/tags/fourth) + 1:tag:/tags:$(git rev-parse refs/tags/tree-tag) + 1:tag:/tags:$(git rev-parse refs/tags/blob-tag) + 2:blob:/tagged-blobs:$(git rev-parse refs/tags/blob-tag^{}) + 2:blob:/tagged-blobs:$(git rev-parse refs/tags/blob-tag2^{}) + 3:tree::$(git rev-parse topic^{tree}) + 3:tree::$(git rev-parse base^{tree}) + 3:tree::$(git rev-parse base~1^{tree}) + 3:tree::$(git rev-parse base~2^{tree}) + 3:tree::$(git rev-parse refs/tags/tree-tag^{}) + 3:tree::$(git rev-parse refs/tags/tree-tag2^{}) + 4:blob:a:$(git rev-parse base~2:a) + 5:blob:file2:$(git rev-parse refs/tags/tree-tag2^{}:file2) + 6:tree:a/:$(git rev-parse base:a) + 7:tree:child/:$(git rev-parse refs/tags/tree-tag:child) + 8:tree:left/:$(git rev-parse base:left) + 8:tree:left/:$(git rev-parse base~2:left) + 9:blob:left/b:$(git rev-parse base~2:left/b) + 9:blob:left/b:$(git rev-parse base:left/b) + 10:tree:right/:$(git rev-parse topic:right) + 10:tree:right/:$(git rev-parse base~1:right) + 10:tree:right/:$(git rev-parse base~2:right) + blobs:6 + commits:4 + tags:7 + trees:13 + EOF + + test_cmp_sorted expect out +' + +test_expect_success 'topic only, sparse:oid filter' ' + test-tool path-walk --filter=sparse:oid=$sparse_oid -- topic >out && + + cat >expect <<-EOF && + 0:commit::$(git rev-parse 
topic) + 0:commit::$(git rev-parse base~1) + 0:commit::$(git rev-parse base~2) + 1:tree::$(git rev-parse topic^{tree}) + 1:tree::$(git rev-parse base~1^{tree}) + 1:tree::$(git rev-parse base~2^{tree}) + 2:blob:a:$(git rev-parse base~2:a) + 3:tree:left/:$(git rev-parse base~2:left) + 4:blob:left/b:$(git rev-parse base~2:left/b) + 5:tree:right/:$(git rev-parse topic:right) + 5:tree:right/:$(git rev-parse base~1:right) + 5:tree:right/:$(git rev-parse base~2:right) + blobs:2 + commits:3 + tags:0 + trees:7 + EOF + + test_cmp_sorted expect out +' + +# Demonstrate the SEEN flag ordering issue: when the same tree/blob OID +# appears at two sibling paths where one is in-cone and the other is +# out-of-cone, the path-walk must still discover blobs at the in-cone +# path even when the shared tree OID was first encountered out-of-cone. +# Since sparse:oid includes all trees, the out-of-cone tree (aaa/) is +# walked first, and its blob is skipped. The path-walk then re-walks +# the same tree OID at the in-cone path (zzz/) to find the blob there. + +test_expect_success 'setup shared tree OID across cone boundary' ' + git checkout --orphan shared-tree && + git rm -rf . 
&& + mkdir aaa zzz && + echo "shared content" >aaa/file && + echo "shared content" >zzz/file && + echo "root file" >rootfile && + git add aaa zzz rootfile && + git commit -m "aaa and zzz have same tree OID" && + + # Verify they really share a tree OID + aaa_tree=$(git rev-parse HEAD:aaa) && + zzz_tree=$(git rev-parse HEAD:zzz) && + test "$aaa_tree" = "$zzz_tree" && + + # Cone pattern: include root + zzz/ (not aaa/) + cat >shared-patterns <<-\EOF && + /* + !/*/ + /zzz/ + EOF + shared_sparse_oid=$(git hash-object -w -t blob shared-patterns) +' + +test_expect_success 'sparse:oid with shared tree OID across cone boundary' ' + test-tool path-walk \ + --filter=sparse:oid=$shared_sparse_oid \ + -- shared-tree >out && + + cat >expect <<-EOF && + 0:commit::$(git rev-parse shared-tree) + 1:tree::$(git rev-parse shared-tree^{tree}) + 2:blob:rootfile:$(git rev-parse shared-tree:rootfile) + 3:tree:aaa/:$(git rev-parse shared-tree:aaa) + 4:tree:zzz/:$(git rev-parse shared-tree:zzz) + 5:blob:zzz/file:$(git rev-parse shared-tree:zzz/file) + blobs:2 + commits:1 + tags:0 + trees:3 + EOF + + test_cmp_sorted expect out +' + test_done From 0b1eed07907270668713df5094c2198f6b2e600f Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 13 May 2026 14:46:12 -0400 Subject: [PATCH 10/13] t6601: tag otherwise-unreachable trees The tests in t6601-path-walk.sh demonstrate the behavior of the path-walk API under different conditions. One thing that I noticed while updating the behavior of directly-requested objects is that we don't actually emit tagged trees. This was previously not noticed due to those tagged trees actually being reachable from commits that we are including in the path-walk. Update the test setup to have tree-tag and tree-tag2 point to trees that are otherwise unreachable. It is worth noting that this does not meaningfully change any of the other test cases, demonstrating the bug.
Signed-off-by: Derrick Stolee --- t/t6601-path-walk.sh | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/t/t6601-path-walk.sh b/t/t6601-path-walk.sh index ac294867a509b6..92c524d145dde3 100755 --- a/t/t6601-path-walk.sh +++ b/t/t6601-path-walk.sh @@ -7,17 +7,15 @@ test_description='direct path-walk API tests' test_expect_success 'setup test repository' ' git checkout -b base && - # Make some objects that will only be reachable - # via non-commit tags. - mkdir child && - echo file >child/file && - git add child && - git commit -m "will abandon" && - git tag -a -m "tree" tree-tag HEAD^{tree} && - echo file2 >file2 && - git add file2 && - git commit --amend -m "will abandon" && - git tag tree-tag2 HEAD^{tree} && + # Create tree objects that are only reachable via tags, + # not from any commit in the history. + child_blob_oid=$(echo "child blob content" | git hash-object -t blob -w --stdin) && + child_tree_oid=$(printf "100644 blob %s\tfile\n" "$child_blob_oid" | git mktree) && + tree_tag_oid=$(printf "040000 tree %s\tchild\n" "$child_tree_oid" | git mktree) && + git tag -a -m "tree" tree-tag "$tree_tag_oid" && + file2_blob_oid=$(echo "tagged tree file2" | git hash-object -t blob -w --stdin) && + tree_tag2_oid=$(printf "040000 tree %s\tchild\n100644 blob %s\tfile2\n" "$child_tree_oid" "$file2_blob_oid" | git mktree) && + git tag tree-tag2 "$tree_tag2_oid" && echo blob >file && blob_oid=$(git hash-object -t blob -w --stdin left/b && echo c >right/c && git add . && - git commit --amend -m "first" && + git commit -m "first" && git tag -m "first" first HEAD && echo d >right/d && From b23244c4c274aa2b8006ee71189e6eed2dde6489 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 3 May 2026 20:11:20 -0400 Subject: [PATCH 11/13] path-walk: support `tree:0` filter The `tree:0` object filter omits all trees and blobs from the result, keeping only commits and tags. 
Consequently, this filter type should have a fairly straightforward integration with path-walk, as the decision to include an object depends only on its type and does not depend on any path-sensitive state. Mapping it onto `path_walk_info` is direct: set `info->trees = 0` and `info->blobs = 0` in `prepare_filters()` when the `LOFC_TREE_DEPTH` choice is requested with depth zero. The existing code already plumbs those flags through the rest of the walk: - 'walk_objects_by_path()' sets `revs->blob_objects = info->blobs` and `revs->tree_objects = info->trees` before `prepare_revision_walk()`, so the revision walk doesn't try to enumerate trees or blobs itself. - The commit-walk loop short-circuits the root-tree fetch with "if (!info->trees && !info->blobs) continue;", so we never even look up the root tree, let alone descend into it. - `setup_pending_objects()` skips pending trees and blobs based on the same flags. This means the path-walk doesn't allocate or expand any tree structures at all under `tree:0`, which matches the intended behavior of the filter. However, this requires first fixing some issues with how the path-walk API handles directly-requested trees _and_ trees requested through lightweight tags. These changes create substantial updates to t6601-path-walk.sh, which the previous change highlighted as a problem by tagging otherwise-unreachable trees and having them not appear in the output. Non-zero tree-depth filters are not supported. Those depend on the depth at which a tree is visited, which is a path-walk concept the filter machinery doesn't currently share with the path-walk API. Reject them in `prepare_filters()` with a helpful error and let pack-objects fall back to the regular traversal, the same way it already does for unsupported filters. Add coverage in t6601 for both `--all` and a single-branch case to confirm that no trees or blobs are emitted, and a separate test that `tree:1` is rejected with the expected error message.
Place the new tests before "setup sparse filter blob" so they run on the original set of refs, before the orphan branch that the sparse-tree tests create. Signed-off-by: Taylor Blau Signed-off-by: Derrick Stolee --- Documentation/git-pack-objects.adoc | 4 +- path-walk.c | 53 +++++++-- t/t6601-path-walk.sh | 165 ++++++++++++++++++---------- 3 files changed, 152 insertions(+), 70 deletions(-) diff --git a/Documentation/git-pack-objects.adoc b/Documentation/git-pack-objects.adoc index e38853391bb589..c86219be911a5d 100644 --- a/Documentation/git-pack-objects.adoc +++ b/Documentation/git-pack-objects.adoc @@ -404,8 +404,8 @@ will be automatically changed to version `1`. + Incompatible with `--delta-islands`. The `--use-bitmap-index` option is ignored in the presence of `--path-walk`. The `--path-walk` option -supports the `--filter=` forms `blob:none`, `blob:limit=`, and -`sparse:`. +supports the `--filter=` forms `blob:none`, `blob:limit=`, +`tree:0`, and `sparse:`. DELTA ISLANDS diff --git a/path-walk.c b/path-walk.c index ce38dcf1e94728..cb67b8ce866aba 100644 --- a/path-walk.c +++ b/path-walk.c @@ -390,11 +390,18 @@ static int walk_path(struct path_walk_context *ctx, ctx->info->path_fn_data); } - /* Expand data for children. */ - if (list->type == OBJ_TREE) { + /* + * Expand tree children, except when the set is directly requested + * _and_ we are otherwise filtering out trees. + */ + if (list->type == OBJ_TREE && + (!path_is_for_direct_objects(path) || ctx->info->trees)) { + /* Use root path if expanding from tagged/direct trees. */ + const char *expand_path = !strcmp(path, "/tagged-trees") + ? 
root_path : path; for (size_t i = 0; i < list->oids.nr; i++) { ret |= add_tree_entries(ctx, - path, + expand_path, &list->oids.oid[i]); } } @@ -442,12 +449,12 @@ static int setup_pending_objects(struct path_walk_info *info, { struct type_and_oid_list *tags = NULL; struct type_and_oid_list *tagged_blobs = NULL; - struct type_and_oid_list *root_tree_list = NULL; + struct type_and_oid_list *tagged_trees = NULL; if (info->tags) CALLOC_ARRAY(tags, 1); CALLOC_ARRAY(tagged_blobs, 1); - root_tree_list = strmap_get(&ctx->paths_to_lists, root_path); + CALLOC_ARRAY(tagged_trees, 1); /* * Pending objects include: @@ -491,14 +498,15 @@ static int setup_pending_objects(struct path_walk_info *info, switch (obj->type) { case OBJ_TREE: - if (pending->path) { - char *path = *pending->path ? xstrfmt("%s/", pending->path) - : xstrdup(""); + if (pending->path && *pending->path) { + char *path = xstrfmt("%s/", pending->path); add_path_to_list(ctx, path, OBJ_TREE, &obj->oid, 1); free(path); + } else if (!pending->path || !info->trees) { + oid_array_append(&tagged_trees->oids, &obj->oid); } else { - /* assume a root tree, such as a lightweight tag. 
*/ - oid_array_append(&root_tree_list->oids, &obj->oid); + add_path_to_list(ctx, root_path, OBJ_TREE, + &obj->oid, 1); } break; @@ -535,6 +543,18 @@ static int setup_pending_objects(struct path_walk_info *info, free(tagged_blobs); } } + if (tagged_trees) { + if (tagged_trees->oids.nr) { + const char *tagged_tree_path = "/tagged-trees"; + tagged_trees->type = OBJ_TREE; + tagged_trees->maybe_interesting = 1; + strmap_put(&ctx->paths_to_lists, tagged_tree_path, tagged_trees); + push_to_stack(ctx, tagged_tree_path); + } else { + oid_array_clear(&tagged_trees->oids); + free(tagged_trees); + } + } if (tags) { if (tags->oids.nr) { const char *tag_path = "/tags"; @@ -575,6 +595,19 @@ static int prepare_filters(struct path_walk_info *info, } return 1; + case LOFC_TREE_DEPTH: + if (options->tree_exclude_depth) { + error(_("tree:%lu filter not supported by the path-walk API"), + options->tree_exclude_depth); + return 0; + } + if (info) { + info->trees = 0; + info->blobs = 0; + list_objects_filter_release(options); + } + return 1; + case LOFC_SPARSE_OID: if (info) { struct object_id sparse_oid; diff --git a/t/t6601-path-walk.sh b/t/t6601-path-walk.sh index 92c524d145dde3..566db7c7e3ef2e 100755 --- a/t/t6601-path-walk.sh +++ b/t/t6601-path-walk.sh @@ -77,23 +77,23 @@ test_expect_success 'all' ' 3:tree::$(git rev-parse base^{tree}) 3:tree::$(git rev-parse base~1^{tree}) 3:tree::$(git rev-parse base~2^{tree}) - 3:tree::$(git rev-parse refs/tags/tree-tag^{}) - 3:tree::$(git rev-parse refs/tags/tree-tag2^{}) 4:blob:a:$(git rev-parse base~2:a) - 5:blob:file2:$(git rev-parse refs/tags/tree-tag2^{}:file2) - 6:tree:a/:$(git rev-parse base:a) - 7:tree:child/:$(git rev-parse refs/tags/tree-tag:child) - 8:blob:child/file:$(git rev-parse refs/tags/tree-tag:child/file) - 9:tree:left/:$(git rev-parse base:left) - 9:tree:left/:$(git rev-parse base~2:left) - 10:blob:left/b:$(git rev-parse base~2:left/b) - 10:blob:left/b:$(git rev-parse base:left/b) - 11:tree:right/:$(git rev-parse topic:right) 
- 11:tree:right/:$(git rev-parse base~1:right) - 11:tree:right/:$(git rev-parse base~2:right) - 12:blob:right/c:$(git rev-parse base~2:right/c) - 12:blob:right/c:$(git rev-parse topic:right/c) - 13:blob:right/d:$(git rev-parse base~1:right/d) + 5:tree:/tagged-trees:$(git rev-parse refs/tags/tree-tag^{}) + 5:tree:/tagged-trees:$(git rev-parse refs/tags/tree-tag2^{}) + 6:blob:file2:$(git rev-parse refs/tags/tree-tag2^{}:file2) + 7:tree:a/:$(git rev-parse base:a) + 8:tree:child/:$(git rev-parse refs/tags/tree-tag:child) + 9:blob:child/file:$(git rev-parse refs/tags/tree-tag:child/file) + 10:tree:left/:$(git rev-parse base:left) + 10:tree:left/:$(git rev-parse base~2:left) + 11:blob:left/b:$(git rev-parse base~2:left/b) + 11:blob:left/b:$(git rev-parse base:left/b) + 12:tree:right/:$(git rev-parse topic:right) + 12:tree:right/:$(git rev-parse base~1:right) + 12:tree:right/:$(git rev-parse base~2:right) + 13:blob:right/c:$(git rev-parse base~2:right/c) + 13:blob:right/c:$(git rev-parse topic:right/c) + 14:blob:right/d:$(git rev-parse base~1:right/d) blobs:10 commits:4 tags:7 @@ -471,15 +471,15 @@ test_expect_success 'all, blob:none filter' ' 3:tree::$(git rev-parse base^{tree}) 3:tree::$(git rev-parse base~1^{tree}) 3:tree::$(git rev-parse base~2^{tree}) - 3:tree::$(git rev-parse refs/tags/tree-tag^{}) - 3:tree::$(git rev-parse refs/tags/tree-tag2^{}) - 4:tree:a/:$(git rev-parse base:a) - 5:tree:child/:$(git rev-parse refs/tags/tree-tag:child) - 6:tree:left/:$(git rev-parse base:left) - 6:tree:left/:$(git rev-parse base~2:left) - 7:tree:right/:$(git rev-parse topic:right) - 7:tree:right/:$(git rev-parse base~1:right) - 7:tree:right/:$(git rev-parse base~2:right) + 4:tree:/tagged-trees:$(git rev-parse refs/tags/tree-tag^{}) + 4:tree:/tagged-trees:$(git rev-parse refs/tags/tree-tag2^{}) + 5:tree:a/:$(git rev-parse base:a) + 6:tree:child/:$(git rev-parse refs/tags/tree-tag:child) + 7:tree:left/:$(git rev-parse base:left) + 7:tree:left/:$(git rev-parse base~2:left) + 
8:tree:right/:$(git rev-parse topic:right) + 8:tree:right/:$(git rev-parse base~1:right) + 8:tree:right/:$(git rev-parse base~2:right) blobs:2 commits:4 tags:7 @@ -533,15 +533,15 @@ test_expect_success 'all, blob:limit=0 filter' ' 3:tree::$(git rev-parse base^{tree}) 3:tree::$(git rev-parse base~1^{tree}) 3:tree::$(git rev-parse base~2^{tree}) - 3:tree::$(git rev-parse refs/tags/tree-tag^{}) - 3:tree::$(git rev-parse refs/tags/tree-tag2^{}) - 4:tree:a/:$(git rev-parse base:a) - 5:tree:child/:$(git rev-parse refs/tags/tree-tag:child) - 6:tree:left/:$(git rev-parse base:left) - 6:tree:left/:$(git rev-parse base~2:left) - 7:tree:right/:$(git rev-parse topic:right) - 7:tree:right/:$(git rev-parse base~1:right) - 7:tree:right/:$(git rev-parse base~2:right) + 4:tree:/tagged-trees:$(git rev-parse refs/tags/tree-tag^{}) + 4:tree:/tagged-trees:$(git rev-parse refs/tags/tree-tag2^{}) + 5:tree:a/:$(git rev-parse base:a) + 6:tree:child/:$(git rev-parse refs/tags/tree-tag:child) + 7:tree:left/:$(git rev-parse base:left) + 7:tree:left/:$(git rev-parse base~2:left) + 8:tree:right/:$(git rev-parse topic:right) + 8:tree:right/:$(git rev-parse base~1:right) + 8:tree:right/:$(git rev-parse base~2:right) blobs:2 commits:4 tags:7 @@ -572,19 +572,19 @@ test_expect_success 'all, blob:limit=3 filter' ' 3:tree::$(git rev-parse base^{tree}) 3:tree::$(git rev-parse base~1^{tree}) 3:tree::$(git rev-parse base~2^{tree}) - 3:tree::$(git rev-parse refs/tags/tree-tag^{}) - 3:tree::$(git rev-parse refs/tags/tree-tag2^{}) 4:blob:a:$(git rev-parse base~2:a) - 5:tree:a/:$(git rev-parse base:a) - 6:tree:child/:$(git rev-parse refs/tags/tree-tag:child) - 7:tree:left/:$(git rev-parse base:left) - 7:tree:left/:$(git rev-parse base~2:left) - 8:blob:left/b:$(git rev-parse base~2:left/b) - 9:tree:right/:$(git rev-parse topic:right) - 9:tree:right/:$(git rev-parse base~1:right) - 9:tree:right/:$(git rev-parse base~2:right) - 10:blob:right/c:$(git rev-parse base~2:right/c) - 11:blob:right/d:$(git rev-parse 
base~1:right/d) + 5:tree:/tagged-trees:$(git rev-parse refs/tags/tree-tag^{}) + 5:tree:/tagged-trees:$(git rev-parse refs/tags/tree-tag2^{}) + 6:tree:a/:$(git rev-parse base:a) + 7:tree:child/:$(git rev-parse refs/tags/tree-tag:child) + 8:tree:left/:$(git rev-parse base:left) + 8:tree:left/:$(git rev-parse base~2:left) + 9:blob:left/b:$(git rev-parse base~2:left/b) + 10:tree:right/:$(git rev-parse topic:right) + 10:tree:right/:$(git rev-parse base~1:right) + 10:tree:right/:$(git rev-parse base~2:right) + 11:blob:right/c:$(git rev-parse base~2:right/c) + 12:blob:right/d:$(git rev-parse base~1:right/d) blobs:6 commits:4 tags:7 @@ -594,6 +594,55 @@ test_expect_success 'all, blob:limit=3 filter' ' test_cmp_sorted expect out ' +test_expect_success 'all, tree:0 filter' ' + test-tool path-walk --filter=tree:0 -- --all >out && + + cat >expect <<-EOF && + 0:commit::$(git rev-parse topic) + 0:commit::$(git rev-parse base) + 0:commit::$(git rev-parse base~1) + 0:commit::$(git rev-parse base~2) + 1:tag:/tags:$(git rev-parse refs/tags/first) + 1:tag:/tags:$(git rev-parse refs/tags/second.1) + 1:tag:/tags:$(git rev-parse refs/tags/second.2) + 1:tag:/tags:$(git rev-parse refs/tags/third) + 1:tag:/tags:$(git rev-parse refs/tags/fourth) + 1:tag:/tags:$(git rev-parse refs/tags/tree-tag) + 1:tag:/tags:$(git rev-parse refs/tags/blob-tag) + 2:blob:/tagged-blobs:$(git rev-parse refs/tags/blob-tag^{}) + 2:blob:/tagged-blobs:$(git rev-parse refs/tags/blob-tag2^{}) + 3:tree:/tagged-trees:$(git rev-parse refs/tags/tree-tag^{tree}) + 3:tree:/tagged-trees:$(git rev-parse refs/tags/tree-tag2) + blobs:2 + commits:4 + tags:7 + trees:2 + EOF + + test_cmp_sorted expect out +' + +test_expect_success 'topic only, tree:0 filter' ' + test-tool path-walk --filter=tree:0 -- topic >out && + + cat >expect <<-EOF && + 0:commit::$(git rev-parse topic) + 0:commit::$(git rev-parse base~1) + 0:commit::$(git rev-parse base~2) + blobs:0 + commits:3 + tags:0 + trees:0 + EOF + + test_cmp_sorted expect out +' + 
+test_expect_success 'tree:1 filter is rejected' ' + test_must_fail test-tool path-walk --filter=tree:1 -- --all 2>err && + test_grep "tree:1 filter not supported by the path-walk API" err +' + test_expect_success 'setup sparse filter blob' ' # Cone-mode patterns: include root, exclude all dirs, include left/ cat >patterns <<-\EOF && @@ -625,19 +674,19 @@ test_expect_success 'all, sparse:oid filter' ' 3:tree::$(git rev-parse base^{tree}) 3:tree::$(git rev-parse base~1^{tree}) 3:tree::$(git rev-parse base~2^{tree}) - 3:tree::$(git rev-parse refs/tags/tree-tag^{}) - 3:tree::$(git rev-parse refs/tags/tree-tag2^{}) 4:blob:a:$(git rev-parse base~2:a) - 5:blob:file2:$(git rev-parse refs/tags/tree-tag2^{}:file2) - 6:tree:a/:$(git rev-parse base:a) - 7:tree:child/:$(git rev-parse refs/tags/tree-tag:child) - 8:tree:left/:$(git rev-parse base:left) - 8:tree:left/:$(git rev-parse base~2:left) - 9:blob:left/b:$(git rev-parse base~2:left/b) - 9:blob:left/b:$(git rev-parse base:left/b) - 10:tree:right/:$(git rev-parse topic:right) - 10:tree:right/:$(git rev-parse base~1:right) - 10:tree:right/:$(git rev-parse base~2:right) + 5:tree:/tagged-trees:$(git rev-parse refs/tags/tree-tag^{}) + 5:tree:/tagged-trees:$(git rev-parse refs/tags/tree-tag2^{}) + 6:blob:file2:$(git rev-parse refs/tags/tree-tag2^{}:file2) + 7:tree:a/:$(git rev-parse base:a) + 8:tree:child/:$(git rev-parse refs/tags/tree-tag:child) + 9:tree:left/:$(git rev-parse base:left) + 9:tree:left/:$(git rev-parse base~2:left) + 10:blob:left/b:$(git rev-parse base~2:left/b) + 10:blob:left/b:$(git rev-parse base:left/b) + 11:tree:right/:$(git rev-parse topic:right) + 11:tree:right/:$(git rev-parse base~1:right) + 11:tree:right/:$(git rev-parse base~2:right) blobs:6 commits:4 tags:7 From 7e1e503361e5d997d904ac101a776ecffdac6059 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 3 May 2026 20:11:23 -0400 Subject: [PATCH 12/13] path-walk: support `object:type` filter The `object:type` filter accepts only objects of a single 
type; it is the second member of the object-info-only filter family that bitmap traversal already supports. Like `blob:none` and `tree:0`, it can be evaluated with nothing more than the object's type, which is exactly the granularity path-walk's existing info->{commits,trees,blobs,tags} flags already control. Map `LOFC_OBJECT_TYPE` in `prepare_filters()` by AND-ing each flag against the filtered type. A single `object:type=X` filter applied to the default info (all flags = 1) leaves `info->X = 1` and all the others 0, which is what we want. Using an AND rather than straight assignment prepares us for a subsequent change to implement combined object filters. The path-walk machinery is mostly already wired for the per-type distinction: - `walk_path()` calls `path_fn` for a batch only when the corresponding `info->X` flag is set, so unwanted types are silently not reported. - `add_tree_entries()` skips tree entries of type `OBJ_BLOB` when `info->blobs` is unset, so we don't even allocate paths for them. - The commit-walk loop short-circuits the root-tree fetch when `!info->trees && !info->blobs`, so commit-only filters don't descend into trees at all. But there are a couple of side effects of the "trees off, blobs on" case that need fixing: 1. 'setup_pending_objects()' previously skipped pending trees as soon as `info->trees` was zero. For 'object:type=blob' the call site needs those pending trees: a lightweight tag pointing to a tree, or an annotated tag whose peeled target is a tree, can both reach blobs that are otherwise unreachable from any commit's root tree. Loosen the gate to "if (!info->trees && !info->blobs) continue" and similarly retrieve the root_tree_list whenever either trees or blobs are wanted. 2. 
The revision machinery's `handle_commit()` drops pending trees when `revs->tree_objects` is zero (see the 'OBJ_TREE' handler in revision.c), so by the time path-walk sees the pending list after `prepare_revision_walk()` the tree-bearing pendings would already be gone. Fix this by setting revs->tree_objects = info->trees || info->blobs so pending trees survive `prepare_revision_walk()` whenever we need to walk into them. Path-walk still resets tree_objects to zero immediately after `prepare_revision_walk()` returns, so the rev-walk itself never enumerates trees redundantly with path-walk's own descent. Add coverage in t6601 for each of the four `object:type` values. The 'object:type=blob' test in particular asserts that file2 and child/file (both reachable only through tag-pointed trees) show up in the output, exercising the pending-tree fix. Update Documentation/git-pack-objects.adoc to add object:type to the list of supported --filter forms. Signed-off-by: Taylor Blau Signed-off-by: Derrick Stolee --- Documentation/git-pack-objects.adoc | 2 +- path-walk.c | 13 ++++- path-walk.h | 6 +++ t/t6601-path-walk.sh | 84 +++++++++++++++++++++++++++++ 4 files changed, 103 insertions(+), 2 deletions(-) diff --git a/Documentation/git-pack-objects.adoc b/Documentation/git-pack-objects.adoc index c86219be911a5d..f2852ebd3172dc 100644 --- a/Documentation/git-pack-objects.adoc +++ b/Documentation/git-pack-objects.adoc @@ -405,7 +405,7 @@ will be automatically changed to version `1`. Incompatible with `--delta-islands`. The `--use-bitmap-index` option is ignored in the presence of `--path-walk`. The `--path-walk` option supports the `--filter=` forms `blob:none`, `blob:limit=`, -`tree:0`, and `sparse:`. +`tree:0`, `object:type=`, and `sparse:`. 
DELTA ISLANDS diff --git a/path-walk.c b/path-walk.c index cb67b8ce866aba..418972e753840d 100644 --- a/path-walk.c +++ b/path-walk.c @@ -382,7 +382,7 @@ static int walk_path(struct path_walk_context *ctx, ret = ctx->info->path_fn(path, &filtered, list->type, ctx->info->path_fn_data); oid_array_clear(&filtered); - } else if (path_is_for_direct_objects(path) || + } else if ((!ctx->info->strict_types && path_is_for_direct_objects(path)) || (list->type == OBJ_TREE && ctx->info->trees) || (list->type == OBJ_BLOB && ctx->info->blobs) || (list->type == OBJ_TAG && ctx->info->tags)) { @@ -608,6 +608,17 @@ static int prepare_filters(struct path_walk_info *info, } return 1; + case LOFC_OBJECT_TYPE: + if (info) { + info->commits &= options->object_type == OBJ_COMMIT; + info->tags &= options->object_type == OBJ_TAG; + info->trees &= options->object_type == OBJ_TREE; + info->blobs &= options->object_type == OBJ_BLOB; + info->strict_types = 1; + list_objects_filter_release(options); + } + return 1; + case LOFC_SPARSE_OID: if (info) { struct object_id sparse_oid; diff --git a/path-walk.h b/path-walk.h index 7e57ae5f65dd98..a2652b2d465edf 100644 --- a/path-walk.h +++ b/path-walk.h @@ -47,6 +47,12 @@ struct path_walk_info { int blobs; int tags; + /** + * If 'strict_types' is non-zero, then direct object requests will no + * longer override the object type restrictions. + */ + int strict_types; + /** * If non-zero, specifies a maximum blob size.
Blobs with a * size equal to or greater than this limit will not be diff --git a/t/t6601-path-walk.sh b/t/t6601-path-walk.sh index 566db7c7e3ef2e..0fd8e61c76f458 100755 --- a/t/t6601-path-walk.sh +++ b/t/t6601-path-walk.sh @@ -643,6 +643,90 @@ test_expect_success 'tree:1 filter is rejected' ' test_grep "tree:1 filter not supported by the path-walk API" err ' +test_expect_success 'all, object:type=commit filter' ' + test-tool path-walk --filter=object:type=commit -- --all >out && + + cat >expect <<-EOF && + 0:commit::$(git rev-parse topic) + 0:commit::$(git rev-parse base) + 0:commit::$(git rev-parse base~1) + 0:commit::$(git rev-parse base~2) + blobs:0 + commits:4 + tags:0 + trees:0 + EOF + + test_cmp_sorted expect out +' + +test_expect_success 'all, object:type=tag filter' ' + test-tool path-walk --filter=object:type=tag -- --all >out && + + cat >expect <<-EOF && + 0:tag:/tags:$(git rev-parse refs/tags/first) + 0:tag:/tags:$(git rev-parse refs/tags/second.1) + 0:tag:/tags:$(git rev-parse refs/tags/second.2) + 0:tag:/tags:$(git rev-parse refs/tags/third) + 0:tag:/tags:$(git rev-parse refs/tags/fourth) + 0:tag:/tags:$(git rev-parse refs/tags/tree-tag) + 0:tag:/tags:$(git rev-parse refs/tags/blob-tag) + blobs:0 + commits:0 + tags:7 + trees:0 + EOF + + test_cmp_sorted expect out +' + +test_expect_success 'all, object:type=tree filter' ' + test-tool path-walk --filter=object:type=tree -- --all >out && + + cat >expect <<-EOF && + 0:tree::$(git rev-parse topic^{tree}) + 0:tree::$(git rev-parse base^{tree}) + 0:tree::$(git rev-parse base~1^{tree}) + 0:tree::$(git rev-parse base~2^{tree}) + 1:tree:/tagged-trees:$(git rev-parse refs/tags/tree-tag^{}) + 1:tree:/tagged-trees:$(git rev-parse refs/tags/tree-tag2^{}) + 2:tree:a/:$(git rev-parse base:a) + 3:tree:child/:$(git rev-parse refs/tags/tree-tag:child) + 4:tree:left/:$(git rev-parse base:left) + 4:tree:left/:$(git rev-parse base~2:left) + 5:tree:right/:$(git rev-parse topic:right) + 5:tree:right/:$(git rev-parse 
base~1:right) + 5:tree:right/:$(git rev-parse base~2:right) + blobs:0 + commits:0 + tags:0 + trees:13 + EOF + + test_cmp_sorted expect out +' + +test_expect_success 'all, object:type=blob filter' ' + test-tool path-walk --filter=object:type=blob -- --all >out && + + cat >expect <<-EOF && + 0:blob:/tagged-blobs:$(git rev-parse refs/tags/blob-tag^{}) + 0:blob:/tagged-blobs:$(git rev-parse refs/tags/blob-tag2^{}) + 1:blob:a:$(git rev-parse base~2:a) + 2:blob:left/b:$(git rev-parse base:left/b) + 2:blob:left/b:$(git rev-parse base~2:left/b) + 3:blob:right/c:$(git rev-parse base~2:right/c) + 3:blob:right/c:$(git rev-parse topic:right/c) + 4:blob:right/d:$(git rev-parse base~1:right/d) + blobs:8 + commits:0 + tags:0 + trees:0 + EOF + + test_cmp_sorted expect out +' + test_expect_success 'setup sparse filter blob' ' # Cone-mode patterns: include root, exclude all dirs, include left/ cat >patterns <<-\EOF && From a615b1a7078a6f092deb180d135f32c313094315 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 3 May 2026 20:11:26 -0400 Subject: [PATCH 13/13] path-walk: support `combine` filter The `combine` filter takes the intersection of its children, that is: objects are shown only when all child filters would admit the object. The preceding patches added support for many individual filter types. Enable users to compose these filters by implementing support for the `combine` filter type. Mapping intersection onto path_walk_info works because every supported child filter is a monotonic restriction: - `blob:none`, `tree:0` unconditionally clear `info->blobs` and (for `tree:0`) `info->trees`; clearing an already-cleared flag is a no-op. - `object:type=X` is now expressed as an AND of each type flag with the filtered type, so applying multiple such filters only refines the existing set rather than overwrites it. - `blob:limit=N` has to compose too: the intersection of "size < L1" and "size < L2" is "size < min(L1, L2)". 
Update the `LOFC_BLOB_LIMIT` handler to take the running minimum when `info->blob_limit` is already set, so a combined filter with, e.g., both "blob:limit=10" and "blob:limit=5" produces a limit of 5 regardless of ordering. - `sparse:oid` is left unchanged. A `combine` filter may include `sparse:oid` at most once, since the existing handler refuses to overwrite `info->pl`. Two `sparse:oid` filters in a single `combine` would be unusual and are rejected with a warning, matching the standalone `sparse:oid` behavior. Implementation-wise, the existing `prepare_filters()` called `list_objects_filter_release()` inside each case branch. That works fine for top-level filters, but `combine` filters need to recurse over their child filters without releasing each one in turn (since the parent's release iterates the sub array). Split `prepare_filters()` into a recursive helper that performs only the mutation, plus a thin wrapper that calls the helper and then releases the top-level filter once. The `LOFC_COMBINE` case in the helper just walks `sub_nr` and recurses; child filters are released by the wrapper's single `list_objects_filter_release()` call on the parent (which itself recursively releases each sub-filter, the same way it always has). If any sub-filter is unsupported (e.g. "tree:1", "sparse:", or a not-yet-supported choice), the recursion bubbles a failure up and the existing pack-objects/backfill fallback paths kick in. Add coverage in t6601: - "combine:blob:none+tree:0" collapses to "tree:0" - "combine:object:type=blob+blob:limit=3" yields only the blobs smaller than three bytes - "combine:object:type=blob+object:type=tree" intersects to empty - "combine:tree:1+blob:none" reports the "tree:1" error. Update Documentation/git-pack-objects.adoc to add combine to the list of supported --filter forms.
Signed-off-by: Taylor Blau Signed-off-by: Derrick Stolee --- Documentation/git-pack-objects.adoc | 3 +- path-walk.c | 25 ++++++++-- t/t6601-path-walk.sh | 71 +++++++++++++++++++++++++++++ 3 files changed, 93 insertions(+), 6 deletions(-) diff --git a/Documentation/git-pack-objects.adoc b/Documentation/git-pack-objects.adoc index f2852ebd3172dc..8a27aa19fd3f1f 100644 --- a/Documentation/git-pack-objects.adoc +++ b/Documentation/git-pack-objects.adoc @@ -405,7 +405,8 @@ will be automatically changed to version `1`. Incompatible with `--delta-islands`. The `--use-bitmap-index` option is ignored in the presence of `--path-walk`. The `--path-walk` option supports the `--filter=` forms `blob:none`, `blob:limit=`, -`tree:0`, `object:type=`, and `sparse:`. +`tree:0`, `object:type=`, and `sparse:`. These supported filter +types can be combined with the `combine:+` form. DELTA ISLANDS diff --git a/path-walk.c b/path-walk.c index 418972e753840d..94ff90bd1566b6 100644 --- a/path-walk.c +++ b/path-walk.c @@ -571,8 +571,8 @@ static int setup_pending_objects(struct path_walk_info *info, return 0; } -static int prepare_filters(struct path_walk_info *info, - struct list_objects_filter_options *options) +static int prepare_filters_one(struct path_walk_info *info, + struct list_objects_filter_options *options) { switch (options->choice) { case LOFC_DISABLED: @@ -589,7 +589,8 @@ static int prepare_filters(struct path_walk_info *info, if (info) { if (!options->blob_limit_value) info->blobs = 0; - else + else if (!info->blob_limit || + info->blob_limit > options->blob_limit_value) info->blob_limit = options->blob_limit_value; list_objects_filter_release(options); } @@ -604,7 +605,6 @@ static int prepare_filters(struct path_walk_info *info, if (info) { info->trees = 0; info->blobs = 0; - list_objects_filter_release(options); } return 1; @@ -656,8 +656,13 @@ static int prepare_filters(struct path_walk_info *info, warning(_("sparse filter is not cone-mode compatible")); return 0; } + } + 
return 1; - list_objects_filter_release(options); + case LOFC_COMBINE: + for (size_t i = 0; i < options->sub_nr; i++) { + if (!prepare_filters_one(info, &options->sub[i])) + return 0; } return 1; @@ -668,6 +673,16 @@ static int prepare_filters(struct path_walk_info *info, } } +static int prepare_filters(struct path_walk_info *info, + struct list_objects_filter_options *options) +{ + if (!prepare_filters_one(info, options)) + return 0; + if (info) + list_objects_filter_release(options); + return 1; +} + int path_walk_filter_compatible(struct list_objects_filter_options *options) { return prepare_filters(NULL, options); diff --git a/t/t6601-path-walk.sh b/t/t6601-path-walk.sh index 0fd8e61c76f458..e9fcd85e7520bf 100755 --- a/t/t6601-path-walk.sh +++ b/t/t6601-path-walk.sh @@ -727,6 +727,77 @@ test_expect_success 'all, object:type=blob filter' ' test_cmp_sorted expect out ' +test_expect_success 'all, combine:blob:none+tree:0 filter' ' + test-tool path-walk \ + --filter=combine:blob:none+tree:0 -- --all >out && + + cat >expect <<-EOF && + 0:commit::$(git rev-parse topic) + 0:commit::$(git rev-parse base) + 0:commit::$(git rev-parse base~1) + 0:commit::$(git rev-parse base~2) + 1:tag:/tags:$(git rev-parse refs/tags/first) + 1:tag:/tags:$(git rev-parse refs/tags/second.1) + 1:tag:/tags:$(git rev-parse refs/tags/second.2) + 1:tag:/tags:$(git rev-parse refs/tags/third) + 1:tag:/tags:$(git rev-parse refs/tags/fourth) + 1:tag:/tags:$(git rev-parse refs/tags/tree-tag) + 1:tag:/tags:$(git rev-parse refs/tags/blob-tag) + 2:blob:/tagged-blobs:$(git rev-parse refs/tags/blob-tag^{}) + 2:blob:/tagged-blobs:$(git rev-parse refs/tags/blob-tag2^{}) + 3:tree:/tagged-trees:$(git rev-parse refs/tags/tree-tag^{tree}) + 3:tree:/tagged-trees:$(git rev-parse refs/tags/tree-tag2) + blobs:2 + commits:4 + tags:7 + trees:2 + EOF + + test_cmp_sorted expect out +' + +test_expect_success 'all, combine:object:type=blob+blob:limit=3 filter' ' + test-tool path-walk \ + 
--filter=combine:object:type=blob+blob:limit=3 \ + -- --all >out && + + cat >expect <<-EOF && + 0:blob:/tagged-blobs:$(git rev-parse refs/tags/blob-tag^{}) + 0:blob:/tagged-blobs:$(git rev-parse refs/tags/blob-tag2^{}) + 1:blob:a:$(git rev-parse base~2:a) + 2:blob:left/b:$(git rev-parse base~2:left/b) + 3:blob:right/c:$(git rev-parse base~2:right/c) + 4:blob:right/d:$(git rev-parse base~1:right/d) + blobs:6 + commits:0 + tags:0 + trees:0 + EOF + + test_cmp_sorted expect out +' + +test_expect_success 'all, combine of disjoint object:types is empty' ' + test-tool path-walk \ + --filter=combine:object:type=blob+object:type=tree \ + -- --all >out && + + cat >expect <<-EOF && + blobs:0 + commits:0 + tags:0 + trees:0 + EOF + + test_cmp_sorted expect out +' + +test_expect_success 'combine: rejects unsupported subfilters' ' + test_must_fail test-tool path-walk \ + --filter=combine:tree:1+blob:none -- --all 2>err && + test_grep "tree:1 filter not supported by the path-walk API" err +' + test_expect_success 'setup sparse filter blob' ' # Cone-mode patterns: include root, exclude all dirs, include left/ cat >patterns <<-\EOF &&