Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 51 additions & 6 deletions builtin/checkout.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
#include "revision.h"
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Junio C Hamano wrote on the Git mailing list (how to reply to this email):

"Derrick Stolee via GitGitGadget" <gitgitgadget@gmail.com> writes:

> From: Derrick Stolee <stolee@gmail.com>
>
> Teach update_some() to handle sparse directory entries at the tree
> level rather than expanding the entire sparse index. When iterating a
> source tree during checkout/restore operations:
>
>  - If a directory matches a sparse directory entry with the same OID,
>    skip it entirely (no change needed).
>
>  - If the OID differs and we are in non-overlay mode (e.g., restore
>    --staged), update the sparse directory entry's OID in place. This
>    is semantically correct because non-overlay mode removes paths not
>    in the source tree anyway.
>
>  - In overlay mode (e.g., checkout <tree> -- .), fall through to
>    recursive descent so individual file entries are preserved
>    correctly.
>
> Also switch from index_name_pos() to index_name_pos_sparse() for
> individual file lookups to avoid triggering ensure_full_index() when
> the file is already individually tracked in the index.
>
> Update the test expectation in t1092 to assert that 'restore --staged'
> no longer expands the sparse index.
>
> Signed-off-by: Derrick Stolee <stolee@gmail.com>
> ---
>  builtin/checkout.c                       | 57 +++++++++++++++++++++---
>  t/t1092-sparse-checkout-compatibility.sh |  8 ++--
>  2 files changed, 55 insertions(+), 10 deletions(-)
>
> diff --git a/builtin/checkout.c b/builtin/checkout.c
> index 1345e8574a..67f03dea10 100644
> --- a/builtin/checkout.c
> +++ b/builtin/checkout.c
> @@ -31,6 +31,7 @@
>  #include "revision.h"
>  #include "sequencer.h"
>  #include "setup.h"
> +#include "sparse-index.h"
>  #include "strvec.h"
>  #include "submodule.h"
>  #include "symlinks.h"
> @@ -142,14 +143,56 @@ static int post_checkout_hook(struct commit *old_commit, struct commit *new_comm
>  }
>  
>  static int update_some(const struct object_id *oid, struct strbuf *base,
> -		       const char *pathname, unsigned mode, void *context UNUSED)
> +		       const char *pathname, unsigned mode, void *context)
>  {
>  	int len;
>  	struct cache_entry *ce;
>  	int pos;
> +	int overlay_mode = context ? *((int *)context) : 1;
>  
> -	if (S_ISDIR(mode))
> +	if (S_ISDIR(mode)) {
> +		/*
> +		 * If this directory exists as a sparse directory entry in
> +		 * the index, we can handle it at the tree level without
> +		 * descending into individual files.
> +		 */
> +		if (the_repository->index->sparse_index) {

I wonder if this deep nesting is a sign that the newly added code
from here to ...

> +			struct strbuf dirpath = STRBUF_INIT;
> +
> +			strbuf_addbuf(&dirpath, base);
> +			strbuf_addstr(&dirpath, pathname);
> +			strbuf_addch(&dirpath, '/');
> +
> +			pos = index_name_pos_sparse(the_repository->index,
> +						    dirpath.buf, dirpath.len);
> +			if (pos >= 0) {
> +				struct cache_entry *old =
> +					the_repository->index->cache[pos];
> +				if (S_ISSPARSEDIR(old->ce_mode)) {
> +					if (oideq(oid, &old->oid)) {
> +						strbuf_release(&dirpath);
> +						return 0;
> +					}
> +					if (!overlay_mode) {
> +						/*
> +						 * In non-overlay mode (e.g.,
> +						 * restore --staged), we can
> +						 * replace the sparse dir OID
> +						 * directly since files not in
> +						 * the source tree should be
> +						 * removed anyway.
> +						 */
> +						oidcpy(&old->oid, oid);
> +						old->ce_flags |= CE_UPDATE;
> +						strbuf_release(&dirpath);
> +						return 0;
> +					}
> +				}
> +			}
> +			strbuf_release(&dirpath);
> +		}

... here may become easier to understand if it is made into a small
helper function with a descriptive name.

>  		return READ_TREE_RECURSIVE;
> +	}
>  
>  	len = base->len + strlen(pathname);
>  	ce = make_empty_cache_entry(the_repository->index, len);
> @@ -165,7 +208,7 @@ static int update_some(const struct object_id *oid, struct strbuf *base,
>  	 * entry in place. Whether it is UPTODATE or not, checkout_entry will
>  	 * do the right thing.
>  	 */
> -	pos = index_name_pos(the_repository->index, ce->name, ce->ce_namelen);
> +	pos = index_name_pos_sparse(the_repository->index, ce->name, ce->ce_namelen);
>  	if (pos >= 0) {
>  		struct cache_entry *old = the_repository->index->cache[pos];
>  		if (ce->ce_mode == old->ce_mode &&
> @@ -182,10 +225,11 @@ static int update_some(const struct object_id *oid, struct strbuf *base,
>  	return 0;
>  }
>  
> -static int read_tree_some(struct tree *tree, const struct pathspec *pathspec)
> +static int read_tree_some(struct tree *tree, const struct pathspec *pathspec,
> +			  int overlay_mode)
>  {
>  	read_tree(the_repository, tree,
> -		  pathspec, update_some, NULL);
> +		  pathspec, update_some, &overlay_mode);
>  
>  	/* update the index with the given tree's info
>  	 * for all args, expanding wildcards, and exit
> @@ -580,7 +624,8 @@ static int checkout_paths(const struct checkout_opts *opts,
>  		return error(_("index file corrupt"));
>  
>  	if (opts->source_tree)
> -		read_tree_some(opts->source_tree, &opts->pathspec);
> +		read_tree_some(opts->source_tree, &opts->pathspec,
> +			       opts->overlay_mode);
>  	if (opts->merge)
>  		unmerge_index(the_repository->index, &opts->pathspec, CE_MATCHED);
>  
> diff --git a/t/t1092-sparse-checkout-compatibility.sh b/t/t1092-sparse-checkout-compatibility.sh
> index d69434e7ab..8186da5c88 100755
> --- a/t/t1092-sparse-checkout-compatibility.sh
> +++ b/t/t1092-sparse-checkout-compatibility.sh
> @@ -2608,19 +2608,19 @@ test_expect_success 'restore --staged with wildcards' '
>  	test_all_match git diff --cached
>  '
>  
> -test_expect_success 'sparse-index is expanded: restore --staged' '
> +test_expect_success 'sparse-index is not expanded: restore --staged' '
>  	init_repos &&
>  
>  	git -C sparse-index checkout -b restore-staged-exp base &&
>  	git -C sparse-index reset --soft update-folder1 &&
> -	ensure_expanded restore --staged .
> +	ensure_not_expanded restore --staged .
>  '
>  
> -test_expect_success 'sparse-index is expanded: restore --source --staged' '
> +test_expect_success 'sparse-index is not expanded: restore --source --staged' '
>  	init_repos &&
>  
>  	git -C sparse-index checkout -b restore-source-staged base &&
> -	ensure_expanded restore --source update-folder1 --staged .
> +	ensure_not_expanded restore --source update-folder1 --staged .
>  '

Very nice.

#include "sequencer.h"
#include "setup.h"
#include "sparse-index.h"
#include "strvec.h"
#include "submodule.h"
#include "symlinks.h"
Expand Down Expand Up @@ -142,14 +143,56 @@ static int post_checkout_hook(struct commit *old_commit, struct commit *new_comm
}

static int update_some(const struct object_id *oid, struct strbuf *base,
const char *pathname, unsigned mode, void *context UNUSED)
const char *pathname, unsigned mode, void *context)
{
int len;
struct cache_entry *ce;
int pos;
int overlay_mode = context ? *((int *)context) : 1;

if (S_ISDIR(mode))
if (S_ISDIR(mode)) {
/*
* If this directory exists as a sparse directory entry in
* the index, we can handle it at the tree level without
* descending into individual files.
*/
if (the_repository->index->sparse_index) {
struct strbuf dirpath = STRBUF_INIT;

strbuf_addbuf(&dirpath, base);
strbuf_addstr(&dirpath, pathname);
strbuf_addch(&dirpath, '/');

pos = index_name_pos_sparse(the_repository->index,
dirpath.buf, dirpath.len);
if (pos >= 0) {
struct cache_entry *old =
the_repository->index->cache[pos];
if (S_ISSPARSEDIR(old->ce_mode)) {
if (oideq(oid, &old->oid)) {
strbuf_release(&dirpath);
return 0;
}
if (!overlay_mode) {
/*
* In non-overlay mode (e.g.,
* restore --staged), we can
* replace the sparse dir OID
* directly since files not in
* the source tree should be
* removed anyway.
*/
oidcpy(&old->oid, oid);
old->ce_flags |= CE_UPDATE;
strbuf_release(&dirpath);
return 0;
}
}
}
strbuf_release(&dirpath);
}
return READ_TREE_RECURSIVE;
}

len = base->len + strlen(pathname);
ce = make_empty_cache_entry(the_repository->index, len);
Expand All @@ -165,7 +208,7 @@ static int update_some(const struct object_id *oid, struct strbuf *base,
* entry in place. Whether it is UPTODATE or not, checkout_entry will
* do the right thing.
*/
pos = index_name_pos(the_repository->index, ce->name, ce->ce_namelen);
pos = index_name_pos_sparse(the_repository->index, ce->name, ce->ce_namelen);
if (pos >= 0) {
struct cache_entry *old = the_repository->index->cache[pos];
if (ce->ce_mode == old->ce_mode &&
Expand All @@ -182,10 +225,11 @@ static int update_some(const struct object_id *oid, struct strbuf *base,
return 0;
}

static int read_tree_some(struct tree *tree, const struct pathspec *pathspec)
static int read_tree_some(struct tree *tree, const struct pathspec *pathspec,
int overlay_mode)
{
read_tree(the_repository, tree,
pathspec, update_some, NULL);
pathspec, update_some, &overlay_mode);

/* update the index with the given tree's info
* for all args, expanding wildcards, and exit
Expand Down Expand Up @@ -580,7 +624,8 @@ static int checkout_paths(const struct checkout_opts *opts,
return error(_("index file corrupt"));

if (opts->source_tree)
read_tree_some(opts->source_tree, &opts->pathspec);
read_tree_some(opts->source_tree, &opts->pathspec,
opts->overlay_mode);
if (opts->merge)
unmerge_index(the_repository->index, &opts->pathspec, CE_MATCHED);

Expand Down
50 changes: 50 additions & 0 deletions t/t1092-sparse-checkout-compatibility.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2573,4 +2573,54 @@ test_expect_success 'sparse-index is not expanded: merge-ours' '
ensure_not_expanded merge -s ours merge-right
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Junio C Hamano wrote on the Git mailing list (how to reply to this email):

"Derrick Stolee via GitGitGadget" <gitgitgadget@gmail.com> writes:

> From: Derrick Stolee <stolee@gmail.com>
>
> A user reported that 'git restore --staged .' causes the sparse index to
> expand. This is somewhat natural because the '.' pathspec means 'check
> every path'. However, the restore will not update paths marked with the
> SKIP_WORKTREE bit, so we shouldn't need to process such entries.

Interesting.  So, ideally we should be able to say "we are doing
everything because the user gave us '.' from the top level of the
working tree, so let's see each entry and decide what to do.  Ah, we
have this tree entry in this sparse index, and that is outside the
directories we are dealing with in this working tree that is
sparsely checked out, so we would skip it", and for that we have no
need to expand the index.  But in reality, what happens is "OK, '.'
so we need to deal with everything.  Let's expand.", which would
break the contents of such a "skipped" tree out into its constituent
paths, all of which inherit the SKIP_WORKTREE bit to tell us that
these paths are outside the directories we are dealing with.

The end result in the working tree should be the same, but we
unnecessarily expand the index.  Correctness comes first, leaving
room for improvement in performance, which is what we want to see
and then improve ;-)  Nice.

> For now, establish the current behavior, including the sparse index
> expansion, in the t1092 test case as a baseline.
>
> Signed-off-by: Derrick Stolee <stolee@gmail.com>
> ---
>  t/t1092-sparse-checkout-compatibility.sh | 50 ++++++++++++++++++++++++
>  1 file changed, 50 insertions(+)
>
> diff --git a/t/t1092-sparse-checkout-compatibility.sh b/t/t1092-sparse-checkout-compatibility.sh
> index d98cb4ac11..d69434e7ab 100755
> --- a/t/t1092-sparse-checkout-compatibility.sh
> +++ b/t/t1092-sparse-checkout-compatibility.sh
> @@ -2573,4 +2573,54 @@ test_expect_success 'sparse-index is not expanded: merge-ours' '
>  	ensure_not_expanded merge -s ours merge-right
>  '
>  
> +test_expect_success 'restore --staged with sparse definition' '
> +	init_repos &&
> +
> +	# Stage changes within the sparse definition
> +	test_all_match git checkout -b restore-staged-1 base &&
> +	test_all_match git reset --soft update-deep &&
> +	test_all_match git restore --staged . &&
> +	test_all_match git status --porcelain=v2 &&
> +	test_all_match git diff --cached
> +'
> +
> +test_expect_success 'restore --staged with outside sparse definition' '
> +	init_repos &&
> +
> +	# Stage changes that include paths outside the sparse definition.
> +	# Although the working tree differs between full and sparse checkouts
> +	# after restore, the state of the index should be the same.
> +	test_all_match git checkout -b restore-staged-2 base &&
> +	test_all_match git reset --soft update-folder1 &&
> +	test_sparse_match git restore --staged . &&
> +	git -C full-checkout restore --staged . &&
> +	test_all_match git ls-files -s -- folder1 &&
> +	test_all_match git diff --cached -- folder1
> +'
> +
> +test_expect_success 'restore --staged with wildcards' '
> +	init_repos &&
> +
> +	test_all_match git checkout -b restore-staged-3 base &&
> +	test_all_match git reset --soft update-deep &&
> +	test_all_match git restore --staged "deep/*" &&
> +	test_all_match git status --porcelain=v2 &&
> +	test_all_match git diff --cached
> +'
> +
> +test_expect_success 'sparse-index is expanded: restore --staged' '
> +	init_repos &&
> +
> +	git -C sparse-index checkout -b restore-staged-exp base &&
> +	git -C sparse-index reset --soft update-folder1 &&
> +	ensure_expanded restore --staged .
> +'
> +
> +test_expect_success 'sparse-index is expanded: restore --source --staged' '
> +	init_repos &&
> +
> +	git -C sparse-index checkout -b restore-source-staged base &&
> +	ensure_expanded restore --source update-folder1 --staged .
> +'
> +
>  test_done

'

# Unstaging with "restore --staged ." when every staged change lies inside
# the sparse definition.  Each git command is run through test_all_match,
# which presumably checks that all test repositories agree -- confirm
# against the helper definition earlier in this script.
test_expect_success 'restore --staged with sparse definition' '
init_repos &&

# Stage changes within the sparse definition
test_all_match git checkout -b restore-staged-1 base &&
test_all_match git reset --soft update-deep &&
# Unstage everything, then compare the resulting status and cached diff.
test_all_match git restore --staged . &&
test_all_match git status --porcelain=v2 &&
test_all_match git diff --cached
'

# Unstaging with "restore --staged ." when the staged changes include
# paths outside the sparse definition.  Only the index state for folder1
# is compared across all repositories at the end.
test_expect_success 'restore --staged with outside sparse definition' '
init_repos &&

# Stage changes that include paths outside the sparse definition.
# Although the working tree differs between full and sparse checkouts
# after restore, the state of the index should be the same.
test_all_match git checkout -b restore-staged-2 base &&
test_all_match git reset --soft update-folder1 &&
# The restore itself is only compared between the sparse repositories;
# the full checkout runs it separately without any comparison.
test_sparse_match git restore --staged . &&
git -C full-checkout restore --staged . &&
# Compare index contents only, limited to folder1.
test_all_match git ls-files -s -- folder1 &&
test_all_match git diff --cached -- folder1
'

# Unstaging with a wildcard pathspec ("deep/*") instead of ".".
test_expect_success 'restore --staged with wildcards' '
init_repos &&

test_all_match git checkout -b restore-staged-3 base &&
test_all_match git reset --soft update-deep &&
# The pathspec is quoted so that git, not the shell, matches the glob.
test_all_match git restore --staged "deep/*" &&
test_all_match git status --porcelain=v2 &&
test_all_match git diff --cached
'

# "restore --staged ." should be able to run without expanding the sparse
# index.  ensure_not_expanded is presumably the helper that runs the given
# git command in the sparse-index repository and fails if the index gets
# expanded -- confirm against its definition earlier in this script.
test_expect_success 'sparse-index is not expanded: restore --staged' '
init_repos &&

git -C sparse-index checkout -b restore-staged-exp base &&
# The soft reset moves HEAD to update-folder1 without touching the index,
# leaving staged differences for the restore to undo.
git -C sparse-index reset --soft update-folder1 &&
ensure_not_expanded restore --staged .
'

# Same expectation as the plain "restore --staged ." case, but the index is
# restored from an explicit source (update-folder1) rather than from HEAD.
# No soft reset is needed here because --source names the tree directly.
test_expect_success 'sparse-index is not expanded: restore --source --staged' '
init_repos &&

git -C sparse-index checkout -b restore-source-staged base &&
ensure_not_expanded restore --source update-folder1 --staged .
'

test_done
Loading