Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions Documentation/config/diff.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,14 @@ endif::git-diff[]
Set this option to `true` to make the diff driver cache the text
conversion outputs. See linkgit:gitattributes[5] for details.

`diff.<driver>.process`::
The command to run as a long-running diff process.
The tool communicates via the pkt-line protocol and returns
hunks that are fed into Git's diff and blame pipelines.
If the tool returns zero hunks, the file is treated as
unchanged for both diff output and blame attribution.
See linkgit:gitattributes[5] for details.

`diff.indentHeuristic`::
Set this option to `false` to disable the default heuristics
that shift diff hunk boundaries to make patches easier to read.
Expand Down
43 changes: 43 additions & 0 deletions Documentation/gitattributes.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -821,6 +821,49 @@ NOTE: If `diff.<name>.command` is defined for path with the
(see above), and adding `diff.<name>.algorithm` has no effect, as the
algorithm is not passed to the external diff driver.

Using an external diff process
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

An external tool can provide content-aware line matching by
setting `diff.<name>.process` to the command that runs
the tool. The tool is a long-running process that communicates via
the pkt-line protocol (see
linkgit:gitprotocol-long-running-process[5]).

------------------------
*.c diff=cdiff
------------------------

----------------------------------------------------------------
[diff "cdiff"]
process = /path/to/diff-process-tool
----------------------------------------------------------------

The tool receives file pairs and returns hunk descriptors indicating
which lines changed. Git feeds these hunks into its standard diff
pipeline, so all output features (word diff, function context,
color) work normally.

If the tool fails or returns an error, Git silently falls back to
the builtin diff algorithm. If the tool returns invalid hunks
(out of bounds, overlapping), Git also falls back silently.

The handshake negotiates `version=1` and `capability=hunks`.
Per-file requests send `command=hunks` and `pathname=<path>`,
followed by the old and new file content as packetized data.
The tool responds with lines of the form
`hunk <old_start> <old_count> <new_start> <new_count>`
(1-based line numbers), a flush packet, and `status=success`.

If the tool returns zero hunks with `status=success`, Git treats
the file as having no changes and produces no diff output.
`git blame` also consults the diff process and skips commits
where it reports zero hunks, attributing lines to earlier commits
instead.

Tools should ignore unknown keys in the per-file request to
remain forward-compatible.

Defining a custom hunk-header
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Expand Down
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -1142,6 +1142,7 @@ LIB_OBJS += diff-delta.o
LIB_OBJS += diff-merges.o
LIB_OBJS += diff-lib.o
LIB_OBJS += diff-no-index.o
LIB_OBJS += diff-process.o
LIB_OBJS += diff.o
LIB_OBJS += diffcore-break.o
LIB_OBJS += diffcore-delta.o
Expand Down
43 changes: 39 additions & 4 deletions blame.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
#include "tag.h"
#include "trace2.h"
#include "blame.h"
#include "diff-process.h"
#include "userdiff.h"
#include "alloc.h"
#include "commit-slab.h"
#include "bloom.h"
Expand Down Expand Up @@ -315,16 +317,47 @@ static struct commit *fake_working_tree_commit(struct repository *r,


static int diff_hunks(mmfile_t *file_a, mmfile_t *file_b,
xdl_emit_hunk_consume_func_t hunk_func, void *cb_data, int xdl_opts)
xdl_emit_hunk_consume_func_t hunk_func, void *cb_data,
int xdl_opts, struct index_state *istate,
const char *path)
{
xpparam_t xpp = {0};
xdemitconf_t xecfg = {0};
xdemitcb_t ecb = {NULL};
struct xdl_hunk *ext_hunks = NULL;
int ret;

xpp.flags = xdl_opts;
xecfg.hunk_func = hunk_func;
ecb.priv = cb_data;
return xdi_diff(file_a, file_b, &xpp, &xecfg, &ecb);

if (path && istate) {
struct userdiff_driver *drv;
drv = userdiff_find_by_path(istate, path);
if (drv && drv->process) {
size_t nr = 0;
if (!diff_process_get_hunks(drv, path,
file_a->ptr, file_a->size,
file_b->ptr, file_b->size,
&ext_hunks, &nr)) {
if (!nr) {
/*
* Zero hunks: the diff process
* considers these files equivalent.
* Skip so blame looks past this
* commit.
*/
return 0;
}
xpp.external_hunks = ext_hunks;
xpp.external_hunks_nr = nr;
}
}
}

ret = xdi_diff(file_a, file_b, &xpp, &xecfg, &ecb);
free(ext_hunks);
return ret;
}

static const char *get_next_line(const char *start, const char *end)
Expand Down Expand Up @@ -1961,7 +1994,8 @@ static void pass_blame_to_parent(struct blame_scoreboard *sb,
&sb->num_read_blob, ignore_diffs);
sb->num_get_patch++;

if (diff_hunks(&file_p, &file_o, blame_chunk_cb, &d, sb->xdl_opts))
if (diff_hunks(&file_p, &file_o, blame_chunk_cb, &d, sb->xdl_opts,
sb->revs->diffopt.repo->index, target->path))
die("unable to generate diff (%s -> %s)",
oid_to_hex(&parent->commit->object.oid),
oid_to_hex(&target->commit->object.oid));
Expand Down Expand Up @@ -2114,7 +2148,8 @@ static void find_copy_in_blob(struct blame_scoreboard *sb,
* file_p partially may match that image.
*/
memset(split, 0, sizeof(struct blame_entry [3]));
if (diff_hunks(file_p, &file_o, handle_split_cb, &d, sb->xdl_opts))
if (diff_hunks(file_p, &file_o, handle_split_cb, &d, sb->xdl_opts,
NULL, NULL))
die("unable to generate diff (%s)",
oid_to_hex(&parent->commit->object.oid));
/* remainder, if any, all match the preimage */
Expand Down
206 changes: 206 additions & 0 deletions diff-process.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,206 @@
/*
* Diff process backend: communicates with a long-running external
* tool via the pkt-line protocol to obtain custom line-matching
* results. Unlike textconv, which transforms the displayed content,
* hunks from a diff process reference original line numbers and
* the display shows the actual file content.
*
* Protocol: pkt-line over stdin/stdout, following the pattern of
* the long-running filter process protocol (see convert.c).
*
* Handshake:
* git> git-diff-client / version=1 / flush
* tool< git-diff-server / version=1 / flush
* git> capability=hunks / flush
* tool< capability=hunks / flush
*
* Per-file:
* git> command=hunks / pathname=<path> / flush
* git> <old content packetized> / flush
* git> <new content packetized> / flush
* tool< hunk <old_start> <old_count> <new_start> <new_count>
* tool< ... / flush
* tool< status=success / flush
*
* Zero hunks with status=success means the tool considers the
* files equivalent. Git will skip the diff for that file.
*/

#include "git-compat-util.h"
#include "diff-process.h"
#include "userdiff.h"
#include "sub-process.h"
#include "pkt-line.h"
#include "strbuf.h"
#include "xdiff/xdiff.h"

#define CAP_HUNKS (1u << 0)

struct diff_subprocess {
struct subprocess_entry subprocess;
unsigned int supported_capabilities;
};

static int subprocess_map_initialized;
static struct hashmap subprocess_map;

static int start_diff_process_fn(struct subprocess_entry *subprocess)
{
static int versions[] = { 1, 0 };
static struct subprocess_capability capabilities[] = {
{ "hunks", CAP_HUNKS },
{ NULL, 0 }
};
struct diff_subprocess *entry =
(struct diff_subprocess *)subprocess;

/* Uses dying pkt-line variant, same as convert.c filters. */
return subprocess_handshake(subprocess, "git-diff",
versions, NULL,
capabilities,
&entry->supported_capabilities);
}

static struct diff_subprocess *find_or_start_process(const char *cmd)
{
struct diff_subprocess *entry;

if (!subprocess_map_initialized) {
subprocess_map_initialized = 1;
hashmap_init(&subprocess_map, cmd2process_cmp, NULL, 0);
}

entry = (struct diff_subprocess *)
subprocess_find_entry(&subprocess_map, cmd);
if (entry)
return entry;

entry = xcalloc(1, sizeof(*entry));
if (subprocess_start(&subprocess_map, &entry->subprocess,
cmd, start_diff_process_fn)) {
free(entry);
return NULL;
}

return entry;
}

static int send_file_content(int fd, const char *buf, long size)
{
int ret;

if (size > 0)
ret = write_packetized_from_buf_no_flush(buf, size, fd);
else
ret = 0;
if (ret)
return ret;
return packet_flush_gently(fd);
}

static int parse_hunk_line(const char *line, struct xdl_hunk *hunk)
{
char *end;

/* Format: "hunk <old_start> <old_count> <new_start> <new_count>" */
if (!skip_prefix(line, "hunk ", &line))
return -1;

hunk->old_start = strtol(line, &end, 10);
if (end == line || *end != ' ')
return -1;
line = end;

hunk->old_count = strtol(line, &end, 10);
if (end == line || *end != ' ')
return -1;
line = end;

hunk->new_start = strtol(line, &end, 10);
if (end == line || *end != ' ')
return -1;
line = end;

hunk->new_count = strtol(line, &end, 10);
if (end == line || *end != '\0')
return -1;

return 0;
}

int diff_process_get_hunks(struct userdiff_driver *drv,
const char *path,
const char *old_buf, long old_size,
const char *new_buf, long new_size,
struct xdl_hunk **hunks_out,
size_t *nr_hunks_out)
{
struct diff_subprocess *backend;
struct child_process *process;
int fd_in, fd_out;
struct strbuf status = STRBUF_INIT;
struct xdl_hunk *hunks = NULL;
struct xdl_hunk hunk;
size_t nr_hunks = 0, alloc_hunks = 0;
int len;
char *line;

if (!drv || !drv->process)
return -1;

backend = find_or_start_process(drv->process);
if (!backend)
return -1;

if (!(backend->supported_capabilities & CAP_HUNKS))
return -1;

process = subprocess_get_child_process(&backend->subprocess);
fd_in = process->in;
fd_out = process->out;

/* Send request */
if (packet_write_fmt_gently(fd_in, "command=hunks\n") ||
packet_write_fmt_gently(fd_in, "pathname=%s\n", path) ||
packet_flush_gently(fd_in))
goto error;

/* Send old file content */
if (send_file_content(fd_in, old_buf, old_size))
goto error;

/* Send new file content */
if (send_file_content(fd_in, new_buf, new_size))
goto error;

/* Read hunks until flush packet */
while ((len = packet_read_line_gently(fd_out, NULL, &line)) >= 0 &&
line) {
if (parse_hunk_line(line, &hunk) < 0)
goto error;
ALLOC_GROW(hunks, nr_hunks + 1, alloc_hunks);
hunks[nr_hunks++] = hunk;
}
if (len < 0)
goto error;

/* Read status */
if (subprocess_read_status(fd_out, &status))
goto error;

if (strcmp(status.buf, "success")) {
if (!strcmp(status.buf, "abort"))
backend->supported_capabilities &= ~CAP_HUNKS;
goto error;
}

*hunks_out = hunks;
*nr_hunks_out = nr_hunks;
strbuf_release(&status);
return 0;

error:
free(hunks);
strbuf_release(&status);
return -1;
}
28 changes: 28 additions & 0 deletions diff-process.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#ifndef DIFF_PROCESS_H
#define DIFF_PROCESS_H

struct userdiff_driver;
struct xdl_hunk;

/*
* Query a diff process for hunks describing the changes
* between old_buf and new_buf.
*
* The backend is a long-running subprocess configured via
* diff.<driver>.process. It receives file content via
* pkt-line and returns hunks with 1-based line numbers.
*
* On success, sets *hunks_out and *nr_hunks_out to a newly allocated
* array (caller must free) and returns 0.
*
* On failure, returns -1. The caller should fall back to the
* builtin diff algorithm.
*/
int diff_process_get_hunks(struct userdiff_driver *drv,
const char *path,
const char *old_buf, long old_size,
const char *new_buf, long new_size,
struct xdl_hunk **hunks_out,
size_t *nr_hunks_out);

#endif /* DIFF_PROCESS_H */
Loading
Loading