diff --git a/pygit2/__init__.py b/pygit2/__init__.py index 518f3436..52ff9638 100644 --- a/pygit2/__init__.py +++ b/pygit2/__init__.py @@ -286,7 +286,6 @@ _cache_enums, discover_repository, filter_register, - filter_unregister, hash, hashfile, init_file_backend, @@ -545,6 +544,27 @@ def clone_repository( return Repository._from_c(crepo[0], owned=True) +def filter_unregister(name: str) -> None: + """ + Unregister the given filter. + + Note that the filter registry is not thread safe. Any registering or + deregistering of filters should be done outside of any possible usage + of the filters. + + In particular, any FilterLists that use the filter must have been garbage + collected before you can unregister the filter. + """ + from .filter import FilterList + + if FilterList._is_filter_in_use(name): + raise RuntimeError(f"filter still in use: '{name}'") + + c_name = to_bytes(name) + err = C.git_filter_unregister(c_name) + check_error(err) + + tree_entry_key = functools.cmp_to_key(tree_entry_cmp) settings = Settings() @@ -610,7 +630,6 @@ def clone_repository( # Low Level API (not present in .pyi) 'FilterSource', 'filter_register', - 'filter_unregister', 'GIT_APPLY_LOCATION_BOTH', 'GIT_APPLY_LOCATION_INDEX', 'GIT_APPLY_LOCATION_WORKDIR', diff --git a/pygit2/_libgit2/ffi.pyi b/pygit2/_libgit2/ffi.pyi index e73a7ad7..a0710cae 100644 --- a/pygit2/_libgit2/ffi.pyi +++ b/pygit2/_libgit2/ffi.pyi @@ -123,6 +123,10 @@ class GitBlameC: # incomplete pass +class GitBlobC: + # incomplete + pass + class GitMergeOptionsC: file_favor: int flags: int @@ -177,6 +181,10 @@ class GitDescribeOptionsC: class GitDescribeResultC: pass +class GitFilterListC: + # opaque struct + pass + class GitIndexC: pass @@ -264,6 +272,8 @@ def new(a: Literal['git_oid *']) -> GitOidC: ... @overload def new(a: Literal['git_blame **']) -> _Pointer[GitBlameC]: ... @overload +def new(a: Literal['git_blob **']) -> _Pointer[GitBlobC]: ... +@overload def new(a: Literal['git_clone_options *']) -> GitCloneOptionsC: ... @overload def new(a: Literal['git_merge_options *']) -> GitMergeOptionsC: ... @@ -318,6 +328,8 @@ def new(a: Literal['git_signature *']) -> GitSignatureC: ... @overload def new(a: Literal['git_signature **']) -> _Pointer[GitSignatureC]: ... @overload +def new(a: Literal['git_filter_list **']) -> _Pointer[GitFilterListC]: ... +@overload def new(a: Literal['int *']) -> int_c: ... @overload def new(a: Literal['int64_t *']) -> int64_t: ... diff --git a/pygit2/_pygit2.pyi b/pygit2/_pygit2.pyi index 0dda0636..a30a911a 100644 --- a/pygit2/_pygit2.pyi +++ b/pygit2/_pygit2.pyi @@ -857,6 +857,5 @@ def reference_is_valid_name(refname: str) -> bool: ... def tree_entry_cmp(a: Object, b: Object) -> int: ... def _cache_enums() -> None: ... def filter_register(name: str, filter: type[Filter]) -> None: ... -def filter_unregister(name: str) -> None: ... _OidArg = str | Oid diff --git a/pygit2/_run.py b/pygit2/_run.py index 863565f6..85d31f69 100644 --- a/pygit2/_run.py +++ b/pygit2/_run.py @@ -77,6 +77,7 @@ 'net.h', 'refspec.h', 'repository.h', + 'filter.h', 'commit.h', 'revert.h', 'stash.h', @@ -96,6 +97,7 @@ C_PREAMBLE = """\ #include #include +#include """ # ffi diff --git a/pygit2/decl/filter.h b/pygit2/decl/filter.h new file mode 100644 index 00000000..51734687 --- /dev/null +++ b/pygit2/decl/filter.h @@ -0,0 +1,49 @@ +typedef enum { + GIT_FILTER_TO_WORKTREE = ..., + GIT_FILTER_TO_ODB = ..., +} git_filter_mode_t; + +typedef enum { + GIT_FILTER_DEFAULT = ..., + GIT_FILTER_ALLOW_UNSAFE = ..., + GIT_FILTER_NO_SYSTEM_ATTRIBUTES = ..., + GIT_FILTER_ATTRIBUTES_FROM_HEAD = ..., + GIT_FILTER_ATTRIBUTES_FROM_COMMIT = ..., +} git_filter_flag_t; + +int git_filter_unregister( + const char *name); + +int git_filter_list_load( + git_filter_list **filters, + git_repository *repo, + git_blob *blob, + const char *path, + git_filter_mode_t mode, + uint32_t flags); + +int git_filter_list_contains( + git_filter_list *filters, + const char *name); + +int git_filter_list_apply_to_buffer( + git_buf *out, + git_filter_list *filters, + const char* in, + size_t in_len); + +int git_filter_list_apply_to_file( + git_buf *out, + git_filter_list *filters, + git_repository *repo, + const char *path); + +int git_filter_list_apply_to_blob( + git_buf *out, + git_filter_list *filters, + git_blob *blob); + +size_t git_filter_list_length( + const git_filter_list *fl); + +void git_filter_list_free(git_filter_list *filters); diff --git a/pygit2/decl/types.h b/pygit2/decl/types.h index 0bef96d3..64d7ce0a 100644 --- a/pygit2/decl/types.h +++ b/pygit2/decl/types.h @@ -1,6 +1,8 @@ +typedef struct git_blob git_blob; typedef struct git_commit git_commit; typedef struct git_annotated_commit git_annotated_commit; typedef struct git_config git_config; +typedef struct git_filter_list git_filter_list; typedef struct git_index git_index; typedef struct git_index_conflict_iterator git_index_conflict_iterator; typedef struct git_object git_object; diff --git a/pygit2/filter.py b/pygit2/filter.py index 5c89a0d1..6fc953e3 100644 --- a/pygit2/filter.py +++ b/pygit2/filter.py @@ -23,9 +23,20 @@ # the Free Software Foundation, 51 Franklin Street, Fifth Floor, # Boston, MA 02110-1301, USA. +from __future__ import annotations + +import weakref from collections.abc import Callable +from typing import TYPE_CHECKING + +from ._pygit2 import Blob, FilterSource +from .errors import check_error +from .ffi import C, ffi +from .utils import to_bytes -from ._pygit2 import FilterSource +if TYPE_CHECKING: + from ._libgit2.ffi import GitFilterListC + from .repository import BaseRepository class Filter: @@ -107,3 +118,90 @@ def close(self, write_next: Callable[[bytes], None]) -> None: Any remaining filtered output data must be written to `write_next` before returning. """ + + +class FilterList: + _all_filter_lists: set[weakref.ReferenceType[FilterList]] = set() + + _pointer: GitFilterListC + + @classmethod + def _from_c(cls, ptr: GitFilterListC): + if ptr == ffi.NULL: + return None + + fl = cls.__new__(cls) + fl._pointer = ptr + + # Keep track of this FilterList until it's garbage collected. This lets + # `filter_unregister` ensure the user isn't trying to delete a filter + # that's still in use. + ref = weakref.ref(fl, FilterList._all_filter_lists.remove) + FilterList._all_filter_lists.add(ref) + + return fl + + @classmethod + def _is_filter_in_use(cls, name: str) -> bool: + for ref in cls._all_filter_lists: + fl = ref() + if fl is not None and name in fl: + return True + return False + + def __contains__(self, name: str) -> bool: + if not isinstance(name, str): + raise TypeError('argument must be str') + c_name = to_bytes(name) + result = C.git_filter_list_contains(self._pointer, c_name) + return bool(result) + + def __len__(self) -> int: + return C.git_filter_list_length(self._pointer) + + def apply_to_buffer(self, data: bytes) -> bytes: + """ + Apply a filter list to a data buffer. + Return the filtered contents. + """ + buf = ffi.new('git_buf *') + err = C.git_filter_list_apply_to_buffer(buf, self._pointer, data, len(data)) + check_error(err) + try: + return ffi.string(buf.ptr) + finally: + C.git_buf_dispose(buf) + + def apply_to_file(self, repo: BaseRepository, path: str) -> bytes: + """ + Apply a filter list to the contents of a file on disk. + Return the filtered contents. + """ + buf = ffi.new('git_buf *') + c_path = to_bytes(path) + err = C.git_filter_list_apply_to_file(buf, self._pointer, repo._repo, c_path) + check_error(err) + try: + return ffi.string(buf.ptr) + finally: + C.git_buf_dispose(buf) + + def apply_to_blob(self, blob: Blob) -> bytes: + """ + Apply a filter list to a data buffer. + Return the filtered contents. + """ + buf = ffi.new('git_buf *') + + c_blob = ffi.new('git_blob **') + ffi.buffer(c_blob)[:] = blob._pointer[:] + + err = C.git_filter_list_apply_to_blob(buf, self._pointer, c_blob[0]) + check_error(err) + try: + return ffi.string(buf.ptr) + finally: + C.git_buf_dispose(buf) + + def __del__(self): + C.git_filter_list_free(self._pointer) diff --git a/pygit2/repository.py b/pygit2/repository.py index 18caa5e7..2605eee3 100644 --- a/pygit2/repository.py +++ b/pygit2/repository.py @@ -64,6 +64,7 @@ DescribeStrategy, DiffOption, FileMode, + FilterMode, MergeFavor, MergeFileFlag, MergeFlag, @@ -73,6 +74,7 @@ ) from .errors import check_error from .ffi import C, ffi +from .filter import FilterList from .index import Index, IndexEntry, MergeFileResult from .packbuilder import PackBuilder from .references import References @@ -235,6 +237,31 @@ def hashfile( oid = Oid(raw=bytes(ffi.buffer(c_oid.id)[:])) return oid + def load_filter_list( + self, path: str, mode: FilterMode = FilterMode.TO_ODB + ) -> FilterList | None: + """ + Load the filter list for a given path. + May return None if there are no filters to apply to this path. + + Parameters: + + path + Relative path of the file to be filtered + + mode + Filtering direction: ODB to worktree (SMUDGE), or worktree to ODB + (CLEAN). + """ + c_filters = ffi.new('git_filter_list **') + c_path = to_bytes(path) + c_mode = int(mode) + + err = C.git_filter_list_load(c_filters, self._repo, ffi.NULL, c_path, c_mode, 0) + check_error(err) + fl = FilterList._from_c(c_filters[0]) + return fl + def __iter__(self) -> Iterator[Oid]: return iter(self.odb) diff --git a/src/pygit2.c b/src/pygit2.c index 2649efc0..215d2a2f 100644 --- a/src/pygit2.c +++ b/src/pygit2.c @@ -345,31 +345,6 @@ filter_register(PyObject *self, PyObject *args, PyObject *kwds) return result; } -PyDoc_STRVAR(filter_unregister__doc__, - "filter_unregister(name: str) -> None\n" - "\n" - "Unregister the given filter.\n" - "\n" - "Note that the filter registry is not thread safe. Any registering or\n" - "deregistering of filters should be done outside of any possible usage\n" - "of the filters.\n"); - -PyObject * -filter_unregister(PyObject *self, PyObject *args) -{ - const char *name; - Py_ssize_t size; - int err; - - if (!PyArg_ParseTuple(args, "s#", &name, &size)) - return NULL; - if ((err = git_filter_unregister(name)) < 0) - return Error_set(err); - - Py_RETURN_NONE; -} - - static void forget_enums(void) { @@ -441,7 +416,6 @@ PyMethodDef module_methods[] = { {"reference_is_valid_name", reference_is_valid_name, METH_O, reference_is_valid_name__doc__}, {"tree_entry_cmp", tree_entry_cmp, METH_VARARGS, tree_entry_cmp__doc__}, {"filter_register", (PyCFunction)filter_register, METH_VARARGS | METH_KEYWORDS, filter_register__doc__}, - {"filter_unregister", filter_unregister, METH_VARARGS, filter_unregister__doc__}, {"_cache_enums", _cache_enums, METH_NOARGS, _cache_enums__doc__}, {NULL} }; diff --git a/test/test_filter.py b/test/test_filter.py index 26a45095..c4338489 100644 --- a/test/test_filter.py +++ b/test/test_filter.py @@ -1,4 +1,5 @@ import codecs +import gc from collections.abc import Callable, Generator from io import BytesIO @@ -6,7 +7,7 @@ import pygit2 from pygit2 import Blob, Filter, FilterSource, Repository -from pygit2.enums import BlobFilter +from pygit2.enums import BlobFilter, FilterMode from pygit2.errors import Passthrough @@ -56,32 +57,34 @@ class _UnmatchedFilter(_Rot13Filter): attributes = 'filter=rot13' +def _filter_fixture(name: str, filter: type[Filter]) -> Generator[None, None, None]: + pygit2.filter_register(name, filter) + yield + + # Collect any FilterLists that may use this filter before unregistering it + gc.collect() + + pygit2.filter_unregister(name) + + @pytest.fixture def rot13_filter() -> Generator[None, None, None]: - pygit2.filter_register('rot13', _Rot13Filter) - yield - pygit2.filter_unregister('rot13') + yield from _filter_fixture('rot13', _Rot13Filter) @pytest.fixture def passthrough_filter() -> Generator[None, None, None]: - pygit2.filter_register('passthrough-rot13', _PassthroughFilter) - yield - pygit2.filter_unregister('passthrough-rot13') + yield from _filter_fixture('passthrough-rot13', _PassthroughFilter) @pytest.fixture def buffered_filter() -> Generator[None, None, None]: - pygit2.filter_register('buffered-rot13', _BufferedFilter) - yield - pygit2.filter_unregister('buffered-rot13') + yield from _filter_fixture('buffered-rot13', _BufferedFilter) @pytest.fixture def unmatched_filter() -> Generator[None, None, None]: - pygit2.filter_register('unmatched-rot13', _UnmatchedFilter) - yield - pygit2.filter_unregister('unmatched-rot13') + yield from _filter_fixture('unmatched-rot13', _UnmatchedFilter) def test_filter(testrepo: Repository, rot13_filter: Filter) -> None: @@ -136,3 +139,94 @@ def test_filter_cleanup(dirtyrepo: Repository, rot13_filter: Filter) -> None: # Indirectly test that pygit2_filter_cleanup has the GIL # before calling pygit2_filter_payload_free. dirtyrepo.diff() + + +def test_filterlist_none(testrepo: Repository) -> None: + fl = testrepo.load_filter_list('hello.txt') + assert fl is None + + +def test_filterlist_apply_to_buffer_crlf_clean(testrepo: Repository) -> None: + testrepo.config['core.autocrlf'] = True + + fl = testrepo.load_filter_list('whatever.txt', mode=FilterMode.CLEAN) + assert fl is not None + assert len(fl) == 1 + assert 'crlf' in fl + assert 'bogus_filter_name' not in fl + with pytest.raises(TypeError): + 1234 in fl # type: ignore + + filtered = fl.apply_to_buffer(b'hello\r\nworld\r\n') + assert filtered == b'hello\nworld\n' + + +def test_filterlist_apply_to_buffer_crlf_smudge(testrepo: Repository) -> None: + testrepo.config['core.autocrlf'] = True + + fl = testrepo.load_filter_list('whatever.txt', mode=FilterMode.SMUDGE) + assert fl is not None + assert len(fl) == 1 + assert 'crlf' in fl + + filtered = fl.apply_to_buffer(b'hello\nworld\n') + assert filtered == b'hello\r\nworld\r\n' + + +def test_filterlist_dangerous_unregister(testrepo: Repository) -> None: + pygit2.filter_register('rot13', _Rot13Filter) + + fl = testrepo.load_filter_list('hello.txt') + assert fl is not None + assert len(fl) == 1 + assert 'rot13' in fl + + # Unregistering a filter that's still in use in a FilterList is dangerous! + # Our built-in check (that raises RuntimeError) may avert a segfault. + with pytest.raises(RuntimeError): + pygit2.filter_unregister('rot13') + + # Delete any FilterLists that use the filter, and only then is it safe + # to unregister the filter. + del fl + gc.collect() + pygit2.filter_unregister('rot13') + + +def test_filterlist_apply_to_file(testrepo: Repository, rot13_filter: Filter) -> None: + fl = testrepo.load_filter_list('bye.txt') + assert fl is not None + assert len(fl) == 1 + assert 'rot13' in fl + + filtered = fl.apply_to_file(testrepo, 'bye.txt') + assert filtered == b'olr jbeyq\n' + + +def test_filterlist_apply_to_blob(testrepo: Repository, rot13_filter: Filter) -> None: + fl = testrepo.load_filter_list('whatever.txt') + assert fl is not None + assert len(fl) == 1 + assert 'rot13' in fl + + blob_oid = testrepo.create_blob(b'bye world\n') + blob = testrepo[blob_oid] + assert isinstance(blob, Blob) + + filtered = fl.apply_to_blob(blob) + assert filtered == b'olr jbeyq\n' + + +def test_filterlist_apply_to_buffer_multiple( + testrepo: Repository, rot13_filter: Filter +) -> None: + testrepo.config['core.autocrlf'] = True + + fl = testrepo.load_filter_list('whatever.txt') + assert fl is not None + assert len(fl) == 2 + assert 'crlf' in fl + assert 'rot13' in fl + + filtered = fl.apply_to_buffer(b'bye\r\nworld\r\n') + assert filtered == b'olr\njbeyq\n'