diff --git a/scripts/shell_completions/bash/borg b/scripts/shell_completions/bash/borg index f5d6ccba39..a010ddc8fc 100644 --- a/scripts/shell_completions/bash/borg +++ b/scripts/shell_completions/bash/borg @@ -75,7 +75,7 @@ _borg() local opts="-e --encryption --append-only --storage-quota --make-parent-dirs ${common_opts}" ;; *' create '*) - local opts="-n --dry-run -s --stats --list --filter --json --no-cache-sync --stdin-name --content-from-command -e --exclude --exclude-from --pattern --patterns-from --exclude-caches --exclude-if-present --keep-exclude-tags --exclude-nodump -x --one-file-system --numeric-owner --noatime --noctime --nobirthtime --nobsdflags --noacls --noxattrs --noflags --files-cache --read-special --comment --timestamp -c --checkpoint-interval --chunker-params -C --compression ${common_opts}" + local opts="-n --dry-run -s --stats --list --filter --json --no-cache-sync --stdin-name --content-from-command -e --exclude --exclude-from --pattern --patterns-from --exclude-caches --exclude-if-present --keep-exclude-tags --exclude-nodump --exclude-dataless -x --one-file-system --numeric-owner --noatime --noctime --nobirthtime --nobsdflags --noacls --noxattrs --noflags --files-cache --read-special --comment --timestamp -c --checkpoint-interval --chunker-params -C --compression ${common_opts}" ;; *' extract '*) local opts="--list -n --dry-run --numeric-owner --nobsdflags --noacls --noxattrs --stdout --sparse -e --exclude --exclude-from --pattern --patterns-from --strip-components ${common_opts}" diff --git a/scripts/shell_completions/fish/borg.fish b/scripts/shell_completions/fish/borg.fish index c1465871a4..b07cd73387 100644 --- a/scripts/shell_completions/fish/borg.fish +++ b/scripts/shell_completions/fish/borg.fish @@ -111,6 +111,7 @@ complete -c borg -l 'exclude-if-present' -d 'Exclude directories that complete -c borg -f -l 'keep-exclude-tags' -d 'Keep tag files of excluded directories' -n "__fish_seen_subcommand_from create" complete -c borg -f -l 'keep-tag-files' -d 'Keep tag files of excluded directories' -n "__fish_seen_subcommand_from create" complete -c borg -f -l 'exclude-nodump' -d 'Exclude files flagged NODUMP' -n "__fish_seen_subcommand_from create" +complete -c borg -f -l 'exclude-dataless' -d 'Exclude files flagged DATALESS (macOS)' -n "__fish_seen_subcommand_from create" # Filesystem options complete -c borg -f -s x -l 'one-file-system' -d 'Stay in the same file system' -n "__fish_seen_subcommand_from create" complete -c borg -f -l 'numeric-owner' -d 'Only store numeric user:group identifiers' -n "__fish_seen_subcommand_from create" diff --git a/scripts/shell_completions/zsh/_borg b/scripts/shell_completions/zsh/_borg index 1599843aba..0577ed856d 100644 --- a/scripts/shell_completions/zsh/_borg +++ b/scripts/shell_completions/zsh/_borg @@ -147,6 +147,7 @@ _borg-create() { '--stdin-name=[use NAME in archive for stdin data (default: "stdin")]:NAME' \ '--content-from-command[interpret PATH as command and store its stdout]' \ '--exclude-nodump[exclude files flagged NODUMP]' \ + '--exclude-dataless[exclude files flagged DATALESS (macOS: not-locally-materialized cloud files)]' \ '(-x --one-file-system)'{-x,--one-file-system}'[stay in the same file system]' \ '--numeric-owner[only store numeric user and group identifiers]' \ '--noatime[do not store atime into archive]' \ diff --git a/src/borg/archiver.py b/src/borg/archiver.py index b8c5235a89..0eafecd886 100644 --- a/src/borg/archiver.py +++ b/src/borg/archiver.py @@ -109,6 +109,11 @@ STATS_HEADER = " Original size Compressed size Deduplicated size" +# macOS: SF_DATALESS marks dataless placeholder files (e.g. cloud files not materialized locally). +# Reading such files triggers downloading their content. stat.SF_DATALESS is only available +# from Python 3.13 on, thus we fall back to the value from macOS' sys/stat.h. +SF_DATALESS = getattr(stat, 'SF_DATALESS', 0x40000000) + PURE_PYTHON_MSGPACK_WARNING = "Using a pure-python msgpack! This will result in lower performance." @@ -703,6 +708,7 @@ def create_inner(archive, cache, fso): self.noacls = args.noacls self.noxattrs = args.noxattrs self.exclude_nodump = args.exclude_nodump + self.exclude_dataless = args.exclude_dataless dry_run = args.dry_run t0 = utcnow() t0_monotonic = time.monotonic() @@ -828,12 +834,18 @@ def _rec_walk(self, *, path, parent_fd, name, fso, cache, matcher, # directory of the mounted filesystem that shadows the mountpoint dir). recurse = restrict_dev is None or st.st_dev == restrict_dev - if self.exclude_nodump: - # Ignore if nodump flag is set + if self.exclude_nodump or self.exclude_dataless: with backup_io('flags'): - if get_flags(path=path, st=st) & stat.UF_NODUMP: - self.print_file_status('x', path) - return + flags = get_flags(path=path, st=st) + # Ignore if nodump flag is set + if self.exclude_nodump and flags & stat.UF_NODUMP: + self.print_file_status('x', path) + return + # Ignore if dataless flag is set (macOS: content not materialized locally, + # reading the file would trigger downloading it from cloud storage) + if self.exclude_dataless and flags & SF_DATALESS: + self.print_file_status('x', path) + return if not stat.S_ISDIR(st.st_mode): # directories cannot go in this branch because they can be excluded based on tag @@ -3930,6 +3942,9 @@ def define_borg_mount(parser): exclude_group = define_exclusion_group(subparser, tag_files=True) exclude_group.add_argument('--exclude-nodump', dest='exclude_nodump', action='store_true', help='exclude files flagged NODUMP') + exclude_group.add_argument('--exclude-dataless', dest='exclude_dataless', action='store_true', + help='exclude files flagged DATALESS (macOS: placeholder files whose content ' + 'is not materialized locally, e.g. not-downloaded cloud storage files)') fs_group = subparser.add_argument_group('Filesystem options') fs_group.add_argument('-x', '--one-file-system', dest='one_file_system', action='store_true', diff --git a/src/borg/testsuite/archiver.py b/src/borg/testsuite/archiver.py index ffeb8b50cd..46df739ff0 100644 --- a/src/borg/testsuite/archiver.py +++ b/src/borg/testsuite/archiver.py @@ -2191,6 +2191,27 @@ def test_file_status_excluded(self): if has_lchflags: self.assert_in("x input/file3", output) + def test_create_exclude_dataless(self): + """test that files flagged SF_DATALESS are excluded with --exclude-dataless""" + from ..archiver import SF_DATALESS + + self.create_regular_file('file1', size=1024 * 80) + self.create_regular_file('cloudfile', size=1024 * 80) + + # SF_DATALESS cannot be set from userspace, so fake the flags lookup. + def fake_get_flags(path, st, fd=None): + return SF_DATALESS if path.endswith('cloudfile') else 0 + + self.cmd('init', '--encryption=repokey', self.repository_location) + with patch('borg.archiver.get_flags', fake_get_flags): + output = self.cmd('create', '--list', '--exclude-dataless', self.repository_location + '::test', 'input') + self.assert_in('A input/file1', output) + self.assert_in('x input/cloudfile', output) + # without --exclude-dataless, the file is backed up + with patch('borg.archiver.get_flags', fake_get_flags): + output = self.cmd('create', '--list', self.repository_location + '::test2', 'input') + self.assert_in('A input/cloudfile', output) + def test_create_json(self): self.create_regular_file('file1', size=1024 * 80) self.cmd('init', '--encryption=repokey', self.repository_location)