Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 18 additions & 3 deletions src/specify_cli/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -397,7 +397,15 @@ def save_init_options(project_path: Path, options: dict[str, Any]) -> None:
"""
dest = project_path / INIT_OPTIONS_FILE
dest.parent.mkdir(parents=True, exist_ok=True)
dest.write_text(json.dumps(options, indent=2, sort_keys=True))
# Pin UTF-8 explicitly: ``Path.write_text`` defaults to the system
# locale codec, which is cp1252 / gb2312 / cp932 on Windows. A
# locale-encoded write succeeds locally but produces a file a peer
# machine (different locale) or Unix CI cannot decode. The sibling
# integration-catalog writer in ``integrations/catalog.py`` already
# pins ``encoding="utf-8"`` for the same reason.
dest.write_text(
json.dumps(options, indent=2, sort_keys=True), encoding="utf-8"
)


def load_init_options(project_path: Path) -> dict[str, Any]:
Expand All @@ -409,8 +417,15 @@ def load_init_options(project_path: Path) -> dict[str, Any]:
if not path.exists():
return {}
try:
return json.loads(path.read_text())
except (json.JSONDecodeError, OSError):
# Match the explicit UTF-8 used by ``save_init_options``; without
# it ``read_text`` falls back to the system codec on Windows and
# raises ``UnicodeDecodeError`` on any file a peer wrote with
# non-ASCII content. ``UnicodeDecodeError`` is a subclass of
# ``ValueError``, not ``OSError`` / ``json.JSONDecodeError``, so
# it must be listed explicitly here to preserve the existing
# "fall back to empty dict" contract.
return json.loads(path.read_text(encoding="utf-8"))
except (json.JSONDecodeError, OSError, UnicodeDecodeError):
return {}


Expand Down
8 changes: 7 additions & 1 deletion src/specify_cli/extensions.py
Original file line number Diff line number Diff line change
Expand Up @@ -761,7 +761,13 @@ def _load_extensionignore(source_dir: Path) -> Optional[Callable[[str, List[str]
if not ignore_file.exists():
return None

lines: List[str] = ignore_file.read_text().splitlines()
# Pin UTF-8 explicitly: ``Path.read_text`` defaults to the system
# locale codec on Windows (cp1252 / gb2312 / cp932), which silently
# corrupts multibyte patterns when the file is shared across
# machines with different locales. The loop below already
# normalises backslashes "so Windows-authored files work", so the
# codebase already expects Windows authors to write this file.
lines: List[str] = ignore_file.read_text(encoding="utf-8").splitlines()

# Normalise backslashes in patterns so Windows-authored files work
normalised: List[str] = []
Expand Down
46 changes: 44 additions & 2 deletions tests/test_extensions.py
Original file line number Diff line number Diff line change
Expand Up @@ -3147,9 +3147,13 @@ def _make_extension(self, temp_dir, valid_manifest_data, extra_files=None, ignor
else:
p.write_text(content)

# Write .extensionignore
# Write .extensionignore. Pinned to UTF-8 so non-ASCII patterns
# in tests (see ``test_extensionignore_utf8_patterns``) survive
# the round-trip on Windows runners with non-UTF-8 default locales.
if ignore_content is not None:
(ext_dir / ".extensionignore").write_text(ignore_content)
(ext_dir / ".extensionignore").write_text(
ignore_content, encoding="utf-8"
)

return ext_dir

Expand Down Expand Up @@ -3379,6 +3383,44 @@ def test_extensionignore_windows_backslash_patterns(self, temp_dir, valid_manife
assert (dest / "docs" / "guide.md").exists()
assert not (dest / "docs" / "internal" / "draft.md").exists()

def test_extensionignore_utf8_patterns(self, temp_dir, valid_manifest_data):
    """Multibyte ``.extensionignore`` patterns exclude their directories.

    The ignore file is read with an explicit ``encoding="utf-8"`` so
    that patterns such as ``ドキュメント/`` or ``café/`` are decoded the
    same way regardless of the host's default locale codec. If the
    file were decoded with a different codec, the patterns would turn
    into mojibake, stop matching, and the files the author meant to
    exclude would be installed anyway.
    """
    excluded_dirs = ("ドキュメント", "café")
    payload = {
        "ドキュメント/private.md": "secret",
        "ドキュメント/public.md": "public",
        "docs/guide.md": "# Guide",
        "café/résumé.txt": "draft",
    }
    source_dir = self._make_extension(
        temp_dir,
        valid_manifest_data,
        extra_files=payload,
        ignore_content="ドキュメント/\ncafé/\n",
    )

    # Minimal project skeleton expected by the installer.
    project_root = temp_dir / "project"
    project_root.mkdir()
    specify_dir = project_root / ".specify"
    specify_dir.mkdir()

    ExtensionManager(project_root).install_from_directory(
        source_dir, "0.1.0", register_commands=False
    )

    installed = specify_dir / "extensions" / "test-ext"
    # Every directory named by a multibyte pattern must be absent.
    for dir_name in excluded_dirs:
        assert not (installed / dir_name).exists()
    # An ASCII path that no pattern matches is still copied.
    assert (installed / "docs" / "guide.md").exists()

def test_extensionignore_star_does_not_cross_directories(self, temp_dir, valid_manifest_data):
"""'*' should NOT match across directory boundaries (gitignore semantics)."""
ext_dir = self._make_extension(
Expand Down
39 changes: 39 additions & 0 deletions tests/test_presets.py
Original file line number Diff line number Diff line change
Expand Up @@ -2269,6 +2269,45 @@ def test_load_returns_empty_on_invalid_json(self, project_dir):

assert load_init_options(project_dir) == {}

@pytest.mark.parametrize(
    "value",
    ["名前-プロジェクト", "café-résumé", "Ωmega-Δelta", "🚀-launch"],
)
def test_save_load_round_trip_preserves_non_ascii(self, project_dir, value):
    """Saving then loading init options returns non-ASCII text unchanged.

    Both ``save_init_options`` and ``load_init_options`` pin
    ``encoding="utf-8"``. Without that, ``Path.write_text`` and
    ``Path.read_text`` use the platform's default locale codec, so a
    project name such as ``café`` written on one machine could come
    back garbled (or fail to decode) on a machine with another locale.
    """
    from specify_cli import load_init_options, save_init_options

    options = {"ai": "claude", "project_name": value}
    save_init_options(project_dir, options)

    # The stored name must be identical to what was written.
    assert load_init_options(project_dir)["project_name"] == value

def test_load_returns_empty_on_locale_corrupted_file(self, project_dir):
    """An options file that is not valid UTF-8 loads as ``{}``.

    Mimics a file written by an older client, or by a machine whose
    default codec is not UTF-8. ``load_init_options`` is expected to
    keep its "fall back to an empty dict" behaviour instead of letting
    ``UnicodeDecodeError`` reach the caller.
    """
    from specify_cli import load_init_options

    specify_dir = project_dir / ".specify"
    specify_dir.mkdir(parents=True, exist_ok=True)

    # 0xE9 is 'é' in cp1252. In UTF-8 it opens a three-byte sequence,
    # but the byte that follows (the closing quote) is not a
    # continuation byte, so UTF-8 decoding of this payload fails.
    cp1252_payload = b'{"project_name": "caf\xe9"}'
    (specify_dir / "init-options.json").write_bytes(cp1252_payload)

    result = load_init_options(project_dir)
    assert result == {}


class TestPresetSkills:
"""Tests for preset skill registration and unregistration.
Expand Down