From f3646ae6ecc4a16aad7feee0c3371ba27930a36d Mon Sep 17 00:00:00 2001 From: masklinn Date: Sat, 28 Mar 2026 18:04:46 +0100 Subject: [PATCH] Fix yaml and json loaders This is mostly an issue on windows, but technically it affects every OS. I know about this issue so there's no excuse, although I guess this means there's no user on windows, or at least of the yaml loader on windows. The (well known, at least by me) problem here is that `open` in text mode (the default) will retrieve the encoding it uses via `locale.getencoding()` which is pretty much always the wrong thing to do but it is what it is[^1]. On most unices this is innocuous because they generally have utf-8 set as the locale encoding, but on windows it's generally going to fuck you up because it likely has a stupid ANSI locale set (only if you're really lucky has someone chanced setting 65001), leading to any non-ascii content potentially breaking file reading as the codepage will either not be able to decode it or will misread it. Both JSON and PyYAML are perfectly happy reading binary files, in which case they'll apply UTF-8 decoding and go on their merry way, so change the code to that. [^1]: it's finally being fixed in 3.15 by PEP 686 https://docs.python.org/3.15/whatsnew/3.15.html#whatsnew315-utf8-default --- src/ua_parser/loaders.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ua_parser/loaders.py b/src/ua_parser/loaders.py index 55774ea..38edff4 100644 --- a/src/ua_parser/loaders.py +++ b/src/ua_parser/loaders.py @@ -196,7 +196,7 @@ def load_json(f: PathOrFile, loader: DataLoader = load_data) -> Matchers: """ if isinstance(f, (str, os.PathLike)): - with open(f) as fp: + with open(f, "rb") as fp: regexes = json.load(fp) else: regexes = json.load(f) @@ -224,7 +224,7 @@ def load_yaml(path: PathOrFile, loader: DataLoader = load_data) -> Matchers: instead. 
""" if isinstance(path, (str, os.PathLike)): - with open(path) as fp: + with open(path, "rb") as fp: regexes = load(fp, Loader=SafeLoader) # type: ignore else: regexes = load(path, Loader=SafeLoader) # type: ignore