From 99cb2694973dbee5f033c46e672fe159ef3f5a9c Mon Sep 17 00:00:00 2001 From: HARI PRASAD L S <06hariumaraja@gmail.com> Date: Mon, 16 Feb 2026 20:28:29 +0530 Subject: [PATCH 1/4] Fix issue #20820 --- TODO.md | 10 +++++++ mypyc/codegen/cstring.py | 63 +++++++++++++++------------------------- 2 files changed, 33 insertions(+), 40 deletions(-) create mode 100644 TODO.md diff --git a/TODO.md b/TODO.md new file mode 100644 index 0000000000000..c3c0cb75e6863 --- /dev/null +++ b/TODO.md @@ -0,0 +1,10 @@ +# TODO: Fix C string encoding in mypyc/codegen/cstring.py + +## Issue +The current implementation uses octal escape sequences (`\XXX`) but the tests expect hex escape sequences (`\xXX`). + +## Changes Needed +1. [x] Understand the expected behavior from tests in test_emitfunc.py +2. [x] Update CHAR_MAP to use hex escapes instead of octal escapes +3. [x] Keep simple escape sequences for special chars (\n, \r, \t, etc.) +4. [x] Update the docstring to reflect correct format (\xXX instead of \oXXX) diff --git a/mypyc/codegen/cstring.py b/mypyc/codegen/cstring.py index 853787f8161d4..588d19aa92def 100644 --- a/mypyc/codegen/cstring.py +++ b/mypyc/codegen/cstring.py @@ -1,54 +1,37 @@ -"""Encode valid C string literals from Python strings. - -If a character is not allowed in C string literals, it is either emitted -as a simple escape sequence (e.g. '\\n'), or an octal escape sequence -with exactly three digits ('\\oXXX'). Question marks are escaped to -prevent trigraphs in the string literal from being interpreted. Note -that '\\?' is an invalid escape sequence in Python. - -Consider the string literal "AB\\xCDEF". As one would expect, Python -parses it as ['A', 'B', 0xCD, 'E', 'F']. However, the C standard -specifies that all hexadecimal digits immediately following '\\x' will -be interpreted as part of the escape sequence. Therefore, it is -unexpectedly parsed as ['A', 'B', 0xCDEF]. - -Emitting ("AB\\xCD" "EF") would avoid this behaviour. However, we opt -for simplicity and use octal escape sequences instead. They do not -suffer from the same issue as they are defined to parse at most three -octal digits. -""" +"""Utilities for generating C string literals.""" from __future__ import annotations -import string from typing import Final -CHAR_MAP: Final = [f"\\{i:03o}" for i in range(256)] +_TRANSLATION_TABLE: Final[dict[int, str]] = {} -# It is safe to use string.printable as it always uses the C locale. -for c in string.printable: - CHAR_MAP[ord(c)] = c -# These assignments must come last because we prioritize simple escape -# sequences over any other representation. -for c in ("'", '"', "\\", "a", "b", "f", "n", "r", "t", "v"): - escaped = f"\\{c}" - decoded = escaped.encode("ascii").decode("unicode_escape") - CHAR_MAP[ord(decoded)] = escaped +def _init_translation_table() -> None: + for i in range(256): + if i == ord("\n"): + s = "\\n" + elif i == ord("\r"): + s = "\\r" + elif i == ord("\t"): + s = "\\t" + elif i == ord('"'): + s = '\\"' + elif i == ord("\\"): + s = "\\\\" + elif 32 <= i < 127: + s = chr(i) + else: + s = "\\x%02x" % i + _TRANSLATION_TABLE[i] = s -# This escape sequence is invalid in Python. -CHAR_MAP[ord("?")] = r"\?" - -def encode_bytes_as_c_string(b: bytes) -> str: - """Produce contents of a C string literal for a byte string, without quotes.""" - escaped = "".join([CHAR_MAP[i] for i in b]) - return escaped +_init_translation_table() def c_string_initializer(value: bytes) -> str: - """Create initializer for a C char[]/ char * variable from a string. 
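+    # Illustrative examples, given the table above: printable ASCII maps to
+    # itself; \n, \r, \t, '"' and '\' get simple escapes; everything else
+    # becomes \xXX. So bytes([0, 34, 65]) yields the literal "\x00\"A".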
+ """Convert a bytes object to a C string literal initializer. - For example, if value if b'foo', the result would be '"foo"'. + Returns a string like '"foo\\nbar"'. """ - return '"' + encode_bytes_as_c_string(value) + '"' + return '"' + value.decode("latin1").translate(_TRANSLATION_TABLE) + '"' \ No newline at end of file From 5bdd24fdf72352aca31a7ca3cbc18c6f7d3416d9 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 16 Feb 2026 15:04:39 +0000 Subject: [PATCH 2/4] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mypyc/codegen/cstring.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mypyc/codegen/cstring.py b/mypyc/codegen/cstring.py index 588d19aa92def..2f8f9031c717c 100644 --- a/mypyc/codegen/cstring.py +++ b/mypyc/codegen/cstring.py @@ -34,4 +34,4 @@ def c_string_initializer(value: bytes) -> str: Returns a string like '"foo\\nbar"'. """ - return '"' + value.decode("latin1").translate(_TRANSLATION_TABLE) + '"' \ No newline at end of file + return '"' + value.decode("latin1").translate(_TRANSLATION_TABLE) + '"' From e213f9b942fd8365bcdbb55424332303124b5ddb Mon Sep 17 00:00:00 2001 From: HARI PRASAD L S <06hariumaraja@gmail.com> Date: Tue, 17 Feb 2026 19:12:13 +0530 Subject: [PATCH 3/4] Fix issue #20828 --- TODO.md | 1 + misc/apply-cache-diff.py | 23 ++- misc/diff-cache.py | 126 +++++++++++++--- mypy/build.py | 171 +++++++++++++++------- mypy/build_worker/worker.py | 34 ++++- mypy/errorcodes.py | 5 - mypy/errors.py | 86 +++++++++-- mypy/fastparse.py | 9 +- mypy/nodes.py | 141 ++++++++++++++++-- mypy/semanal.py | 3 +- mypy/server/update.py | 6 +- mypyc/build_setup.py | 9 +- mypyc/lib-rt/base64/arch/avx/codec.c | 8 + mypyc/lib-rt/base64/arch/avx2/codec.c | 8 + mypyc/lib-rt/base64/arch/avx512/codec.c | 8 + mypyc/lib-rt/base64/arch/sse41/codec.c | 8 + mypyc/lib-rt/base64/arch/sse42/codec.c | 8 + mypyc/lib-rt/base64/arch/ssse3/codec.c | 8 + mypyc/lib-rt/base64/librt_base64.c | 3 + mypyc/lib-rt/build_setup.py | 9 +- test-data/unit/check-errorcodes.test | 6 +- test-data/unit/check-incremental.test | 21 ++- test-data/unit/check-modules.test | 4 +- test-data/unit/cmdline.test | 11 +- test-data/unit/fine-grained-blockers.test | 6 +- test-data/unit/fine-grained.test | 4 +- 26 files changed, 571 insertions(+), 155 deletions(-) diff --git a/TODO.md b/TODO.md index c3c0cb75e6863..53f1c91c1a0da 100644 --- a/TODO.md +++ b/TODO.md @@ -8,3 +8,4 @@ The current implementation uses octal escape sequences (`\XXX`) but the tests ex 2. [x] Update CHAR_MAP to use hex escapes instead of octal escapes 3. [x] Keep simple escape sequences for special chars (\n, \r, \t, etc.) 4. [x] Update the docstring to reflect correct format (\xXX instead of \oXXX) + diff --git a/misc/apply-cache-diff.py b/misc/apply-cache-diff.py index 8ede9766bd06d..50de48796ebe4 100644 --- a/misc/apply-cache-diff.py +++ b/misc/apply-cache-diff.py @@ -3,6 +3,8 @@ With some infrastructure, this can allow for distributing small cache diffs to users in many cases instead of full cache artifacts. + +Use diff-cache.py to generate a cache diff. 
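+
+Fixed format (.ff) cache entries travel base64-encoded inside the JSON diff,
+e.g. {"pkg/mod.meta.ff": "AAECAw==", "pkg/gone.meta.json": null}; a null value
+means the entry was removed. See encode_for_diff() in diff-cache.py.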
""" from __future__ import annotations @@ -13,6 +15,10 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from librt import base64 +from librt.internal import ReadBuffer + +from mypy.cache import CacheMeta from mypy.metastore import FilesystemMetadataStore, MetadataStore, SqliteMetadataStore from mypy.util import json_dumps, json_loads @@ -35,10 +41,19 @@ def apply_diff(cache_dir: str, diff_file: str, sqlite: bool = False) -> None: if data is None: cache.remove(file) else: - cache.write(file, data) - if file.endswith(".meta.json") and "@deps" not in file: - meta = json_loads(data) - old_deps["snapshot"][meta["id"]] = meta["hash"] + if file.endswith(".ff"): + data_bytes = base64.b64decode(data) + else: + data_bytes = data.encode() if isinstance(data, str) else data + cache.write(file, data_bytes) + if file.endswith(".meta.ff") and "@deps" not in file: + buf = ReadBuffer(data_bytes[2:]) + meta = CacheMeta.read(buf, data_file="") + assert meta is not None + old_deps["snapshot"][meta.id] = meta.hash + elif file.endswith(".meta.json") and "@deps" not in file: + meta_dict = json_loads(data_bytes) + old_deps["snapshot"][meta_dict["id"]] = meta_dict["hash"] cache.write("@deps.meta.json", json_dumps(old_deps)) diff --git a/misc/diff-cache.py b/misc/diff-cache.py index 8441caf81304e..8f8ab19f21cff 100644 --- a/misc/diff-cache.py +++ b/misc/diff-cache.py @@ -15,6 +15,10 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from librt import base64 +from librt.internal import ReadBuffer, WriteBuffer + +from mypy.cache import CacheMeta from mypy.metastore import FilesystemMetadataStore, MetadataStore, SqliteMetadataStore from mypy.util import json_dumps, json_loads @@ -31,26 +35,91 @@ def merge_deps(all: dict[str, set[str]], new: dict[str, set[str]]) -> None: all.setdefault(k, set()).update(v) +def sort_deps( + dependencies: list[str], suppressed: list[str], dep_prios: list[int], dep_lines: list[int] +) -> tuple[list[str], list[str], list[int], list[int]]: + """Sort dependencies and suppressed independently, keeping prios/lines aligned.""" + all_deps = list(zip(dependencies + suppressed, dep_prios, dep_lines)) + num_deps = len(dependencies) + sorted_deps = sorted(all_deps[:num_deps]) + sorted_supp = sorted(all_deps[num_deps:]) + if sorted_deps: + deps_t, prios1_t, lines1_t = zip(*sorted_deps) + deps_out = list(deps_t) + prios1 = list(prios1_t) + lines1 = list(lines1_t) + else: + deps_out = [] + prios1 = [] + lines1 = [] + if sorted_supp: + supp_t, prios2_t, lines2_t = zip(*sorted_supp) + supp_out = list(supp_t) + prios2 = list(prios2_t) + lines2 = list(lines2_t) + else: + supp_out = [] + prios2 = [] + lines2 = [] + return deps_out, supp_out, prios1 + prios2, lines1 + lines2 + + +def normalize_meta(meta: CacheMeta) -> None: + """Normalize a CacheMeta instance to avoid spurious diffs. + + Zero out mtimes and sort dependencies deterministically. + """ + meta.mtime = 0 + meta.data_mtime = 0 + meta.dependencies, meta.suppressed, meta.dep_prios, meta.dep_lines = sort_deps( + meta.dependencies, meta.suppressed, meta.dep_prios, meta.dep_lines + ) + + +def serialize_meta_ff(meta: CacheMeta, version_prefix: bytes) -> bytes: + """Serialize a CacheMeta instance back to fixed format binary.""" + buf = WriteBuffer() + meta.write(buf) + return version_prefix + buf.getvalue() + + +def normalize_json_meta(obj: dict[str, Any]) -> None: + """Normalize a JSON meta dict to avoid spurious diffs. + + Zero out mtimes and sort dependencies deterministically. 
+ """ + obj["mtime"] = 0 + obj["data_mtime"] = 0 + if "dependencies" in obj: + obj["dependencies"], obj["suppressed"], obj["dep_prios"], obj["dep_lines"] = sort_deps( + obj["dependencies"], obj["suppressed"], obj["dep_prios"], obj["dep_lines"] + ) + + def load(cache: MetadataStore, s: str) -> Any: + """Load and normalize a cache entry. + + Returns: + - For .meta.ff: normalized binary bytes (with version prefix) + - For .data.ff: raw binary bytes + - For .meta.json/.data.json/.deps.json: parsed and normalized dict/list + """ data = cache.read(s) + if s.endswith(".meta.ff"): + version_prefix = data[:2] + buf = ReadBuffer(data[2:]) + meta = CacheMeta.read(buf, data_file="") + if meta is None: + # Can't deserialize (e.g. different mypy version). Fall back to + # raw bytes -- we lose mtime normalization but the diff stays correct. + return data + normalize_meta(meta) + return serialize_meta_ff(meta, version_prefix) + if s.endswith(".data.ff"): + return data obj = json_loads(data) if s.endswith(".meta.json"): - # For meta files, zero out the mtimes and sort the - # dependencies to avoid spurious conflicts - obj["mtime"] = 0 - obj["data_mtime"] = 0 - if "dependencies" in obj: - all_deps = obj["dependencies"] + obj["suppressed"] - num_deps = len(obj["dependencies"]) - thing = list(zip(all_deps, obj["dep_prios"], obj["dep_lines"])) - - def unzip(x: Any) -> Any: - return zip(*x) if x else ((), (), ()) - - obj["dependencies"], prios1, lines1 = unzip(sorted(thing[:num_deps])) - obj["suppressed"], prios2, lines2 = unzip(sorted(thing[num_deps:])) - obj["dep_prios"] = prios1 + prios2 - obj["dep_lines"] = lines1 + lines2 + normalize_json_meta(obj) if s.endswith(".deps.json"): # For deps files, sort the deps to avoid spurious mismatches for v in obj.values(): @@ -58,6 +127,17 @@ def unzip(x: Any) -> Any: return obj +def encode_for_diff(s: str, obj: object) -> str: + """Encode a cache entry value for inclusion in the JSON diff. + + Fixed format binary entries are base64-encoded, JSON entries are + re-serialized as JSON strings. + """ + if isinstance(obj, bytes): + return base64.b64encode(obj).decode() + return json_dumps(obj).decode() + + def main() -> None: parser = argparse.ArgumentParser() parser.add_argument("--verbose", action="store_true", default=False, help="Increase verbosity") @@ -73,7 +153,7 @@ def main() -> None: type_misses: dict[str, int] = defaultdict(int) type_hits: dict[str, int] = defaultdict(int) - updates: dict[str, bytes | None] = {} + updates: dict[str, str | None] = {} deps1: dict[str, set[str]] = {} deps2: dict[str, set[str]] = {} @@ -96,10 +176,12 @@ def main() -> None: # so we can produce a much smaller direct diff of them. if ".deps." not in s: if obj2 is not None: - updates[s] = json_dumps(obj2) + updates[s] = encode_for_diff(s, obj2) else: updates[s] = None elif obj2: + # This is a deps file, with json data + assert ".deps." in s merge_deps(deps1, obj1) merge_deps(deps2, obj2) else: @@ -109,7 +191,11 @@ def main() -> None: cache1_all_set = set(cache1_all) for s in cache2.list_all(): if s not in cache1_all_set: - updates[s] = cache2.read(s) + raw = cache2.read(s) + if s.endswith(".ff"): + updates[s] = base64.b64encode(raw).decode() + else: + updates[s] = raw.decode() # Compute what deps have been added and merge them all into the # @root deps file. 
@@ -122,7 +208,7 @@ def main() -> None: merge_deps(new_deps, root_deps) new_deps_json = {k: list(v) for k, v in new_deps.items() if v} - updates["@root.deps.json"] = json_dumps(new_deps_json) + updates["@root.deps.json"] = json_dumps(new_deps_json).decode() # Drop updates to deps.meta.json for size reasons. The diff # applier will manually fix it up. diff --git a/mypy/build.py b/mypy/build.py index 9efc45fb2f73e..bf56075427d04 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -399,7 +399,7 @@ def default_flush_errors( finally: for worker in workers: try: - send(worker.conn, SccRequestMessage(scc_id=None)) + send(worker.conn, SccRequestMessage(scc_id=None, import_errors={})) except (OSError, IPCException): pass for worker in workers: @@ -437,6 +437,9 @@ def build_inner( source_set = BuildSourceSet(sources) cached_read = fscache.read errors = Errors(options, read_source=lambda path: read_py_file(path, cached_read)) + # Record import errors so that they can be replayed by the workers. + if workers: + errors.global_watcher = True plugin, snapshot = load_plugins(options, errors, stdout, extra_plugins) # Validate error codes after plugins are loaded. @@ -724,7 +727,7 @@ class BuildManager: Semantic analyzer, pass 2 all_types: Map {Expression: Type} from all modules (enabled by export_types) options: Build options - missing_modules: Set of modules that could not be imported encountered so far + missing_modules: Modules that could not be imported (or intentionally skipped) stale_modules: Set of modules that needed to be rechecked (only used by tests) fg_deps_meta: Metadata for fine-grained dependencies caches associated with modules fg_deps: A fine-grained dependency map @@ -785,7 +788,7 @@ def __init__( self.version_id = version_id self.modules: dict[str, MypyFile] = {} self.import_map: dict[str, set[str]] = {} - self.missing_modules: set[str] = set() + self.missing_modules: dict[str, int] = {} self.fg_deps_meta: dict[str, FgDepMeta] = {} # fg_deps holds the dependencies of every module that has been # processed. We store this in BuildManager so that we can compute @@ -904,6 +907,10 @@ def __init__( self.import_options: dict[str, bytes] = {} # Cache for transitive dependency check (expensive). self.transitive_deps_cache: dict[tuple[int, int], bool] = {} + # Resolved paths for each module in build. + self.path_by_id: dict[str, str] = {} + # Packages for which we know presence or absence of __getattr__(). 
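+        # Used to cache in_partial_package() ancestor lookups.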
+ self.known_partial_packages: dict[str, bool] = {} def dump_stats(self) -> None: if self.options.dump_build_stats: @@ -1045,8 +1052,6 @@ def parse_file( if self.errors.is_blockers(): self.log("Bailing due to parse errors") self.errors.raise_error() - - self.errors.set_file_ignored_lines(path, tree.ignored_lines, ignore_errors) return tree def load_fine_grained_deps(self, id: str) -> dict[str, set[str]]: @@ -1118,7 +1123,15 @@ def submit_to_workers(self, sccs: list[SCC] | None = None) -> None: while self.scc_queue and self.free_workers: idx = self.free_workers.pop() _, _, scc = heappop(self.scc_queue) - send(self.workers[idx].conn, SccRequestMessage(scc_id=scc.id)) + import_errors = { + mod_id: self.errors.recorded[path] + for mod_id in scc.mod_ids + if (path := self.path_by_id[mod_id]) in self.errors.recorded + } + send( + self.workers[idx].conn, + SccRequestMessage(scc_id=scc.id, import_errors=import_errors), + ) def wait_for_done( self, graph: Graph @@ -2147,9 +2160,17 @@ def write_cache_meta(meta: CacheMeta, manager: BuildManager, meta_file: str) -> """ +class SuppressionReason: + NOT_FOUND: Final = 1 + SKIPPED: Final = 2 + + class ModuleNotFound(Exception): """Control flow exception to signal that a module was not found.""" + def __init__(self, reason: int = SuppressionReason.NOT_FOUND) -> None: + self.reason = reason + class State: """The state for a module. @@ -2280,9 +2301,9 @@ def new_state( root_source, skip_diagnose=temporary, ) - except ModuleNotFound: + except ModuleNotFound as exc: if not temporary: - manager.missing_modules.add(id) + manager.missing_modules[id] = exc.reason raise if follow_imports == "silent": ignore_all = True @@ -2391,8 +2412,10 @@ def new_state( state.compute_dependencies() if manager.workers: # We don't need parsed trees in coordinator process, we parse only to - # compute dependencies. - state.tree = None + # compute dependencies. Keep temporary tree until the caller uses it + if not temporary: + state.tree = None + del manager.modules[id] del manager.ast_cache[id] return state @@ -2525,7 +2548,8 @@ def read(cls, buf: ReadBuffer, manager: BuildManager) -> State: id=id, path=path, source=source, - options=manager.options.clone_for_module(id), + # The caller must call clone_for_module(). 
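+            # Until then, this State carries the shared global options.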
+ options=manager.options, ignore_all=ignore_all, caller_line=caller_line, import_context=import_context, @@ -2713,7 +2737,7 @@ def parse_file(self, *, temporary: bool = False) -> None: assert ioerr.errno is not None raise CompileError( [ - "mypy: can't read file '{}': {}".format( + "mypy: error: cannot read file '{}': {}".format( self.path.replace(os.getcwd() + os.sep, ""), os.strerror(ioerr.errno), ) @@ -2722,9 +2746,9 @@ def parse_file(self, *, temporary: bool = False) -> None: ) from ioerr except (UnicodeDecodeError, DecodeError) as decodeerr: if self.path.endswith(".pyd"): - err = f"mypy: stubgen does not support .pyd files: '{self.path}'" + err = f"{self.path}: error: stubgen does not support .pyd files" else: - err = f"mypy: can't decode file '{self.path}': {str(decodeerr)}" + err = f"{self.path}: error: cannot decode file: {str(decodeerr)}" raise CompileError([err], module_with_blocker=self.id) from decodeerr elif self.path and self.manager.fscache.isdir(self.path): source = "" @@ -2738,22 +2762,13 @@ def parse_file(self, *, temporary: bool = False) -> None: self.size_hint = len(source) if not cached: + ignore_errors = self.ignore_all or self.options.ignore_errors self.tree = manager.parse_file( - self.id, - self.xpath, - source, - ignore_errors=self.ignore_all or self.options.ignore_errors, - options=self.options, + self.id, self.xpath, source, ignore_errors=ignore_errors, options=self.options ) - else: # Reuse a cached AST self.tree = manager.ast_cache[self.id][0] - manager.errors.set_file_ignored_lines( - self.xpath, - self.tree.ignored_lines, - self.ignore_all or self.options.ignore_errors, - ) self.time_spent_us += time_spent_us(t0) @@ -2762,19 +2777,23 @@ def parse_file(self, *, temporary: bool = False) -> None: # fine-grained mode can repeat them when the module is # reprocessed. self.early_errors = list(manager.errors.error_info_map.get(self.xpath, [])) + self.semantic_analysis_pass1() else: self.early_errors = manager.ast_cache[self.id][1] if not temporary: modules[self.id] = self.tree - - if not cached: - self.semantic_analysis_pass1() - - if not temporary: self.check_blockers() manager.ast_cache[self.id] = (self.tree, self.early_errors) + self.setup_errors() + + def setup_errors(self) -> None: + assert self.tree is not None + self.manager.errors.set_file_ignored_lines( + self.xpath, self.tree.ignored_lines, self.ignore_all or self.options.ignore_errors + ) + self.manager.errors.set_skipped_lines(self.xpath, self.tree.skipped_lines) def parse_inline_configuration(self, source: str) -> None: """Check for inline mypy: options directive and parse them.""" @@ -2813,7 +2832,6 @@ def semantic_analysis_pass1(self) -> None: analyzer = SemanticAnalyzerPreAnalysis() with self.wrap_context(): analyzer.visit_file(self.tree, self.xpath, self.id, options) - self.manager.errors.set_skipped_lines(self.xpath, self.tree.skipped_lines) # TODO: Do this while constructing the AST? self.tree.names = SymbolTable() if not self.tree.is_stub: @@ -3040,9 +3058,13 @@ def suppressed_deps_opts(self) -> bytes: buf = WriteBuffer() import_options = self.manager.import_options for dep in sorted(self.suppressed): + # Using .get() is a bit defensive, but just in case we have a bug elsewhere + # (e.g. in the daemon), it is better to get a stale cache than a crash. 
+ reason = self.manager.missing_modules.get(dep, SuppressionReason.NOT_FOUND) if self.priorities.get(dep) != PRI_INDIRECT: write_str_bare(buf, dep) write_bytes_bare(buf, import_options[dep]) + write_int_bare(buf, reason) return buf.getvalue() def write_cache(self) -> tuple[CacheMeta, str] | None: @@ -3229,7 +3251,12 @@ def find_module_and_diagnose( skipping_ancestor(manager, id, result, ancestor_for) else: skipping_module(manager, caller_line, caller_state, id, result) - raise ModuleNotFound + reason = SuppressionReason.SKIPPED + if options.ignore_missing_imports: + # Performance optimization: when we are ignoring imports, there is no + # difference for the caller between skipped import and actually missing one. + reason = SuppressionReason.NOT_FOUND + raise ModuleNotFound(reason=reason) if is_silent_import_module(manager, result) and not root_source: follow_imports = "silent" return result, follow_imports @@ -3345,23 +3372,28 @@ def in_partial_package(id: str, manager: BuildManager) -> bool: defines a module-level __getattr__ (a.k.a. partial stub package). """ while "." in id: - parent, _ = id.rsplit(".", 1) - if parent in manager.modules: - parent_mod: MypyFile | None = manager.modules[parent] + ancestor, _ = id.rsplit(".", 1) + if ancestor in manager.known_partial_packages: + return manager.known_partial_packages[ancestor] + if ancestor in manager.modules: + ancestor_mod: MypyFile | None = manager.modules[ancestor] else: - # Parent is not in build, try quickly if we can find it. + # Ancestor is not in build, try quickly if we can find it. try: - parent_st = State.new_state( - id=parent, path=None, source=None, manager=manager, temporary=True + ancestor_st = State.new_state( + id=ancestor, path=None, source=None, manager=manager, temporary=True ) except (ModuleNotFound, CompileError): - parent_mod = None + ancestor_mod = None else: - parent_mod = parent_st.tree - if parent_mod is not None: + ancestor_mod = ancestor_st.tree + # We will not need this anymore. + ancestor_st.tree = None + if ancestor_mod is not None: # Bail out soon, complete subpackage found - return parent_mod.is_partial_stub_package - id = parent + manager.known_partial_packages[ancestor] = ancestor_mod.is_partial_stub_package + return ancestor_mod.is_partial_stub_package + id = ancestor return False @@ -3520,7 +3552,7 @@ def dispatch(sources: list[BuildSource], manager: BuildManager, stdout: TextIO) initial_gc_freeze_done = True for id in graph: - manager.import_map[id] = set(graph[id].dependencies + graph[id].suppressed) + manager.import_map[id] = graph[id].dependencies_set t1 = time.time() manager.add_stats( @@ -3754,7 +3786,7 @@ def load_graph( for dep in st.ancestors + dependencies + st.suppressed: ignored = dep in st.suppressed_set and dep not in entry_points if ignored and dep not in added: - manager.missing_modules.add(dep) + manager.missing_modules[dep] = SuppressionReason.NOT_FOUND # TODO: for now we skip this in the daemon as a performance optimization. # This however creates a correctness issue, see #7777 and State.is_fresh(). if not manager.use_fine_grained_cache(): @@ -3810,10 +3842,10 @@ def load_graph( # modules that are back in graph. We need to do this after the loop to cover an edge # case where a namespace package ancestor is shared by a typed and an untyped package. 
for st in graph.values(): - for dep in st.suppressed: + for dep in st.suppressed.copy(): if dep in graph: st.add_dependency(dep) - manager.missing_modules.discard(dep) + manager.missing_modules.pop(dep, None) # Second, in the initial loop we skip indirect dependencies, so to make indirect dependencies # behave more consistently with regular ones, we suppress them manually here (when needed). for st in graph.values(): @@ -3822,6 +3854,8 @@ def load_graph( if dep not in graph: st.suppress_dependency(dep) manager.plugin.set_modules(manager.modules) + manager.path_by_id = {id: graph[id].xpath for id in graph} + manager.errors.global_watcher = False return graph @@ -3949,7 +3983,9 @@ def find_stale_sccs( def process_graph(graph: Graph, manager: BuildManager) -> None: """Process everything in dependency order.""" # Broadcast graph to workers before computing SCCs to save a bit of time. - graph_message = GraphMessage(graph=graph) + # TODO: check if we can optimize by sending only part of the graph needed for given SCC. + # For example only send modules in the SCC and their dependencies. + graph_message = GraphMessage(graph=graph, missing_modules=set(manager.missing_modules)) buf = WriteBuffer() graph_message.write(buf) graph_data = buf.getvalue() @@ -4091,7 +4127,7 @@ def process_fresh_modules(graph: Graph, modules: list[str], manager: BuildManage def process_stale_scc( - graph: Graph, ascc: SCC, manager: BuildManager + graph: Graph, ascc: SCC, manager: BuildManager, from_cache: set[str] | None = None ) -> dict[str, tuple[str, list[str]]]: """Process the modules in one SCC from source code.""" # First verify if all transitive dependencies are loaded in the current process. @@ -4156,7 +4192,9 @@ def process_stale_scc( stale = scc for id in stale: # Re-generate import errors in case this module was loaded from the cache. - if graph[id].meta: + # Deserialized states all have meta=None, so the caller should specify + # explicitly which of them are from cache. + if graph[id].meta or from_cache and id in from_cache: graph[id].verify_dependencies(suppressed_only=True) # We may already have parsed the module, or not. # If the former, parse_file() is a no-op. @@ -4419,17 +4457,30 @@ class SccRequestMessage(IPCMessage): If scc_id is None, then it means that the coordinator requested a shutdown. 
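+
+    import_errors maps module ids in the SCC to import-related errors recorded
+    by the coordinator; the worker replays them (via Errors.set_file() and
+    Errors.add_error_info()) before type checking the SCC.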
""" - def __init__(self, *, scc_id: int | None) -> None: + def __init__(self, *, scc_id: int | None, import_errors: dict[str, list[ErrorInfo]]) -> None: self.scc_id = scc_id + self.import_errors = import_errors @classmethod def read(cls, buf: ReadBuffer) -> SccRequestMessage: assert read_tag(buf) == SCC_REQUEST_MESSAGE - return SccRequestMessage(scc_id=read_int_opt(buf)) + return SccRequestMessage( + scc_id=read_int_opt(buf), + import_errors={ + read_str(buf): [ErrorInfo.read(buf) for _ in range(read_int_bare(buf))] + for _ in range(read_int_bare(buf)) + }, + ) def write(self, buf: WriteBuffer) -> None: write_tag(buf, SCC_REQUEST_MESSAGE) write_int_opt(buf, self.scc_id) + write_int_bare(buf, len(self.import_errors)) + for path, errors in self.import_errors.items(): + write_str(buf, path) + write_int_bare(buf, len(errors)) + for error in errors: + error.write(buf) class SccResponseMessage(IPCMessage): @@ -4553,15 +4604,21 @@ def write(self, buf: WriteBuffer) -> None: class GraphMessage(IPCMessage): """A message wrapping the build graph computed by the coordinator.""" - def __init__(self, *, graph: Graph) -> None: + def __init__(self, *, graph: Graph, missing_modules: set[str]) -> None: self.graph = graph + self.missing_modules = missing_modules + # Send this data separately as it will be lost during state serialization. + self.from_cache = {mod_id for mod_id in graph if graph[mod_id].meta} @classmethod def read(cls, buf: ReadBuffer, manager: BuildManager | None = None) -> GraphMessage: assert manager is not None assert read_tag(buf) == GRAPH_MESSAGE graph = {read_str_bare(buf): State.read(buf, manager) for _ in range(read_int_bare(buf))} - return GraphMessage(graph=graph) + missing_modules = {read_str_bare(buf) for _ in range(read_int_bare(buf))} + message = GraphMessage(graph=graph, missing_modules=missing_modules) + message.from_cache = {read_str_bare(buf) for _ in range(read_int_bare(buf))} + return message def write(self, buf: WriteBuffer) -> None: write_tag(buf, GRAPH_MESSAGE) @@ -4569,3 +4626,9 @@ def write(self, buf: WriteBuffer) -> None: for mod_id, state in self.graph.items(): write_str_bare(buf, mod_id) state.write(buf) + write_int_bare(buf, len(self.missing_modules)) + for module in self.missing_modules: + write_str_bare(buf, module) + write_int_bare(buf, len(self.from_cache)) + for module in self.from_cache: + write_str_bare(buf, module) diff --git a/mypy/build_worker/worker.py b/mypy/build_worker/worker.py index 06159bd9a887f..d5069731b54c5 100644 --- a/mypy/build_worker/worker.py +++ b/mypy/build_worker/worker.py @@ -112,6 +112,12 @@ def main(argv: list[str]) -> None: def serve(server: IPCServer, ctx: ServerContext) -> None: + """Main server loop of the worker. + + Receive initial state from the coordinator, then process each + SCC checking request and reply to client (coordinator). See module + docstring for more details on the protocol. + """ sources = SourcesDataMessage.read(receive(server)).sources manager = setup_worker_manager(sources, ctx) if manager is None: @@ -130,13 +136,18 @@ def serve(server: IPCServer, ctx: ServerContext) -> None: gc.unfreeze() gc.enable() for id in graph: - manager.import_map[id] = set(graph[id].dependencies + graph[id].suppressed) + manager.import_map[id] = graph[id].dependencies_set + # Ignore errors during local graph loading to check that receiving + # early errors from coordinator works correctly. + manager.errors.reset() # Notify worker we are done loading graph. 
send(server, AckMessage()) # Compare worker graph and coordinator, with parallel parser we will only use the latter. - coordinator_graph = GraphMessage.read(receive(server), manager).graph + graph_data = GraphMessage.read(receive(server), manager) + assert set(manager.missing_modules) == graph_data.missing_modules + coordinator_graph = graph_data.graph assert coordinator_graph.keys() == graph.keys() for id in graph: assert graph[id].dependencies_set == coordinator_graph[id].dependencies_set @@ -150,14 +161,29 @@ def serve(server: IPCServer, ctx: ServerContext) -> None: # Notify coordinator we are ready to process SCCs. send(server, AckMessage()) while True: - scc_id = SccRequestMessage.read(receive(server)).scc_id + scc_message = SccRequestMessage.read(receive(server)) + scc_id = scc_message.scc_id if scc_id is None: manager.dump_stats() break scc = manager.scc_by_id[scc_id] t0 = time.time() try: - result = process_stale_scc(graph, scc, manager) + for id in scc.mod_ids: + state = graph[id] + # Extra if below is needed only because we are using local graph. + # TODO: clone options when switching to coordinator graph. + if state.tree is None: + # Parse early to get errors related data, such as ignored + # and skipped lines before replaying the errors. + state.parse_file() + else: + state.setup_errors() + if id in scc_message.import_errors: + manager.errors.set_file(state.xpath, id, state.options) + for err_info in scc_message.import_errors[id]: + manager.errors.add_error_info(err_info) + result = process_stale_scc(graph, scc, manager, from_cache=graph_data.from_cache) # We must commit after each SCC, otherwise we break --sqlite-cache. manager.metastore.commit() except CompileError as blocker: diff --git a/mypy/errorcodes.py b/mypy/errorcodes.py index 927cd32f8fe0e..5c28e8332a76c 100644 --- a/mypy/errorcodes.py +++ b/mypy/errorcodes.py @@ -284,11 +284,6 @@ def __hash__(self) -> int: # Syntax errors are often blocking. SYNTAX: Final = ErrorCode("syntax", "Report syntax errors", "General") -# This is an internal marker code for a whole-file ignore. It is not intended to -# be user-visible. -FILE: Final = ErrorCode("file", "Internal marker for a whole file being ignored", "General") -del error_codes[FILE.code] - # This is a catch-all for remaining uncategorized errors. 
MISC: Final = ErrorCode("misc", "Miscellaneous other checks", "General")

diff --git a/mypy/errors.py b/mypy/errors.py
index 9691f924c523d..5ffada781b9ab 100644
--- a/mypy/errors.py
+++ b/mypy/errors.py
@@ -9,8 +9,27 @@
 from typing import Final, Literal, NoReturn, TextIO, TypeVar
 from typing_extensions import Self
 
+from librt.internal import (
+    ReadBuffer,
+    WriteBuffer,
+    read_bool,
+    read_int as read_int_bare,
+    write_bool,
+    write_int as write_int_bare,
+)
+
 from mypy import errorcodes as codes
-from mypy.cache import ErrorTuple
+from mypy.cache import (
+    ErrorTuple,
+    read_int,
+    read_int_list,
+    read_str,
+    read_str_opt,
+    write_int,
+    write_int_list,
+    write_str,
+    write_str_opt,
+)
 from mypy.error_formatter import ErrorFormatter
 from mypy.errorcodes import IMPORT, IMPORT_NOT_FOUND, IMPORT_UNTYPED, ErrorCode, mypy_error_codes
 from mypy.nodes import Context
@@ -137,6 +156,49 @@ def __init__(
         assert severity == "note", "Only notes can specify parent errors"
         self.parent_error = parent_error
 
+    def write(self, buf: WriteBuffer) -> None:
+        assert self.parent_error is None, "Parent errors not supported yet"
+        write_int_bare(buf, len(self.import_ctx))
+        for file, line in self.import_ctx:
+            write_str(buf, file)
+            write_int(buf, line)
+        type, function = self.local_ctx
+        write_str_opt(buf, type)
+        write_str_opt(buf, function)
+        write_int(buf, self.line)
+        write_int(buf, self.column)
+        write_int(buf, self.end_line)
+        write_int(buf, self.end_column)
+        write_str(buf, self.severity)
+        write_str(buf, self.message)
+        write_str_opt(buf, self.code.code if self.code else None)
+        write_bool(buf, self.blocker)
+        write_bool(buf, self.only_once)
+        write_str_opt(buf, self.module)
+        write_str_opt(buf, self.target)
+        write_int_list(buf, list(self.origin_span))
+        write_int(buf, self.priority)
+
+    @classmethod
+    def read(cls, buf: ReadBuffer) -> ErrorInfo:
+        return ErrorInfo(
+            import_ctx=[(read_str(buf), read_int(buf)) for _ in range(read_int_bare(buf))],
+            local_ctx=(read_str_opt(buf), read_str_opt(buf)),
+            line=read_int(buf),
+            column=read_int(buf),
+            end_line=read_int(buf),
+            end_column=read_int(buf),
+            severity=read_str(buf),
+            message=read_str(buf),
+            code=mypy_error_codes[code] if (code := read_str_opt(buf)) else None,
+            blocker=read_bool(buf),
+            only_once=read_bool(buf),
+            module=read_str_opt(buf),
+            target=read_str_opt(buf),
+            origin_span=read_int_list(buf),
+            priority=read_int(buf),
+        )
+
 
 class ErrorWatcher:
     """Context manager that can be used to keep track of new errors recorded
@@ -425,6 +487,11 @@ class Errors:
     # in some cases to avoid reporting huge numbers of errors.
     seen_import_error = False
 
+    # Set this flag to record all raw report() calls. Recorded errors (per file)
+    # can be replayed by calling set_file() and add_error_info().
+    global_watcher = False
+    recorded: dict[str, list[ErrorInfo]]
+
    _watchers: list[ErrorWatcher]
 
     def __init__(
@@ -457,6 +524,8 @@ def initialize(self) -> None:
         self.target_module = None
         self.seen_import_error = False
         self._watchers = []
+        self.global_watcher = False
+        self.recorded = defaultdict(list)
 
     def reset(self) -> None:
         self.initialize()
@@ -604,6 +673,8 @@ def report(
             target=self.current_target(),
             parent_error=parent_error,
         )
+        if self.global_watcher:
+            self.recorded[self.file].append(info)
         self.add_error_info(info)
         return info
 
@@ -873,19 +944,14 @@ def generate_ignore_without_code_errors(
             return
 
         used_ignored_lines = self.used_ignored_lines[file]
-
-        # If the whole file is ignored, ignore it.
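+        # Lines recorded via set_skipped_lines() are exempted by the loop below.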
- if used_ignored_lines: - _, used_codes = min(used_ignored_lines.items()) - if codes.FILE.code in used_codes: - return - for line, ignored_codes in self.ignored_lines[file].items(): + if line in self.skipped_lines[file]: + continue if ignored_codes: continue - # If the ignore is itself unused and that would be warned about, let - # that error stand alone + # If the `type: ignore` is itself unused and that would be warned about, + # let that error stand alone if is_warning_unused_ignores and not used_ignored_lines[line]: continue diff --git a/mypy/fastparse.py b/mypy/fastparse.py index 8ef905a567d14..701e449f8f338 100644 --- a/mypy/fastparse.py +++ b/mypy/fastparse.py @@ -463,20 +463,17 @@ def translate_stmt_list( ismodule and stmts and self.type_ignores - and min(self.type_ignores) < self.get_lineno(stmts[0]) + and (first := min(self.type_ignores)) < self.get_lineno(stmts[0]) ): - ignores = self.type_ignores[min(self.type_ignores)] + ignores = self.type_ignores.pop(first) if ignores: joined_ignores = ", ".join(ignores) self.fail( message_registry.TYPE_IGNORE_WITH_ERRCODE_ON_MODULE.format(joined_ignores), - line=min(self.type_ignores), + line=first, column=0, blocker=False, ) - self.errors.used_ignored_lines[self.errors.file][min(self.type_ignores)].append( - codes.FILE.code - ) block = Block(self.fix_function_overloads(self.translate_stmt_list(stmts))) self.set_block_lines(block, stmts) mark_block_unreachable(block) diff --git a/mypy/nodes.py b/mypy/nodes.py index 4168b2e00f155..4c238592f9927 100644 --- a/mypy/nodes.py +++ b/mypy/nodes.py @@ -759,7 +759,7 @@ def write(self, data: WriteBuffer) -> None: write_tag(data, LITERAL_NONE) else: self.impl.write(data) - write_flags(data, self, FUNCBASE_FLAGS) + write_flags(data, [self.is_property, self.is_class, self.is_static, self.is_final]) write_str_opt(data, self.deprecated) write_int_opt(data, self.setter_index) write_tag(data, END_TAG) @@ -779,7 +779,7 @@ def read(cls, data: ReadBuffer) -> OverloadedFuncDef: # set line for empty overload items, as not set in __init__ if len(res.items) > 0: res.set_line(res.impl.line) - read_flags(data, res, FUNCBASE_FLAGS) + res.is_property, res.is_class, res.is_static, res.is_final = read_flags(data, num_flags=4) res.deprecated = read_str_opt(data) res.setter_index = read_int_opt(data) # NOTE: res.info will be set in the fixup phase. 
@@ -1067,7 +1067,25 @@ def write(self, data: WriteBuffer) -> None: write_str(data, self._name) mypy.types.write_type_opt(data, self.type) write_str(data, self._fullname) - write_flags(data, self, FUNCDEF_FLAGS) + write_flags( + data, + [ + self.is_property, + self.is_class, + self.is_static, + self.is_final, + self.is_overload, + self.is_generator, + self.is_coroutine, + self.is_async_generator, + self.is_awaitable_coroutine, + self.is_decorated, + self.is_conditional, + self.is_trivial_body, + self.is_trivial_self, + self.is_mypy_only, + ], + ) write_str_opt_list(data, self.arg_names) write_int_list(data, [int(ak.value) for ak in self.arg_kinds]) write_int(data, self.abstract_status) @@ -1088,7 +1106,22 @@ def read(cls, data: ReadBuffer) -> FuncDef: typ = mypy.types.read_function_like(data, tag) ret = FuncDef(name, [], Block([]), typ) ret._fullname = read_str(data) - read_flags(data, ret, FUNCDEF_FLAGS) + ( + ret.is_property, + ret.is_class, + ret.is_static, + ret.is_final, + ret.is_overload, + ret.is_generator, + ret.is_coroutine, + ret.is_async_generator, + ret.is_awaitable_coroutine, + ret.is_decorated, + ret.is_conditional, + ret.is_trivial_body, + ret.is_trivial_self, + ret.is_mypy_only, + ) = read_flags(data, num_flags=14) # NOTE: ret.info is set in the fixup phase. ret.arg_names = read_str_opt_list(data) ret.arg_kinds = [ARG_KINDS[ak] for ak in read_int_list(data)] @@ -1378,7 +1411,32 @@ def write(self, data: WriteBuffer) -> None: mypy.types.write_type_opt(data, self.type) mypy.types.write_type_opt(data, self.setter_type) write_str(data, self._fullname) - write_flags(data, self, VAR_FLAGS) + write_flags( + data, + [ + self.is_self, + self.is_cls, + self.is_initialized_in_class, + self.is_staticmethod, + self.is_classmethod, + self.is_property, + self.is_settable_property, + self.is_suppressed_import, + self.is_classvar, + self.is_abstract_var, + self.is_final, + self.is_index_var, + self.final_unset_in_class, + self.final_set_in_init, + self.explicit_self_type, + self.is_ready, + self.is_inferred, + self.invalid_partial_type, + self.from_module_getattr, + self.has_explicit_value, + self.allow_incompatible_override, + ], + ) write_literal(data, self.final_value) write_tag(data, END_TAG) @@ -1393,9 +1451,30 @@ def read(cls, data: ReadBuffer) -> Var: assert tag == mypy.types.CALLABLE_TYPE setter_type = mypy.types.CallableType.read(data) v.setter_type = setter_type - v.is_ready = False # Override True default set in __init__ v._fullname = read_str(data) - read_flags(data, v, VAR_FLAGS) + ( + v.is_self, + v.is_cls, + v.is_initialized_in_class, + v.is_staticmethod, + v.is_classmethod, + v.is_property, + v.is_settable_property, + v.is_suppressed_import, + v.is_classvar, + v.is_abstract_var, + v.is_final, + v.is_index_var, + v.final_unset_in_class, + v.final_set_in_init, + v.explicit_self_type, + v.is_ready, + v.is_inferred, + v.invalid_partial_type, + v.from_module_getattr, + v.has_explicit_value, + v.allow_incompatible_override, + ) = read_flags(data, num_flags=21) tag = read_tag(data) if tag == LITERAL_COMPLEX: v.final_value = complex(read_float_bare(data), read_float_bare(data)) @@ -4035,7 +4114,22 @@ def write(self, data: WriteBuffer) -> None: mypy.types.write_type_opt(data, self.metaclass_type) mypy.types.write_type_opt(data, self.tuple_type) mypy.types.write_type_opt(data, self.typeddict_type) - write_flags(data, self, TypeInfo.FLAGS) + write_flags( + data, + [ + self.is_abstract, + self.is_enum, + self.fallback_to_any, + self.meta_fallback_to_any, + self.is_named_tuple, + 
self.is_newtype, + self.is_protocol, + self.runtime_protocol, + self.is_final, + self.is_disjoint_base, + self.is_intersection, + ], + ) write_json(data, self.metadata) if self.slots is None: write_tag(data, LITERAL_NONE) @@ -4095,7 +4189,19 @@ def read(cls, data: ReadBuffer) -> TypeInfo: if (tag := read_tag(data)) != LITERAL_NONE: assert tag == mypy.types.TYPED_DICT_TYPE ti.typeddict_type = mypy.types.TypedDictType.read(data) - read_flags(data, ti, TypeInfo.FLAGS) + ( + ti.is_abstract, + ti.is_enum, + ti.fallback_to_any, + ti.meta_fallback_to_any, + ti.is_named_tuple, + ti.is_newtype, + ti.is_protocol, + ti.runtime_protocol, + ti.is_final, + ti.is_disjoint_base, + ti.is_intersection, + ) = read_flags(data, num_flags=11) ti.metadata = read_json(data) tag = read_tag(data) if tag != LITERAL_NONE: @@ -4882,15 +4988,18 @@ def set_flags(node: Node, flags: list[str]) -> None: setattr(node, name, True) -def write_flags(data: WriteBuffer, node: SymbolNode, flags: list[str]) -> None: - for flag in flags: - write_bool(data, getattr(node, flag)) +def write_flags(data: WriteBuffer, flags: list[bool]) -> None: + assert len(flags) <= 26, "This many flags not supported yet" + packed = 0 + for i, flag in enumerate(flags): + if flag: + packed |= 1 << i + write_int(data, packed) -def read_flags(data: ReadBuffer, node: SymbolNode, flags: list[str]) -> None: - for flag in flags: - if read_bool(data): - setattr(node, flag, True) +def read_flags(data: ReadBuffer, num_flags: int) -> list[bool]: + packed = read_int(data) + return [(packed & (1 << i)) != 0 for i in range(num_flags)] def get_member_expr_fullname(expr: MemberExpr) -> str | None: diff --git a/mypy/semanal.py b/mypy/semanal.py index f38a71cb16e30..219459c92e3ce 100644 --- a/mypy/semanal.py +++ b/mypy/semanal.py @@ -448,7 +448,7 @@ class SemanticAnalyzer( def __init__( self, modules: dict[str, MypyFile], - missing_modules: set[str], + missing_modules: dict[str, int], incomplete_namespaces: set[str], errors: Errors, plugin: Plugin, @@ -6766,6 +6766,7 @@ def get_module_symbol(self, node: MypyFile, name: str) -> SymbolTableNode | None return sym def is_visible_import(self, base_id: str, id: str) -> bool: + # TODO: can we reuse SCC-level tracking from build.py instead? if id in self.import_map[self.cur_mod_id]: # Fast path: module is imported locally. return True diff --git a/mypy/server/update.py b/mypy/server/update.py index 741d08ec9d204..c330033ad49bf 100644 --- a/mypy/server/update.py +++ b/mypy/server/update.py @@ -128,6 +128,7 @@ BuildResult, Graph, State, + SuppressionReason, load_graph, process_fresh_modules, ) @@ -591,7 +592,7 @@ def update_module_isolated( sources = get_sources(manager.fscache, previous_modules, [(module, path)], followed) if module in manager.missing_modules: - manager.missing_modules.remove(module) + del manager.missing_modules[module] orig_module = module orig_state = graph.get(module) @@ -727,7 +728,8 @@ def delete_module(module_id: str, path: str, graph: Graph, manager: BuildManager # If the module is removed from the build but still exists, then # we mark it as missing so that it will get picked up by import from still. if manager.fscache.isfile(path): - manager.missing_modules.add(module_id) + # TODO: check if there is an equivalent of #20800 for the daemon. 
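+        # (See SuppressionReason in build.py for the NOT_FOUND vs SKIPPED distinction.)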
+ manager.missing_modules[module_id] = SuppressionReason.NOT_FOUND def dedupe_modules(modules: list[tuple[str, str]]) -> list[tuple[str, str]]: diff --git a/mypyc/build_setup.py b/mypyc/build_setup.py index b70a1df7930e8..ec995935990c4 100644 --- a/mypyc/build_setup.py +++ b/mypyc/build_setup.py @@ -19,18 +19,11 @@ from distutils import ccompiler EXTRA_FLAGS_PER_COMPILER_TYPE_PER_PATH_COMPONENT = { - "unix": { - "base64/arch/ssse3": ["-mssse3"], - "base64/arch/sse41": ["-msse4.1"], - "base64/arch/sse42": ["-msse4.2"], - "base64/arch/avx2": ["-mavx2"], - "base64/arch/avx": ["-mavx"], - }, "msvc": { "base64/arch/sse42": ["/arch:SSE4.2"], "base64/arch/avx2": ["/arch:AVX2"], "base64/arch/avx": ["/arch:AVX"], - }, + } } ccompiler.CCompiler.__spawn = ccompiler.CCompiler.spawn # type: ignore[attr-defined] diff --git a/mypyc/lib-rt/base64/arch/avx/codec.c b/mypyc/lib-rt/base64/arch/avx/codec.c index 7a64a94be2aff..9b0ef58cd62ff 100644 --- a/mypyc/lib-rt/base64/arch/avx/codec.c +++ b/mypyc/lib-rt/base64/arch/avx/codec.c @@ -9,6 +9,11 @@ #include "../../env.h" #if HAVE_AVX +#if defined(__clang__) +#pragma clang attribute push (__attribute__((target("avx"))), apply_to=function) +#else +#pragma GCC target("avx") +#endif #include // Only enable inline assembly on supported compilers and on 64-bit CPUs. @@ -62,6 +67,9 @@ base64_stream_decode_avx BASE64_DEC_PARAMS #include "../generic/dec_head.c" dec_loop_ssse3(&s, &slen, &o, &olen); #include "../generic/dec_tail.c" +#if defined(__clang__) + #pragma clang attribute pop +#endif #else return base64_dec_stub(state, src, srclen, out, outlen); #endif diff --git a/mypyc/lib-rt/base64/arch/avx2/codec.c b/mypyc/lib-rt/base64/arch/avx2/codec.c index a54385bf89bea..e7ecf5da4ec79 100644 --- a/mypyc/lib-rt/base64/arch/avx2/codec.c +++ b/mypyc/lib-rt/base64/arch/avx2/codec.c @@ -9,6 +9,11 @@ #include "../../env.h" #if HAVE_AVX2 +#if defined(__clang__) +#pragma clang attribute push (__attribute__((target("avx2"))), apply_to=function) +#else +#pragma GCC target("avx2") +#endif #include // Only enable inline assembly on supported compilers and on 64-bit CPUs. 
@@ -52,6 +57,9 @@ base64_stream_decode_avx2 BASE64_DEC_PARAMS #include "../generic/dec_head.c" dec_loop_avx2(&s, &slen, &o, &olen); #include "../generic/dec_tail.c" +#if defined(__clang__) + #pragma clang attribute pop +#endif #else return base64_dec_stub(state, src, srclen, out, outlen); #endif diff --git a/mypyc/lib-rt/base64/arch/avx512/codec.c b/mypyc/lib-rt/base64/arch/avx512/codec.c index 98210826a5fe9..44c11acbd028c 100644 --- a/mypyc/lib-rt/base64/arch/avx512/codec.c +++ b/mypyc/lib-rt/base64/arch/avx512/codec.c @@ -9,6 +9,11 @@ #include "../../env.h" #if HAVE_AVX512 +#if defined(__clang__) +#pragma clang attribute push (__attribute__((target("avx512vbmi"))), apply_to=function) +#else +#pragma GCC target("avx512vbmi") +#endif #include #include "../avx2/dec_reshuffle.c" @@ -38,6 +43,9 @@ base64_stream_decode_avx512 BASE64_DEC_PARAMS #include "../generic/dec_head.c" dec_loop_avx2(&s, &slen, &o, &olen); #include "../generic/dec_tail.c" +#if defined(__clang__) + #pragma clang attribute pop +#endif #else return base64_dec_stub(state, src, srclen, out, outlen); #endif diff --git a/mypyc/lib-rt/base64/arch/sse41/codec.c b/mypyc/lib-rt/base64/arch/sse41/codec.c index c627db5f726d4..cb8c8f3a84097 100644 --- a/mypyc/lib-rt/base64/arch/sse41/codec.c +++ b/mypyc/lib-rt/base64/arch/sse41/codec.c @@ -9,6 +9,11 @@ #include "../../env.h" #if HAVE_SSE41 +#if defined(__clang__) +#pragma clang attribute push (__attribute__((target("sse4.1"))), apply_to=function) +#else +#pragma GCC target("sse4.1") +#endif #include // Only enable inline assembly on supported compilers and on 64-bit CPUs. @@ -52,6 +57,9 @@ base64_stream_decode_sse41 BASE64_DEC_PARAMS #include "../generic/dec_head.c" dec_loop_ssse3(&s, &slen, &o, &olen); #include "../generic/dec_tail.c" +#if defined(__clang__) + #pragma clang attribute pop +#endif #else return base64_dec_stub(state, src, srclen, out, outlen); #endif diff --git a/mypyc/lib-rt/base64/arch/sse42/codec.c b/mypyc/lib-rt/base64/arch/sse42/codec.c index 2fe4e2997aa14..ec70a02970320 100644 --- a/mypyc/lib-rt/base64/arch/sse42/codec.c +++ b/mypyc/lib-rt/base64/arch/sse42/codec.c @@ -9,6 +9,11 @@ #include "../../env.h" #if HAVE_SSE42 +#if defined(__clang__) +#pragma clang attribute push (__attribute__((target("sse4.2"))), apply_to=function) +#else +#pragma GCC target("sse4.2") +#endif #include // Only enable inline assembly on supported compilers and on 64-bit CPUs. @@ -52,6 +57,9 @@ base64_stream_decode_sse42 BASE64_DEC_PARAMS #include "../generic/dec_head.c" dec_loop_ssse3(&s, &slen, &o, &olen); #include "../generic/dec_tail.c" +#if defined(__clang__) + #pragma clang attribute pop +#endif #else return base64_dec_stub(state, src, srclen, out, outlen); #endif diff --git a/mypyc/lib-rt/base64/arch/ssse3/codec.c b/mypyc/lib-rt/base64/arch/ssse3/codec.c index e51b3dfdb1677..2a3577ff8fc3f 100644 --- a/mypyc/lib-rt/base64/arch/ssse3/codec.c +++ b/mypyc/lib-rt/base64/arch/ssse3/codec.c @@ -9,6 +9,11 @@ #include "../../env.h" #if HAVE_SSSE3 +#if defined(__clang__) +#pragma clang attribute push (__attribute__((target("ssse3"))), apply_to=function) +#else +#pragma GCC target("ssse3") +#endif #include // Only enable inline assembly on supported compilers and on 64-bit CPUs. 
@@ -54,6 +59,9 @@ base64_stream_decode_ssse3 BASE64_DEC_PARAMS #include "../generic/dec_head.c" dec_loop_ssse3(&s, &slen, &o, &olen); #include "../generic/dec_tail.c" +#if defined(__clang__) + #pragma clang attribute pop +#endif #else return base64_dec_stub(state, src, srclen, out, outlen); #endif diff --git a/mypyc/lib-rt/base64/librt_base64.c b/mypyc/lib-rt/base64/librt_base64.c index 75e82cb185bfe..4476c016a5757 100644 --- a/mypyc/lib-rt/base64/librt_base64.c +++ b/mypyc/lib-rt/base64/librt_base64.c @@ -1,6 +1,9 @@ #define PY_SSIZE_T_CLEAN #include #include + +#define BASE64_EXPORTS + #include "librt_base64.h" #include "libbase64.h" #include "pythoncapi_compat.h" diff --git a/mypyc/lib-rt/build_setup.py b/mypyc/lib-rt/build_setup.py index b70a1df7930e8..ec995935990c4 100644 --- a/mypyc/lib-rt/build_setup.py +++ b/mypyc/lib-rt/build_setup.py @@ -19,18 +19,11 @@ from distutils import ccompiler EXTRA_FLAGS_PER_COMPILER_TYPE_PER_PATH_COMPONENT = { - "unix": { - "base64/arch/ssse3": ["-mssse3"], - "base64/arch/sse41": ["-msse4.1"], - "base64/arch/sse42": ["-msse4.2"], - "base64/arch/avx2": ["-mavx2"], - "base64/arch/avx": ["-mavx"], - }, "msvc": { "base64/arch/sse42": ["/arch:SSE4.2"], "base64/arch/avx2": ["/arch:AVX2"], "base64/arch/avx": ["/arch:AVX"], - }, + } } ccompiler.CCompiler.__spawn = ccompiler.CCompiler.spawn # type: ignore[attr-defined] diff --git a/test-data/unit/check-errorcodes.test b/test-data/unit/check-errorcodes.test index 53ed6ddda2fad..ba2f23fd72940 100644 --- a/test-data/unit/check-errorcodes.test +++ b/test-data/unit/check-errorcodes.test @@ -1067,13 +1067,11 @@ def f(d: D, s: str) -> None: [typing fixtures/typing-typeddict.pyi] [case testRecommendErrorCode] -# type: ignore[whatever] # E: type ignore with error code is not supported for modules; use `# mypy: disable-error-code="whatever"` [syntax] \ - # N: Error code "syntax" not covered by "type: ignore" comment +# type: ignore[whatever] # E: type ignore with error code is not supported for modules; use `# mypy: disable-error-code="whatever"` [syntax] 1 + "asdf" [case testRecommendErrorCode2] -# type: ignore[whatever, other] # E: type ignore with error code is not supported for modules; use `# mypy: disable-error-code="whatever, other"` [syntax] \ - # N: Error code "syntax" not covered by "type: ignore" comment +# type: ignore[whatever, other] # E: type ignore with error code is not supported for modules; use `# mypy: disable-error-code="whatever, other"` [syntax] 1 + "asdf" [case testShowErrorCodesInConfig] diff --git a/test-data/unit/check-incremental.test b/test-data/unit/check-incremental.test index 314be23f1537b..a7cff97133dca 100644 --- a/test-data/unit/check-incremental.test +++ b/test-data/unit/check-incremental.test @@ -2337,7 +2337,7 @@ tmp/c.py:1: error: Module "d" has no attribute "x" [delete nonexistent.py.2] [out] [out2] -mypy: can't read file 'tmp/nonexistent.py': No such file or directory +mypy: error: cannot read file 'tmp/nonexistent.py': No such file or directory [case testSerializeAbstractPropertyIncremental] from abc import abstractmethod @@ -7928,3 +7928,22 @@ from a import b # type: ignore[attr-defined] [out] main:2: error: Unused "type: ignore" comment [out2] + +[case testAddedMissingModuleSkip] +# flags: --follow-imports=skip +import mod +[file mod.py.2] +[out] +main:2: error: Cannot find implementation or library stub for module named "mod" +main:2: note: See https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-imports +[out2] + +[case testDeletedModuleSkip] +# flags: 
--follow-imports=skip +import mod +[file mod.py] +[delete mod.py.2] +[out] +[out2] +main:2: error: Cannot find implementation or library stub for module named "mod" +main:2: note: See https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-imports diff --git a/test-data/unit/check-modules.test b/test-data/unit/check-modules.test index 101a93f23e05f..c671496ef545f 100644 --- a/test-data/unit/check-modules.test +++ b/test-data/unit/check-modules.test @@ -2233,12 +2233,12 @@ main:2: note: Revealed type is "builtins.int" [builtins fixtures/module.pyi] -[case testFailedImportFromTwoModules] +-- Parallel mode gives only_once notes once *per worker* +[case testFailedImportFromTwoModules_no_parallel] import c import b [file b.py] import c - [out] -- TODO: it would be better for this to be in the other order tmp/b.py:1: error: Cannot find implementation or library stub for module named "c" diff --git a/test-data/unit/cmdline.test b/test-data/unit/cmdline.test index 2b3f48fec4a0b..cc1093bbf47f9 100644 --- a/test-data/unit/cmdline.test +++ b/test-data/unit/cmdline.test @@ -117,7 +117,7 @@ sub.pkg contains __init__.py but is not a valid Python package name [file a.py] # coding: uft-8 [out] -mypy: can't decode file 'a.py': unknown encoding: uft-8 +a.py: error: cannot decode file: unknown encoding: uft-8 == Return code: 2 [case testCannotIgnoreDuplicateModule] @@ -416,7 +416,7 @@ int_pow.py:11: note: Revealed type is "Any" [case testMissingFile] # cmd: mypy nope.py [out] -mypy: can't read file 'nope.py': No such file or directory +mypy: error: cannot read file 'nope.py': No such file or directory == Return code: 2 [case testModulesAndPackages] @@ -631,7 +631,7 @@ c.py:1: error: Name "fail" is not defined \[mypy] files = config.py [out] -mypy: can't read file 'override.py': No such file or directory +mypy: error: cannot read file 'override.py': No such file or directory == Return code: 2 [case testErrorSummaryOnSuccess] @@ -688,7 +688,8 @@ Found 2 errors in 2 files (checked 2 source files) [case testErrorSummaryOnBadUsage] # cmd: mypy --error-summary missing.py [out] -mypy: can't read file 'missing.py': No such file or directory +mypy: error: cannot read file 'missing.py': No such file or directory +Found 1 error in 1 file (errors prevented further checking) == Return code: 2 [case testShowSourceCodeSnippetsWrappedFormatting] @@ -763,7 +764,7 @@ imp.py:2: note: See https://mypy.readthedocs.io/en/stable/running_mypy.html#miss [file a.pyd] # coding: uft-8 [out] -mypy: stubgen does not support .pyd files: 'a.pyd' +a.pyd: error: stubgen does not support .pyd files == Return code: 2 [case testDuplicateModules] diff --git a/test-data/unit/fine-grained-blockers.test b/test-data/unit/fine-grained-blockers.test index 0b2d9d2fcb5f6..b894802a40b60 100644 --- a/test-data/unit/fine-grained-blockers.test +++ b/test-data/unit/fine-grained-blockers.test @@ -502,7 +502,7 @@ def f(x: int) -> None: ... def f(x: str) -> None: ... [out] == -mypy: can't decode file 'tmp/a.py': 'ascii' codec can't decode byte 0xc3 in position 16: ordinal not in range(128) +a.py: error: cannot decode file: 'ascii' codec can't decode byte 0xc3 in position 16: ordinal not in range(128) == main:2: error: Argument 1 to "f" has incompatible type "int"; expected "str" @@ -518,7 +518,7 @@ def f(x: int) -> None: ... def f(x: str) -> None: ... 
[out] == -mypy: can't decode file 'tmp/a.py': 'ascii' codec can't decode byte 0xc3 in position 17: ordinal not in range(128) +a.py: error: cannot decode file: 'ascii' codec can't decode byte 0xc3 in position 17: ordinal not in range(128) == main:2: error: Argument 1 to "f" has incompatible type "int"; expected "str" @@ -532,6 +532,6 @@ a.f(1) [file a.py.2] def f(x: str) -> None: ... [out] -mypy: can't decode file 'tmp/a.py': 'ascii' codec can't decode byte 0xc3 in position 16: ordinal not in range(128) +a.py: error: cannot decode file: 'ascii' codec can't decode byte 0xc3 in position 16: ordinal not in range(128) == main:3: error: Argument 1 to "f" has incompatible type "int"; expected "str" diff --git a/test-data/unit/fine-grained.test b/test-data/unit/fine-grained.test index e2b5458ee8fff..0e42d5ef22dc6 100644 --- a/test-data/unit/fine-grained.test +++ b/test-data/unit/fine-grained.test @@ -6053,9 +6053,9 @@ a.py:1: error: "int" not callable [file a.py.2] 1() [out] -mypy: can't read file 'tmp/nonexistent.py': No such file or directory +mypy: error: cannot read file 'tmp/nonexistent.py': No such file or directory == -mypy: can't read file 'tmp/nonexistent.py': No such file or directory +mypy: error: cannot read file 'tmp/nonexistent.py': No such file or directory [case testNonExistentFileOnCommandLine2] # cmd: mypy a.py From d4ec05be1133db819e0cf99093f5e1497ebbb992 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 17 Feb 2026 14:00:37 +0000 Subject: [PATCH 4/4] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- TODO.md | 1 - 1 file changed, 1 deletion(-) diff --git a/TODO.md b/TODO.md index 53f1c91c1a0da..c3c0cb75e6863 100644 --- a/TODO.md +++ b/TODO.md @@ -8,4 +8,3 @@ The current implementation uses octal escape sequences (`\XXX`) but the tests ex 2. [x] Update CHAR_MAP to use hex escapes instead of octal escapes 3. [x] Keep simple escape sequences for special chars (\n, \r, \t, etc.) 4. [x] Update the docstring to reflect correct format (\xXX instead of \oXXX) -
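
A minimal sketch of the bit-packing used by the new write_flags()/read_flags()
helpers in mypy/nodes.py (PATCH 3/4). Plain ints stand in for librt's
WriteBuffer/ReadBuffer, and the *_packed names are hypothetical, so this is
illustrative rather than the exact wire format:

    def write_flags_packed(flags: list[bool]) -> int:
        # Pack up to 26 booleans into a single integer, bit i <- flags[i].
        assert len(flags) <= 26, "This many flags not supported yet"
        packed = 0
        for i, flag in enumerate(flags):
            if flag:
                packed |= 1 << i
        return packed

    def read_flags_packed(packed: int, num_flags: int) -> list[bool]:
        # Recover the booleans in the order they were packed.
        return [(packed & (1 << i)) != 0 for i in range(num_flags)]

    flags = [True, False, True, True]
    assert read_flags_packed(write_flags_packed(flags), num_flags=4) == flags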