From efd5c2b6cad6d7a6763c0c6dd57eb9669be4d188 Mon Sep 17 00:00:00 2001 From: Jukka Lehtosalo Date: Mon, 16 Feb 2026 18:04:55 +0000 Subject: [PATCH 1/4] Optimize: skip logging and stats collection calls if they are no-ops We had lots of logging that was fairly expensive even if disabled, since just calling the functions or constructing the log messages was slow. Now put some of the most expensive logging calls behind fast bool flag checks. --- mypy/build.py | 153 ++++++++++++++++++++++++++++++++------------------ 1 file changed, 98 insertions(+), 55 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index bf56075427d0..ba772617647a 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -799,6 +799,10 @@ def __init__( if not isinstance(plugin, ChainedPlugin): plugin = ChainedPlugin(options, [plugin]) self.plugin = plugin + # These allow quickly skipping logging and stats collection calls + self.stats_enabled = self.options.dump_build_stats + self.logging_enabled = self.options.verbosity >= 1 + self.tracing_enabled = self.options.verbosity >= 2 # Set of namespaces (module or class) that are being populated during semantic # analysis and may have missing definitions. self.incomplete_namespaces: set[str] = set() @@ -913,7 +917,7 @@ def __init__( self.known_partial_packages: dict[str, bool] = {} def dump_stats(self) -> None: - if self.options.dump_build_stats: + if self.stats_enabled: print("Stats:") for key, value in sorted(self.stats_summary().items()): print(f"{key + ':':24}{value}") @@ -1099,11 +1103,12 @@ def trace(self, *message: str) -> None: self.stderr.flush() def add_stats(self, **kwds: Any) -> None: - for key, value in kwds.items(): - if key in self.stats: - self.stats[key] += value - else: - self.stats[key] = value + if self.stats_enabled: + for key, value in kwds.items(): + if key in self.stats: + self.stats[key] += value + else: + self.stats[key] = value def stats_summary(self) -> Mapping[str, object]: return self.stats @@ -1463,14 +1468,23 @@ def read_deps_cache(manager: BuildManager, graph: Graph) -> dict[str, FgDepMeta] return module_deps_metas -def _load_ff_file(file: str, manager: BuildManager, log_error: str) -> bytes | None: - t0 = time.time() +def _load_ff_file( + file: str, manager: BuildManager, log_error_fmt: str, id: str | None +) -> bytes | None: + if manager.stats_enabled: + t0 = time.time() try: data = manager.metastore.read(file) except OSError: - manager.log(log_error + file) + if manager.logging_enabled: + if id: + message = log_error_fmt.format(id) + file + else: + message = log_error_fmt + file + manager.log(message) return None - manager.add_stats(metastore_read_time=time.time() - t0) + if manager.stats_enabled: + manager.add_stats(metastore_read_time=time.time() - t0) return data @@ -1638,11 +1652,15 @@ def find_cache_meta( """ # TODO: May need to take more build options into account meta_file, data_file, _ = get_cache_names(id, path, manager.options) - manager.trace(f"Looking for {id} at {meta_file}") + if manager.tracing_enabled: + manager.trace(f"Looking for {id} at {meta_file}") meta: bytes | dict[str, Any] | None - t0 = time.time() + if manager.stats_enabled: + t0 = time.time() if manager.options.fixed_format_cache: - meta = _load_ff_file(meta_file, manager, log_error=f"Could not load cache for {id}: ") + meta = _load_ff_file( + meta_file, manager, log_error_fmt="Could not load cache for {}: ", id=id + ) if meta is None: return None else: @@ -1659,7 +1677,8 @@ def find_cache_meta( f"Could not load cache for {id}: meta cache is not a dict: {repr(meta)}" ) return None - t1 = time.time() + if manager.stats_enabled: + t1 = time.time() if isinstance(meta, bytes): # If either low-level buffer format or high-level cache layout changed, we # cannot use the cache files, even with --skip-version-check. @@ -1674,10 +1693,11 @@ def find_cache_meta( if m is None: manager.log(f"Metadata abandoned for {id}: cannot deserialize data") return None - t2 = time.time() - manager.add_stats( - load_meta_time=t2 - t0, load_meta_load_time=t1 - t0, load_meta_from_dict_time=t2 - t1 - ) + if manager.stats_enabled: + t2 = time.time() + manager.add_stats( + load_meta_time=t2 - t0, load_meta_load_time=t1 - t0, load_meta_from_dict_time=t2 - t1 + ) if skip_validation: return m @@ -1754,7 +1774,8 @@ def validate_meta( manager.log(f"Metadata abandoned for {id}: errors were previously ignored") return None - t0 = time.time() + if manager.stats_enabled: + t0 = time.time() bazel = manager.options.bazel assert path is not None, "Internal error: meta was provided without a path" if not manager.options.skip_cache_mtime_checks: @@ -1779,7 +1800,8 @@ def validate_meta( manager.log(f"Metadata abandoned for {id}: file or directory {path} does not exist") return None - manager.add_stats(validate_stat_time=time.time() - t0) + if manager.stats_enabled: + manager.add_stats(validate_stat_time=time.time() - t0) # When we are using a fine-grained cache, we want our initial # build() to load all of the cache information and then do a @@ -1835,25 +1857,31 @@ def validate_meta( manager.log(f"Metadata abandoned for {id}: file {path} has different hash") return None else: - t0 = time.time() + if manager.stats_enabled: + t0 = time.time() # Optimization: update mtime and path (otherwise, this mismatch will reappear). meta.mtime = mtime meta.path = path meta.size = size meta.options = options_snapshot(id, manager) meta_file, _, _ = get_cache_names(id, path, manager.options) - manager.log( - "Updating mtime for {}: file {}, meta {}, mtime {}".format( - id, path, meta_file, meta.mtime + if manager.logging_enabled: + manager.log( + "Updating mtime for {}: file {}, meta {}, mtime {}".format( + id, path, meta_file, meta.mtime + ) ) - ) write_cache_meta(meta, manager, meta_file) - t1 = time.time() - manager.add_stats(validate_update_time=time.time() - t1, validate_munging_time=t1 - t0) + if manager.stats_enabled: + t1 = time.time() + manager.add_stats( + validate_update_time=time.time() - t1, validate_munging_time=t1 - t0 + ) return meta # It's a match on (id, path, size, hash, mtime). - manager.log(f"Metadata fresh for {id}: file {path}") + if manager.logging_enabled: + manager.log(f"Metadata fresh for {id}: file {path}") return meta @@ -2322,9 +2350,11 @@ def new_state( if path and source is None and manager.fscache.isdir(path): source = "" - t0 = time.time() + if manager.stats_enabled: + t0 = time.time() meta = validate_meta(meta, id, path, ignore_all, manager) - manager.add_stats(validate_meta_time=time.time() - t0) + if manager.stats_enabled: + manager.add_stats(validate_meta_time=time.time() - t0) if meta: # Make copies, since we may modify these and want to @@ -2672,7 +2702,7 @@ def load_tree(self, temporary: bool = False) -> None: data: bytes | dict[str, Any] | None if self.options.fixed_format_cache: - data = _load_ff_file(data_file, self.manager, "Could not load tree: ") + data = _load_ff_file(data_file, self.manager, "Could not load tree: ", None) else: data = _load_json_file(data_file, self.manager, "Load tree ", "Could not load tree: ") if data is None: @@ -3349,9 +3379,11 @@ def exist_removed_submodules(dependencies: list[str], manager: BuildManager) -> def find_module_simple(id: str, manager: BuildManager) -> str | None: """Find a filesystem path for module `id` or `None` if not found.""" - t0 = time.time() + if manager.stats_enabled: + t0 = time.time() x = manager.find_module_cache.find_module(id, fast_path=True) - manager.add_stats(find_module_time=time.time() - t0, find_module_calls=1) + if manager.stats_enabled: + manager.add_stats(find_module_time=time.time() - t0, find_module_calls=1) if isinstance(x, ModuleNotFoundReason): return None return x @@ -3359,9 +3391,11 @@ def find_module_simple(id: str, manager: BuildManager) -> str | None: def find_module_with_reason(id: str, manager: BuildManager) -> ModuleSearchResult: """Find a filesystem path for module `id` or the reason it can't be found.""" - t0 = time.time() + if manager.stats_enabled: + t0 = time.time() x = manager.find_module_cache.find_module(id, fast_path=False) - manager.add_stats(find_module_time=time.time() - t0, find_module_calls=1) + if manager.stats_enabled: + manager.add_stats(find_module_time=time.time() - t0, find_module_calls=1) return x @@ -3476,6 +3510,9 @@ def skipping_ancestor(manager: BuildManager, id: str, path: str, ancestor_for: S def log_configuration(manager: BuildManager, sources: list[BuildSource]) -> None: """Output useful configuration information to LOG and TRACE""" + if not manager.logging_enabled: + return + config_file = manager.options.config_file if config_file: config_file = os.path.abspath(config_file) @@ -3938,23 +3975,26 @@ def find_stale_sccs( if stale_indirect is not None: fresh = False - if fresh: - fresh_msg = "fresh" - elif stale_scc: - fresh_msg = "inherently stale" - if stale_scc != ascc.mod_ids: - fresh_msg += f" ({' '.join(sorted(stale_scc))})" - if stale_deps: - fresh_msg += f" with stale deps ({' '.join(sorted(stale_deps))})" - elif stale_deps: - fresh_msg = f"stale due to deps ({' '.join(sorted(stale_deps))})" - else: - assert stale_indirect is not None - fresh_msg = f"stale due to stale indirect dep(s): first {stale_indirect}" + if manager.logging_enabled: + if fresh: + fresh_msg = "fresh" + elif stale_scc: + fresh_msg = "inherently stale" + if stale_scc != ascc.mod_ids: + fresh_msg += f" ({' '.join(sorted(stale_scc))})" + if stale_deps: + fresh_msg += f" with stale deps ({' '.join(sorted(stale_deps))})" + elif stale_deps: + fresh_msg = f"stale due to deps ({' '.join(sorted(stale_deps))})" + else: + assert stale_indirect is not None + fresh_msg = f"stale due to stale indirect dep(s): first {stale_indirect}" - scc_str = " ".join(ascc.mod_ids) + if manager.logging_enabled: + scc_str = " ".join(ascc.mod_ids) if fresh: - manager.trace(f"Found {fresh_msg} SCC ({scc_str})") + if manager.tracing_enabled: + manager.trace(f"Found {fresh_msg} SCC ({scc_str})") # If there is at most one file with errors we can skip the ordering to save time. mods_with_errors = [id for id in ascc.mod_ids if graph[id].error_lines] if len(mods_with_errors) <= 1: @@ -3971,11 +4011,14 @@ def find_stale_sccs( manager.flush_errors(path, formatted, False) fresh_sccs.append(ascc) else: - size = len(ascc.mod_ids) - if size == 1: - manager.log(f"Scheduling SCC singleton ({scc_str}) as {fresh_msg}") - else: - manager.log("Scheduling SCC of size %d (%s) as %s" % (size, scc_str, fresh_msg)) + if manager.logging_enabled: + size = len(ascc.mod_ids) + if size == 1: + manager.log(f"Scheduling SCC singleton ({scc_str}) as {fresh_msg}") + else: + manager.log( + "Scheduling SCC of size %d (%s) as %s" % (size, scc_str, fresh_msg) + ) stale_sccs.append(ascc) return stale_sccs, fresh_sccs From 9d1bf7ce9e1d68bfd686f13d54e989fe45b273df Mon Sep 17 00:00:00 2001 From: Jukka Lehtosalo Date: Tue, 17 Feb 2026 16:33:13 +0000 Subject: [PATCH 2/4] We need add_stats even when not collecting build stats --- mypy/build.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index ba772617647a..a49317e923d3 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -1103,12 +1103,11 @@ def trace(self, *message: str) -> None: self.stderr.flush() def add_stats(self, **kwds: Any) -> None: - if self.stats_enabled: - for key, value in kwds.items(): - if key in self.stats: - self.stats[key] += value - else: - self.stats[key] = value + for key, value in kwds.items(): + if key in self.stats: + self.stats[key] += value + else: + self.stats[key] = value def stats_summary(self) -> Mapping[str, object]: return self.stats From b2b52c9889e87b0a7479a133b2f806a20f6babfe Mon Sep 17 00:00:00 2001 From: Jukka Lehtosalo Date: Tue, 17 Feb 2026 16:37:12 +0000 Subject: [PATCH 3/4] Update comment --- mypy/build.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mypy/build.py b/mypy/build.py index a49317e923d3..1b828882e926 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -799,7 +799,9 @@ def __init__( if not isinstance(plugin, ChainedPlugin): plugin = ChainedPlugin(options, [plugin]) self.plugin = plugin - # These allow quickly skipping logging and stats collection calls + # These allow quickly skipping logging and stats collection calls. Note + # that some stats impact mypy behavior, so be careful when skipping stats + # collection calls. self.stats_enabled = self.options.dump_build_stats self.logging_enabled = self.options.verbosity >= 1 self.tracing_enabled = self.options.verbosity >= 2 From 12e79e869ab25521a69cb523854121a7e3a35cda Mon Sep 17 00:00:00 2001 From: Jukka Lehtosalo Date: Tue, 17 Feb 2026 16:40:44 +0000 Subject: [PATCH 4/4] Simplify --- mypy/build.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index 1b828882e926..fabf47383b2d 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -3990,9 +3990,8 @@ def find_stale_sccs( else: assert stale_indirect is not None fresh_msg = f"stale due to stale indirect dep(s): first {stale_indirect}" - - if manager.logging_enabled: scc_str = " ".join(ascc.mod_ids) + if fresh: if manager.tracing_enabled: manager.trace(f"Found {fresh_msg} SCC ({scc_str})")