diff --git a/BACKLOG.md b/BACKLOG.md index 6d4a342..0f081be 100644 --- a/BACKLOG.md +++ b/BACKLOG.md @@ -11,7 +11,7 @@ This backlog collects product and maintenance ideas from project research. ## P1 - Adoption Workflow -- Add an `.archignore` or similar file, modeled after `.gitignore`, for files that should never be analyzed. +- [x] Add an `.archignore` or similar file, modeled after `.gitignore`, for files that should never be analyzed. - Add a `.because(...)` API so rules can carry user-facing rationale into failure messages and generated architecture documentation. - Add configuration-file support for common rules, while keeping the fluent Python API as the primary interface. - Add support for monorepo and multi-package Python projects. diff --git a/README.md b/README.md index 43f827a..0c6de07 100644 --- a/README.md +++ b/README.md @@ -137,6 +137,25 @@ options = CheckOptions( violations = rule.check(options) ``` +### Excluding Files With `.archignore` + +Add a `.archignore` file to your project root to permanently exclude generated or +irrelevant files from architecture checks and file-based metrics: + +```gitignore +# Generated code +generated/ + +# Migration scripts +migrations/*.py + +# A single root-level file +/legacy_adapter.py +``` + +Patterns support `#` comments, blank lines, glob syntax, root-relative paths (leading `/`), +path patterns (containing `/`), and directory patterns (trailing `/`); `!` negation is not supported. + ## 🐹 Use Cases Here is an overview of common use cases. 
diff --git a/scripts/check_release_metadata.py b/scripts/check_release_metadata.py index 3c2f015..36e438e 100644 --- a/scripts/check_release_metadata.py +++ b/scripts/check_release_metadata.py @@ -17,7 +17,7 @@ def read_project_version() -> str: content = PYPROJECT.read_text(encoding="utf-8") match = re.search(r'^version = "([^"]+)"$', content, re.MULTILINE) if match is None: - raise RuntimeError("Could not find project.version in pyproject.toml") + raise RuntimeError("Could not find [project].version in pyproject.toml") return match.group(1) diff --git a/src/archunitpython/common/extraction/extract_graph.py b/src/archunitpython/common/extraction/extract_graph.py index 4d05d72..36681a4 100644 --- a/src/archunitpython/common/extraction/extract_graph.py +++ b/src/archunitpython/common/extraction/extract_graph.py @@ -27,6 +27,8 @@ "*.egg-info", ] +_ARCHIGNORE_FILE = ".archignore" + def clear_graph_cache(options: CheckOptions | None = None) -> None: """Clear the cached dependency graphs.""" @@ -58,7 +60,7 @@ def extract_graph( project_path = os.getcwd() project_path = os.path.abspath(project_path) - excludes = list(exclude_patterns) if exclude_patterns is not None else list(_DEFAULT_EXCLUDE) + excludes = _resolve_exclude_patterns(project_path, exclude_patterns) ignore_type_checking_imports = bool( options and options.ignore_type_checking_imports ) @@ -94,6 +96,34 @@ def _build_cache_key( ) +def _resolve_exclude_patterns( + project_path: str, + exclude_patterns: list[str] | None, +) -> list[str]: + """Resolve exclude patterns (explicit or defaults) plus any .archignore patterns.""" + excludes = list(exclude_patterns) if exclude_patterns is not None else list(_DEFAULT_EXCLUDE) + excludes.extend(_load_archignore_patterns(project_path)) + return excludes + + +def _load_archignore_patterns(project_path: str) -> list[str]: + """Load .archignore patterns from a project root, if present.""" + archignore_path = os.path.join(project_path, _ARCHIGNORE_FILE) + try: + with 
open(archignore_path, "r", encoding="utf-8", errors="replace") as f: + lines = f.readlines() + except OSError: + return [] + + patterns: list[str] = [] + for line in lines: + pattern = line.strip() + if not pattern or pattern.startswith("#"): + continue + patterns.append(pattern) + return patterns + + def _extract_graph_uncached( project_path: str, exclude_patterns: list[str], @@ -105,6 +135,7 @@ def _extract_graph_uncached( edges: list[Edge] = [] py_files_set = set(py_files) + normalized_py_files = {_normalize(f) for f in py_files_set} for file_path in py_files: # Add self-referencing edge (ensures the file appears as a node) @@ -129,10 +160,8 @@ def _extract_graph_uncached( ) if resolved and resolved != _normalize(file_path): # Check if the resolved path is in our project - if not is_external and resolved not in { - _normalize(f) for f in py_files_set - }: - is_external = True + if not is_external and resolved not in normalized_py_files: + continue edges.append( Edge( @@ -154,29 +183,65 @@ def _normalize(path: str) -> str: def _find_python_files(root: str, exclude: list[str]) -> list[str]: """Recursively find all .py files, excluding specified patterns.""" py_files: list[str] = [] + root = os.path.abspath(root) for dirpath, dirnames, filenames in os.walk(root): # Filter out excluded directories in-place dirnames[:] = [ d for d in dirnames - if not _should_exclude(d, exclude) + if not _should_exclude_path(os.path.join(dirpath, d), root, exclude, is_dir=True) ] for filename in filenames: - if filename.endswith(".py") and not _should_exclude(filename, exclude): - full_path = os.path.join(dirpath, filename) + full_path = os.path.join(dirpath, filename) + if filename.endswith(".py") and not _should_exclude_path( + full_path, root, exclude, is_dir=False + ): py_files.append(os.path.abspath(full_path)) return py_files -def _should_exclude(name: str, patterns: list[str]) -> bool: - """Check if a name matches any exclude pattern.""" +def _should_exclude_path( + path: str, 
+ root: str, + patterns: list[str], + *, + is_dir: bool, +) -> bool: + """Check if a path matches any exclude pattern.""" import fnmatch - for pattern in patterns: - if fnmatch.fnmatch(name, pattern): + rel_path = _normalize(os.path.relpath(path, root)) + name = os.path.basename(path) + + for raw_pattern in patterns: + pattern = raw_pattern.strip().replace("\\", "/") + if not pattern or pattern.startswith("#"): + continue + + pattern = pattern.removeprefix("./") + anchored = pattern.startswith("/") + if anchored: + pattern = pattern[1:] + + dir_only = pattern.endswith("/") + if dir_only: + pattern = pattern.rstrip("/") + if not is_dir: + continue + + if not pattern: + continue + + if "/" in pattern or anchored: + if fnmatch.fnmatch(rel_path, pattern): + return True + if is_dir and rel_path == pattern: + return True + elif fnmatch.fnmatch(name, pattern): return True + return False diff --git a/src/archunitpython/metrics/extraction/extract_class_info.py b/src/archunitpython/metrics/extraction/extract_class_info.py index 24ed42e..ab29ee4 100644 --- a/src/archunitpython/metrics/extraction/extract_class_info.py +++ b/src/archunitpython/metrics/extraction/extract_class_info.py @@ -5,7 +5,10 @@ import ast import os -from archunitpython.common.extraction.extract_graph import _DEFAULT_EXCLUDE, _find_python_files +from archunitpython.common.extraction.extract_graph import ( + _find_python_files, + _resolve_exclude_patterns, +) from archunitpython.metrics.common.types import ( ClassInfo, EnhancedClassInfo, @@ -33,7 +36,7 @@ def extract_class_info( project_path = os.getcwd() project_path = os.path.abspath(project_path) - excludes = exclude_patterns if exclude_patterns is not None else _DEFAULT_EXCLUDE + excludes = _resolve_exclude_patterns(project_path, exclude_patterns) py_files = _find_python_files(project_path, excludes) classes: list[ClassInfo] = [] @@ -53,7 +56,7 @@ def extract_enhanced_class_info( project_path = os.getcwd() project_path = os.path.abspath(project_path) - 
class TestArchignore:
    """Checks that ``.archignore`` entries are honoured by discovery and graph extraction."""

    def setup_method(self):
        # Every test starts with a cleared graph cache and its own scratch project.
        clear_graph_cache()
        scratch_root = Path(__file__).resolve().parent / ".tmp"
        self._temp_dir = scratch_root / f"project_{uuid4().hex}"
        self._temp_dir.mkdir(parents=True)

    def teardown_method(self):
        shutil.rmtree(self._temp_dir, ignore_errors=True)

    def _write(self, relative_path: str, content: str = "") -> None:
        """Create ``relative_path`` inside the scratch project, parents included."""
        target = self._temp_dir / relative_path
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_text(content, encoding="utf-8")

    def test_archignore_excludes_files_and_directories(self):
        archignore_lines = (
            "# Ignore generated architecture-test inputs",
            "ignored.py",
            "generated/",
            "nested/*.py",
            "/root_ignored.py",
        )
        self._write(".archignore", "\n".join(archignore_lines))
        for source_file in (
            "keep.py",
            "ignored.py",
            "root_ignored.py",
            "generated/generated.py",
            "nested/ignored_nested.py",
        ):
            self._write(source_file)

        project_root = str(self._temp_dir)
        excludes = _resolve_exclude_patterns(project_root, ["__pycache__"])
        discovered = _find_python_files(project_root, excludes)

        relative_files = set()
        for file_path in discovered:
            relative_files.add(Path(file_path).relative_to(self._temp_dir).as_posix())

        assert relative_files == {"keep.py"}

    def test_archignore_ignored_files_are_not_dependency_targets(self):
        self._write(".archignore", "ignored.py\n")
        self._write("keep.py", "import ignored\n")
        self._write("ignored.py", "VALUE = 1\n")

        graph = extract_graph(str(self._temp_dir))

        ignored_path = _normalize(str((self._temp_dir / "ignored.py").resolve()))
        targets = {edge.target for edge in graph}
        assert ignored_path not in targets

    def test_archignore_with_invalid_utf8_bytes_does_not_abort_extraction(self):
        # The second line is not valid UTF-8; extraction must still complete.
        archignore = self._temp_dir / ".archignore"
        archignore.write_bytes(b"ignored.py\n\xff\n")
        self._write("keep.py")
        self._write("ignored.py")

        graph = extract_graph(str(self._temp_dir))

        keep_path = _normalize(str((self._temp_dir / "keep.py").resolve()))
        ignored_path = _normalize(str((self._temp_dir / "ignored.py").resolve()))
        sources = {edge.source for edge in graph}
        assert keep_path in sources
        assert ignored_path not in sources
class TestMetricsArchignore:
    """Checks that file-based metrics skip files listed in ``.archignore``."""

    def setup_method(self):
        # Unique scratch project per test, created next to this test module.
        scratch_root = Path(__file__).resolve().parent / ".tmp"
        self._temp_dir = scratch_root / f"project_{uuid4().hex}"
        self._temp_dir.mkdir(parents=True)

    def teardown_method(self):
        shutil.rmtree(self._temp_dir, ignore_errors=True)

    def test_file_metrics_respect_archignore(self):
        # ignored.py alone would break the 5-line limit; keep.py stays below it.
        long_module = "\n".join(f"VALUE_{i} = {i}" for i in range(20))
        project_files = {
            ".archignore": "ignored.py\n",
            "keep.py": "VALUE = 1\n",
            "ignored.py": long_module,
        }
        for file_name, content in project_files.items():
            (self._temp_dir / file_name).write_text(content, encoding="utf-8")

        rule = metrics(str(self._temp_dir)).count().lines_of_code().should_be_below(5)
        violations = rule.check()

        assert violations == []