refactor(goal): CLI interface improvements

tom-sapletta-com · tom-sapletta-com · commit 29150212db1d · 2026-02-25T19:32:59.000+01:00
changes:
  - file: cli.py
    area: cli
    modified: [main]
  - file: metrics.py
    area: quality
    added: [_count_elements_ast]
    modified: [ReproductionMetrics, _compute_structural_metrics]
    removed: [count_elements]
  - file: toon_format.py
    area: core
    added: [_item_count, generate_hybrid]
    modified: [TOONGenerator]

dependencies:
  flow: "cli→toon_format"
  - cli.py -> toon_format.py

stats:
  lines: "+159/-25 (net +134)"
  files: 3
  complexity: "Large structural change (normalized)"
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,3 +1,16 @@
+## [1.0.43] - 2026-02-25
+
+### Summary
+
+refactor(goal): CLI interface improvements
+
+### Other
+
+- update code2logic/cli.py
+- update code2logic/metrics.py
+- update code2logic/toon_format.py
+
+
 ## [1.0.42] - 2026-02-25
 
 ### Summary
diff --git a/VERSION b/VERSION
@@ -1 +1 @@
-1.0.42
+1.0.43
diff --git a/code2logic/__init__.py b/code2logic/__init__.py
@@ -18,7 +18,7 @@
     >>> print(output)
 """
 
-__version__ = "1.0.42"
+__version__ = "1.0.43"
 __author__ = "Softreck"
 __email__ = "info@softreck.dev"
 __license__ = "MIT"
diff --git a/code2logic/cli.py b/code2logic/cli.py
@@ -978,11 +978,18 @@ def _maybe_print_pretty_help() -> bool:
         # For TOON, --compact means ultra-compact format
         compact = args.compact if hasattr(args, 'compact') else False
         ultra_compact = args.ultra_compact if hasattr(args, 'ultra_compact') else False
+        use_hybrid = args.hybrid if hasattr(args, 'hybrid') else False
 
         # Use compact or ultra_compact flag (compact takes precedence for TOON)
         use_ultra_compact = ultra_compact or compact
 
-        if use_ultra_compact:
+        if use_hybrid:
+            output = generator.generate_hybrid(
+                project,
+                detail='full',
+                no_repeat_name=args.no_repeat_module,
+            )
+        elif use_ultra_compact:
             output = generator.generate_ultra_compact(project)
         else:
             detail_map = {
diff --git a/code2logic/metrics.py b/code2logic/metrics.py
@@ -303,23 +303,11 @@ def _cosine_similarity(self, words1: List[str], words2: List[str]) -> float:
         return (dot_product / (magnitude1 * magnitude2)) * 100
 
     def _compute_structural_metrics(self, original: str, generated: str) -> StructuralMetrics:
-        """Compute structural metrics."""
+        """Compute structural metrics using AST when possible, regex as fallback."""
         metrics = StructuralMetrics()
 
-        # Count elements
-        def count_elements(code: str) -> Dict[str, int]:
-            return {
-                'classes': len(re.findall(r'^class\s+\w+', code, re.MULTILINE)),
-                'functions': len(re.findall(r'^(?:async\s+)?def\s+\w+', code, re.MULTILINE)),
-                'methods': len(re.findall(r'^\s+(?:async\s+)?def\s+\w+', code, re.MULTILINE)),
-                'imports': len(re.findall(r'^(?:from|import)\s+', code, re.MULTILINE)),
-                # Capture both annotated attributes and simple assignments.
-                # This is still heuristic, but avoids undercounting common code.
-                'attributes': len(re.findall(r'^\s+\w+\s*(?::\s*[^=\n]+)?\s*=', code, re.MULTILINE)),
-            }
-
-        orig = count_elements(original)
-        gen = count_elements(generated)
+        orig = self._count_elements_ast(original)
+        gen = self._count_elements_ast(generated)
 
         metrics.classes_original = orig['classes']
         metrics.classes_generated = gen['classes']
@@ -341,15 +329,15 @@ def count_elements(code: str) -> Dict[str, int]:
         metrics.attributes_generated = gen['attributes']
         metrics.attributes_match = orig['attributes'] == gen['attributes']
 
-        # Structural score
-        matches = sum([
-            metrics.classes_match,
-            metrics.functions_match,
-            metrics.methods_match,
-            metrics.imports_match,
-            metrics.attributes_match,
-        ])
-        metrics.structural_score = (matches / 5) * 100
+        # Ratio-based structural score (partial credit instead of binary)
+        total = 0.0
+        for key in ('classes', 'functions', 'methods', 'imports', 'attributes'):
+            ov, gv = orig[key], gen[key]
+            if ov == 0 and gv == 0:
+                total += 1.0
+            elif max(ov, gv) > 0:
+                total += min(ov, gv) / max(ov, gv)
+        metrics.structural_score = (total / 5) * 100
 
         # Element coverage
         total_orig = sum(orig.values())
@@ -359,6 +347,59 @@ def count_elements(code: str) -> Dict[str, int]:
 
         return metrics
 
+    @staticmethod
+    def _count_elements_ast(code: str) -> Dict[str, int]:
+        """Count structural elements using Python AST, with regex fallback."""
+        import ast as _ast
+
+        try:
+            tree = _ast.parse(code)
+        except SyntaxError:
+            # Fallback to regex for unparseable code
+            return {
+                'classes': len(re.findall(r'^class\s+\w+', code, re.MULTILINE)),
+                'functions': len(re.findall(r'^(?:async\s+)?def\s+\w+', code, re.MULTILINE)),
+                'methods': len(re.findall(r'^\s+(?:async\s+)?def\s+\w+', code, re.MULTILINE)),
+                'imports': len(re.findall(r'^(?:from|import)\s+', code, re.MULTILINE)),
+                'attributes': len(re.findall(r'^\s+\w+\s*(?::\s*[^=\n]+)?\s*=', code, re.MULTILINE)),
+            }
+
+        classes = 0
+        functions = 0
+        methods = 0
+        imports = 0
+        attributes = 0
+
+        for node in _ast.walk(tree):
+            if isinstance(node, _ast.ClassDef):
+                classes += 1
+                # Count methods inside classes
+                for item in node.body:
+                    if isinstance(item, (_ast.FunctionDef, _ast.AsyncFunctionDef)):
+                        methods += 1
+                    # Count class-level attributes (annotated or assigned)
+                    elif isinstance(item, (_ast.Assign, _ast.AnnAssign)):
+                        attributes += 1
+            elif isinstance(node, (_ast.FunctionDef, _ast.AsyncFunctionDef)):
+                # Only count as top-level function if not inside a class
+                # (methods already counted above)
+                pass
+            elif isinstance(node, (_ast.Import, _ast.ImportFrom)):
+                imports += 1
+
+        # Count top-level functions (not methods)
+        for node in _ast.iter_child_nodes(tree):
+            if isinstance(node, (_ast.FunctionDef, _ast.AsyncFunctionDef)):
+                functions += 1
+
+        return {
+            'classes': classes,
+            'functions': functions,
+            'methods': methods,
+            'imports': imports,
+            'attributes': attributes,
+        }
+
     def _compute_semantic_metrics(self, original: str, generated: str) -> SemanticMetrics:
         """Compute semantic preservation metrics."""
         metrics = SemanticMetrics()
diff --git a/code2logic/toon_format.py b/code2logic/toon_format.py
@@ -132,6 +132,92 @@ def generate(self, project: ProjectInfo, detail: str = 'standard', no_repeat_nam
 
         return '\n'.join(lines)
 
+    def generate_hybrid(
+        self,
+        project: ProjectInfo,
+        detail: str = 'full',
+        no_repeat_name: bool = True,
+        hub_top_n: int = 5,
+        hub_functions_detail: str = 'full',
+    ) -> str:
+        """Generate TOON-Hybrid: project structure + function-logic for hub modules.
+
+        Combines project-level TOON (classes, imports, structure) with
+        selective function-logic details for the most important modules.
+
+        Args:
+            project: Analyzed project info
+            detail: Detail level for project structure
+            no_repeat_name: Compress repeated directory prefixes
+            hub_top_n: Number of top modules to include function details for
+            hub_functions_detail: Detail level for function-logic ('standard', 'full')
+
+        Returns:
+            Hybrid TOON string
+        """
+        from .function_logic import FunctionLogicGenerator
+        from .shared_utils import remove_self_from_params
+
+        # Generate base project TOON
+        base = self.generate(project, detail=detail, no_repeat_name=no_repeat_name)
+
+        # Identify hub modules: use dependency_metrics if available, otherwise sort by function count
+        hub_paths: set = set()
+        dep_metrics = getattr(project, 'dependency_metrics', {}) or {}
+        if dep_metrics:
+            ranked = sorted(dep_metrics.items(), key=lambda x: getattr(x[1], 'pagerank', 0), reverse=True)
+            hub_paths = {path for path, node in ranked[:hub_top_n]}
+        else:
+            # Fallback: rank by total functions + methods
+            def _item_count(m):
+                return len(getattr(m, 'functions', []) or []) + sum(
+                    len(getattr(c, 'methods', []) or []) for c in (getattr(m, 'classes', []) or [])
+                )
+            ranked_modules = sorted(project.modules, key=_item_count, reverse=True)
+            hub_paths = {m.path for m in ranked_modules[:hub_top_n]}
+
+        if not hub_paths:
+            return base
+
+        # Generate function-logic section for hub modules only
+        hub_modules = [m for m in project.modules if m.path in hub_paths]
+        if not hub_modules:
+            return base
+
+        logic_gen = FunctionLogicGenerator()
+        lines = [base, "", "# === Hub Module Function Details ==="]
+
+        for m in hub_modules:
+            items = logic_gen._module_items(m)
+            if not items:
+                continue
+            lines.append(f"  {self._quote(m.path)}:")
+
+            # Emit class context
+            classes = getattr(m, 'classes', []) or []
+            for cls in classes:
+                bases = ','.join(getattr(cls, 'bases', []) or []) or '-'
+                lines.append(f"    CLASS {self._quote(cls.name)}({bases})")
+
+            # Emit function table
+            header = f"line{self.delim_marker}name{self.delim_marker}sig{self.delim_marker}does"
+            lines.append(f"    functions[{len(items)}]{{{header}}}:")
+
+            for kind, qname, func in items:
+                sig = logic_gen._build_sig(func, include_async_prefix=False, language=m.language)
+                start_line = str(getattr(func, 'start_line', 0) or 0)
+                display_name = qname
+                if getattr(func, 'is_async', False):
+                    display_name = f"~{qname}"
+                cc = getattr(func, 'complexity', 1) or 1
+                if cc > 1:
+                    display_name = f"{display_name} cc:{cc}"
+                does = logic_gen._build_does(func)
+                row = [start_line, self._quote(display_name), self._quote(sig), self._quote(does)]
+                lines.append(f"      {self.delimiter.join(row)}")
+
+        return '\n'.join(lines)
+
     def _generate_modules(self, modules: List[ModuleInfo], detail: str, no_repeat_name: bool = False) -> List[str]:
         """Generate modules section."""
         lines = []
diff --git a/logic2code/__init__.py b/logic2code/__init__.py
@@ -14,5 +14,5 @@
 from .generator import CodeGenerator, GeneratorConfig, GenerationResult
 from .renderers import PythonRenderer
 
-__version__ = '1.0.42'
+__version__ = '1.0.43'
 __all__ = ['CodeGenerator', 'GeneratorConfig', 'GenerationResult', 'PythonRenderer']
diff --git a/logic2test/__init__.py b/logic2test/__init__.py
@@ -15,5 +15,5 @@
 from .parsers import LogicParser
 from .templates import TestTemplate
 
-__version__ = '1.0.42'
+__version__ = '1.0.43'
 __all__ = ['TestGenerator', 'GeneratorConfig', 'GenerationResult', 'LogicParser', 'TestTemplate']
diff --git a/lolm/__init__.py b/lolm/__init__.py
@@ -76,7 +76,7 @@
 )
 from .clients import LLMRateLimitError
 
-__version__ = '1.0.42'
+__version__ = '1.0.43'
 __all__ = [
     # Config
     'LLMConfig',
diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 
 [tool.poetry]
 name = "code2logic"
-version = "1.0.42"
+version = "1.0.43"
 description = "Code2Logic - Source code to logical representation converter for LLM analysis, featuring Tree-sitter parsing, dependency graph analysis, and multi-language support."
 readme = "README.md"
 license = "Apache-2.0"
diff --git a/tests/samples/sample_reexport/__init__.py b/tests/samples/sample_reexport/__init__.py
@@ -18,4 +18,4 @@
     "ProcessingError",
 ]
 
-__version__ = "1.0.42"
+__version__ = "1.0.43"

Original file line number	Diff line number	Diff line change
`@@ -76,7 +76,7 @@`
`76`	`76`	`)`
`77`	`77`	`from .clients import LLMRateLimitError`
`78`	`78`
`79`		`-__version__ = '1.0.42'`
	`79`	`+__version__ = '1.0.43'`
`80`	`80`	`__all__ = [`
`81`	`81`	`# Config`
`82`	`82`	`'LLMConfig',`