From 4710e6e53a9dd6e69483d58971339d383a54d525 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Tue, 19 May 2026 06:52:53 +0000 Subject: [PATCH] =?UTF-8?q?=E2=9A=A1=20Bolt:=20Use=20C-based=20PyYAML=20lo?= =?UTF-8?q?aders/dumpers=20for=20performance=20improvement?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaced standard `yaml.safe_load`, `yaml.safe_dump`, and `yaml.dump` calls with custom functions from a new `yaml_utils` module. These utilities attempt to load `CSafeLoader`, `CSafeDumper`, and `CDumper`, which leverage the `libyaml` C extension for much faster parsing and serialization, falling back gracefully to pure Python implementations if unavailable. Co-authored-by: M7FX-1 <258925851+M7FX-1@users.noreply.github.com> --- .jules/bolt.md | 3 +++ src/specify_cli/__init__.py | 24 ++++++++++-------- src/specify_cli/agents.py | 5 ++-- src/specify_cli/extensions.py | 22 ++++++++-------- src/specify_cli/integrations/base.py | 13 ++++++---- src/specify_cli/integrations/catalog.py | 6 +++-- .../integrations/claude/__init__.py | 5 ++-- src/specify_cli/presets.py | 20 ++++++++------- src/specify_cli/workflows/catalog.py | 11 ++++---- src/specify_cli/workflows/engine.py | 9 +++---- src/specify_cli/yaml_utils.py | 25 +++++++++++++++++++ 11 files changed, 90 insertions(+), 53 deletions(-) create mode 100644 .jules/bolt.md create mode 100644 src/specify_cli/yaml_utils.py diff --git a/.jules/bolt.md b/.jules/bolt.md new file mode 100644 index 0000000000..6f91a80f22 --- /dev/null +++ b/.jules/bolt.md @@ -0,0 +1,3 @@ +## 2026-05-19 - Fast YAML Loading using C Extensions in PyYAML +**Learning:** PyYAML's default `safe_load` and `safe_dump` are written in pure Python. When the `libyaml` C library is available, PyYAML provides `CSafeLoader` and `CSafeDumper`, which offer significantly faster YAML parsing and serialization. 
This is an important performance detail for CLI tools that rely heavily on parsing configuration files and manifests (like Spec Kit), since YAML parsing is CPU-bound work. +**Action:** Created `src/specify_cli/yaml_utils.py` to transparently select the fastest available YAML loaders and dumpers, gracefully falling back to pure Python if C extensions are unavailable. Always use these custom utility functions over `yaml.safe_load`/`yaml.safe_dump` directly. diff --git a/src/specify_cli/__init__.py b/src/specify_cli/__init__.py index 77611128b5..e1a777c57d 100644 --- a/src/specify_cli/__init__.py +++ b/src/specify_cli/__init__.py @@ -41,6 +41,8 @@ import yaml from pathlib import Path +from .yaml_utils import yaml_safe_load, yaml_dump + from packaging.version import InvalidVersion, Version from typing import Any, Optional @@ -3123,7 +3125,7 @@ def preset_catalog_add( # Load existing config if config_path.exists(): try: - config = yaml.safe_load(config_path.read_text(encoding="utf-8")) or {} + config = yaml_safe_load(config_path.read_text(encoding="utf-8")) or {} except Exception as e: console.print(f"[red]Error:[/red] Failed to read {config_path}: {e}") raise typer.Exit(1) @@ -3151,7 +3153,7 @@ def preset_catalog_add( }) config["catalogs"] = catalogs - config_path.write_text(yaml.dump(config, default_flow_style=False, sort_keys=False, allow_unicode=True), encoding="utf-8") + config_path.write_text(yaml_dump(config, default_flow_style=False, sort_keys=False, allow_unicode=True), encoding="utf-8") install_label = "install allowed" if install_allowed else "discovery only" console.print(f"\n[green]✓[/green] Added catalog '[bold]{name}[/bold]' ({install_label})") @@ -3179,7 +3181,7 @@ def preset_catalog_remove( raise typer.Exit(1) try: - config = yaml.safe_load(config_path.read_text(encoding="utf-8")) or {} + config = yaml_safe_load(config_path.read_text(encoding="utf-8")) or {} except Exception: console.print("[red]Error:[/red] Failed to read preset catalog config.") raise typer.Exit(1) @@ -3196,7 
+3198,7 @@ def preset_catalog_remove( raise typer.Exit(1) config["catalogs"] = catalogs - config_path.write_text(yaml.dump(config, default_flow_style=False, sort_keys=False, allow_unicode=True), encoding="utf-8") + config_path.write_text(yaml_dump(config, default_flow_style=False, sort_keys=False, allow_unicode=True), encoding="utf-8") console.print(f"[green]✓[/green] Removed catalog '{name}'") if not catalogs: @@ -3465,7 +3467,7 @@ def catalog_add( # Load existing config if config_path.exists(): try: - config = yaml.safe_load(config_path.read_text(encoding="utf-8")) or {} + config = yaml_safe_load(config_path.read_text(encoding="utf-8")) or {} except Exception as e: console.print(f"[red]Error:[/red] Failed to read {config_path}: {e}") raise typer.Exit(1) @@ -3493,7 +3495,7 @@ def catalog_add( }) config["catalogs"] = catalogs - config_path.write_text(yaml.dump(config, default_flow_style=False, sort_keys=False, allow_unicode=True), encoding="utf-8") + config_path.write_text(yaml_dump(config, default_flow_style=False, sort_keys=False, allow_unicode=True), encoding="utf-8") install_label = "install allowed" if install_allowed else "discovery only" console.print(f"\n[green]✓[/green] Added catalog '[bold]{name}[/bold]' ({install_label})") @@ -3521,7 +3523,7 @@ def catalog_remove( raise typer.Exit(1) try: - config = yaml.safe_load(config_path.read_text(encoding="utf-8")) or {} + config = yaml_safe_load(config_path.read_text(encoding="utf-8")) or {} except Exception: console.print("[red]Error:[/red] Failed to read catalog config.") raise typer.Exit(1) @@ -3538,7 +3540,7 @@ def catalog_remove( raise typer.Exit(1) config["catalogs"] = catalogs - config_path.write_text(yaml.dump(config, default_flow_style=False, sort_keys=False, allow_unicode=True), encoding="utf-8") + config_path.write_text(yaml_dump(config, default_flow_style=False, sort_keys=False, allow_unicode=True), encoding="utf-8") console.print(f"[green]✓[/green] Removed catalog '{name}'") if not catalogs: @@ 
-4282,21 +4284,21 @@ def extension_update( # 6. Validate extension ID from ZIP BEFORE modifying installation # Handle both root-level and nested extension.yml (GitHub auto-generated ZIPs) with zipfile.ZipFile(zip_path, "r") as zf: - import yaml + from .yaml_utils import yaml_safe_load manifest_data = None namelist = zf.namelist() # First try root-level extension.yml if "extension.yml" in namelist: with zf.open("extension.yml") as f: - manifest_data = yaml.safe_load(f) or {} + manifest_data = yaml_safe_load(f) or {} else: # Look for extension.yml in a single top-level subdirectory # (e.g., "repo-name-branch/extension.yml") manifest_paths = [n for n in namelist if n.endswith("/extension.yml") and n.count("/") == 1] if len(manifest_paths) == 1: with zf.open(manifest_paths[0]) as f: - manifest_data = yaml.safe_load(f) or {} + manifest_data = yaml_safe_load(f) or {} if manifest_data is None: raise ValueError("Downloaded extension archive is missing 'extension.yml'") diff --git a/src/specify_cli/agents.py b/src/specify_cli/agents.py index 726b0fd2a6..8d12c88b06 100644 --- a/src/specify_cli/agents.py +++ b/src/specify_cli/agents.py @@ -14,6 +14,7 @@ import re from copy import deepcopy import yaml +from .yaml_utils import yaml_safe_load, yaml_dump def _build_agent_configs() -> dict[str, Any]: @@ -80,7 +81,7 @@ def parse_frontmatter(content: str) -> tuple[dict, str]: body = content[end_marker + 3 :].strip() try: - frontmatter = yaml.safe_load(frontmatter_str) or {} + frontmatter = yaml_safe_load(frontmatter_str) or {} except yaml.YAMLError: frontmatter = {} @@ -102,7 +103,7 @@ def render_frontmatter(fm: dict) -> str: if not fm: return "" - yaml_str = yaml.dump( + yaml_str = yaml_dump( fm, default_flow_style=False, sort_keys=False, allow_unicode=True ) return f"---\n{yaml_str}---\n" diff --git a/src/specify_cli/extensions.py b/src/specify_cli/extensions.py index 26ceab4034..a5bf39e65c 100644 --- a/src/specify_cli/extensions.py +++ b/src/specify_cli/extensions.py @@ -23,6 
+23,7 @@ import yaml from packaging import version as pkg_version +from .yaml_utils import yaml_safe_load, yaml_safe_dump, yaml_dump from packaging.specifiers import SpecifierSet, InvalidSpecifier _FALLBACK_CORE_COMMAND_NAMES = frozenset({ @@ -140,7 +141,7 @@ def _load_yaml(self, path: Path) -> dict: """Load YAML file safely.""" try: with open(path, 'r') as f: - data = yaml.safe_load(f) + data = yaml_safe_load(f) except yaml.YAMLError as e: raise ValidationError(f"Invalid YAML in {path}: {e}") except FileNotFoundError: @@ -856,7 +857,6 @@ def _register_extension_skills( from . import load_init_options from .agents import CommandRegistrar from .integrations import get_integration - import yaml written: List[str] = [] opts = load_init_options(self.project_root) @@ -931,7 +931,7 @@ def _register_extension_skills( description, f"extension:{manifest.id}", ) - frontmatter_text = yaml.safe_dump(frontmatter_data, sort_keys=False).strip() + frontmatter_text = yaml_safe_dump(frontmatter_data, sort_keys=False).strip() # Derive a human-friendly title from the command name short_name = cmd_name @@ -1004,13 +1004,13 @@ def _unregister_extension_skills(self, skill_names: List[str], extension_id: str if not skill_md.is_file(): continue try: - import yaml as _yaml + from .yaml_utils import yaml_safe_load as _yaml_safe_load raw = skill_md.read_text(encoding="utf-8") source = "" if raw.startswith("---"): parts = raw.split("---", 2) if len(parts) >= 3: - fm = _yaml.safe_load(parts[1]) or {} + fm = _yaml_safe_load(parts[1]) or {} source = ( fm.get("metadata", {}).get("source", "") if isinstance(fm, dict) @@ -1055,13 +1055,13 @@ def _unregister_extension_skills(self, skill_names: List[str], extension_id: str if not skill_md.is_file(): continue try: - import yaml as _yaml + from .yaml_utils import yaml_safe_load as _yaml_safe_load raw = skill_md.read_text(encoding="utf-8") source = "" if raw.startswith("---"): parts = raw.split("---", 2) if len(parts) >= 3: - fm = 
_yaml.safe_load(parts[1]) or {} + fm = _yaml_safe_load(parts[1]) or {} source = ( fm.get("metadata", {}).get("source", "") if isinstance(fm, dict) @@ -1557,7 +1557,7 @@ def _load_catalog_config(self, config_path: Path) -> Optional[List[CatalogEntry] if not config_path.exists(): return None try: - data = yaml.safe_load(config_path.read_text(encoding="utf-8")) or {} + data = yaml_safe_load(config_path.read_text(encoding="utf-8")) or {} except (yaml.YAMLError, OSError, UnicodeError) as e: raise ValidationError( f"Failed to read catalog config {config_path}: {e}" @@ -2057,7 +2057,7 @@ def _load_yaml_config(self, file_path: Path) -> Dict[str, Any]: return {} try: - return yaml.safe_load(file_path.read_text(encoding="utf-8")) or {} + return yaml_safe_load(file_path.read_text(encoding="utf-8")) or {} except (yaml.YAMLError, OSError, UnicodeError): return {} @@ -2301,7 +2301,7 @@ def get_project_config(self) -> Dict[str, Any]: } try: - return yaml.safe_load(self.config_file.read_text(encoding="utf-8")) or {} + return yaml_safe_load(self.config_file.read_text(encoding="utf-8")) or {} except (yaml.YAMLError, OSError, UnicodeError): return { "installed": [], @@ -2317,7 +2317,7 @@ def save_project_config(self, config: Dict[str, Any]): """ self.config_file.parent.mkdir(parents=True, exist_ok=True) self.config_file.write_text( - yaml.dump(config, default_flow_style=False, sort_keys=False, allow_unicode=True), + yaml_dump(config, default_flow_style=False, sort_keys=False, allow_unicode=True), encoding="utf-8", ) diff --git a/src/specify_cli/integrations/base.py b/src/specify_cli/integrations/base.py index a3d8a42aa2..89347d3ed2 100644 --- a/src/specify_cli/integrations/base.py +++ b/src/specify_cli/integrations/base.py @@ -907,12 +907,13 @@ def _extract_description(content: str) -> str: raw text. 
""" import yaml + from ..yaml_utils import yaml_safe_load frontmatter_text, _ = TomlIntegration._split_frontmatter(content) if not frontmatter_text: return "" try: - frontmatter = yaml.safe_load(frontmatter_text) or {} + frontmatter = yaml_safe_load(frontmatter_text) or {} except yaml.YAMLError: return "" @@ -1094,6 +1095,7 @@ def command_filename(self, template_name: str) -> str: def _extract_frontmatter(content: str) -> dict[str, Any]: """Extract frontmatter as a dict from YAML frontmatter block.""" import yaml + from ..yaml_utils import yaml_safe_load if not content.startswith("---"): return {} @@ -1113,7 +1115,7 @@ def _extract_frontmatter(content: str) -> dict[str, Any]: frontmatter_text = "".join(lines[1:frontmatter_end]) try: - fm = yaml.safe_load(frontmatter_text) or {} + fm = yaml_safe_load(frontmatter_text) or {} except yaml.YAMLError: return {} @@ -1162,7 +1164,7 @@ def _render_yaml(title: str, description: str, body: str, source_id: str) -> str for the prompt content. Uses ``yaml.safe_dump()`` for the header fields to ensure proper escaping. """ - import yaml + from ..yaml_utils import yaml_safe_dump header = { "version": "1.0.0", @@ -1173,7 +1175,7 @@ def _render_yaml(title: str, description: str, body: str, source_id: str) -> str "activities": ["Spec-Driven Development"], } - header_yaml = yaml.safe_dump( + header_yaml = yaml_safe_dump( header, sort_keys=False, allow_unicode=True, @@ -1343,6 +1345,7 @@ def setup( ``name``, ``description``, ``compatibility``, and ``metadata``. 
""" import yaml + from ..yaml_utils import yaml_safe_load templates = self.list_command_templates() if not templates: @@ -1385,7 +1388,7 @@ def setup( parts = raw.split("---", 2) if len(parts) >= 3: try: - fm = yaml.safe_load(parts[1]) + fm = yaml_safe_load(parts[1]) if isinstance(fm, dict): frontmatter = fm except yaml.YAMLError: diff --git a/src/specify_cli/integrations/catalog.py b/src/specify_cli/integrations/catalog.py index 2faa69ae96..15bb7384c2 100644 --- a/src/specify_cli/integrations/catalog.py +++ b/src/specify_cli/integrations/catalog.py @@ -21,6 +21,8 @@ import yaml from packaging import version as pkg_version +from ..yaml_utils import yaml_safe_load + # --------------------------------------------------------------------------- # Errors @@ -101,7 +103,7 @@ def _load_catalog_config( if not config_path.exists(): return None try: - data = yaml.safe_load(config_path.read_text(encoding="utf-8")) or {} + data = yaml_safe_load(config_path.read_text(encoding="utf-8")) or {} except (yaml.YAMLError, OSError, UnicodeError) as exc: raise IntegrationCatalogError( f"Failed to read catalog config {config_path}: {exc}" @@ -447,7 +449,7 @@ def __init__(self, descriptor_path: Path) -> None: def _load(path: Path) -> dict: try: with open(path, "r", encoding="utf-8") as fh: - return yaml.safe_load(fh) or {} + return yaml_safe_load(fh) or {} except yaml.YAMLError as exc: raise IntegrationDescriptorError(f"Invalid YAML in {path}: {exc}") except FileNotFoundError: diff --git a/src/specify_cli/integrations/claude/__init__.py b/src/specify_cli/integrations/claude/__init__.py index 3e39db717e..a5f16909cc 100644 --- a/src/specify_cli/integrations/claude/__init__.py +++ b/src/specify_cli/integrations/claude/__init__.py @@ -7,9 +7,8 @@ import re -import yaml - from ..base import SkillsIntegration +from ...yaml_utils import yaml_safe_dump from ..manifest import IntegrationManifest # Note injected into hook sections so Claude maps dot-notation command @@ -112,7 +111,7 @@ def 
_render_skill(self, template_name: str, frontmatter: dict[str, Any], body: s skill_frontmatter = self._build_skill_fm( skill_name, description, f"templates/commands/{template_name}.md" ) - frontmatter_text = yaml.safe_dump(skill_frontmatter, sort_keys=False).strip() + frontmatter_text = yaml_safe_dump(skill_frontmatter, sort_keys=False).strip() return f"---\n{frontmatter_text}\n---\n\n{body.strip()}\n" def _build_skill_fm(self, name: str, description: str, source: str) -> dict: diff --git a/src/specify_cli/presets.py b/src/specify_cli/presets.py index ed33f992c3..06a5e1e7d5 100644 --- a/src/specify_cli/presets.py +++ b/src/specify_cli/presets.py @@ -25,6 +25,8 @@ import yaml from packaging import version as pkg_version + +from specify_cli.yaml_utils import yaml_safe_dump, yaml_safe_load from packaging.specifiers import SpecifierSet, InvalidSpecifier from .extensions import ExtensionRegistry, normalize_priority @@ -137,7 +139,7 @@ def _load_yaml(self, path: Path) -> dict: """Load YAML file safely.""" try: with open(path, 'r') as f: - return yaml.safe_load(f) or {} + return yaml_safe_load(f) or {} except yaml.YAMLError as e: raise PresetValidationError(f"Invalid YAML in {path}: {e}") except FileNotFoundError: @@ -1050,7 +1052,7 @@ def _reconcile_skills(self, command_names: List[str]) -> None: skill_name, desc, f"override:{cmd_name}", ) - fm_text = yaml.safe_dump(fm_data, sort_keys=False).strip() + fm_text = yaml_safe_dump(fm_data, sort_keys=False).strip() skill_title = self._skill_title_from_command(cmd_name) skill_content = ( f"---\n{fm_text}\n---\n\n" @@ -1322,7 +1324,7 @@ def _register_skills( enhanced_desc, f"preset:{manifest.id}", ) - frontmatter_text = yaml.safe_dump(frontmatter_data, sort_keys=False).strip() + frontmatter_text = yaml_safe_dump(frontmatter_data, sort_keys=False).strip() skill_content = ( f"---\n" f"{frontmatter_text}\n" @@ -1415,7 +1417,7 @@ def _unregister_skills(self, skill_names: List[str], preset_dir: Path) -> None: enhanced_desc, 
f"templates/commands/{short_name}.md", ) - frontmatter_text = yaml.safe_dump(frontmatter_data, sort_keys=False).strip() + frontmatter_text = yaml_safe_dump(frontmatter_data, sort_keys=False).strip() skill_title = self._skill_title_from_command(short_name) skill_content = ( f"---\n" @@ -1449,7 +1451,7 @@ def _unregister_skills(self, skill_names: List[str], preset_dir: Path) -> None: frontmatter.get("description", f"Extension command: {command_name}"), extension_restore["source"], ) - frontmatter_text = yaml.safe_dump(frontmatter_data, sort_keys=False).strip() + frontmatter_text = yaml_safe_dump(frontmatter_data, sort_keys=False).strip() skill_content = ( f"---\n" f"{frontmatter_text}\n" @@ -1848,7 +1850,7 @@ def _load_catalog_config(self, config_path: Path) -> Optional[List[PresetCatalog if not config_path.exists(): return None try: - data = yaml.safe_load(config_path.read_text(encoding="utf-8")) or {} + data = yaml_safe_load(config_path.read_text(encoding="utf-8")) or {} except (yaml.YAMLError, OSError, UnicodeError) as e: raise PresetValidationError( f"Failed to read catalog config {config_path}: {e}" @@ -2753,7 +2755,7 @@ def _find_in_subdirs(base_dir: Path) -> Optional[Path]: break if fence_end > 0: fm_text = "".join(lines[1:fence_end]) - fm_data = yaml.safe_load(fm_text) + fm_data = yaml_safe_load(fm_text) if isinstance(fm_data, dict): fm_strategy = fm_data.get("strategy") if isinstance(fm_strategy, str) and fm_strategy.lower() in VALID_PRESET_STRATEGIES: @@ -3042,7 +3044,7 @@ def _parse_fm_yaml(fm_block: str) -> dict: else: yaml_lines = [] try: - return yaml.safe_load("\n".join(yaml_lines)) or {} + return yaml_safe_load("\n".join(yaml_lines)) or {} except yaml.YAMLError: return {} @@ -3062,7 +3064,7 @@ def _parse_fm_yaml(fm_block: str) -> dict: if top_fm: top_frontmatter_text = ( "---\n" - + yaml.safe_dump(top_fm, sort_keys=False).strip() + + yaml_safe_dump(top_fm, sort_keys=False).strip() + "\n---" ) else: diff --git a/src/specify_cli/workflows/catalog.py 
b/src/specify_cli/workflows/catalog.py index da5c60b5c8..82e13a87ca 100644 --- a/src/specify_cli/workflows/catalog.py +++ b/src/specify_cli/workflows/catalog.py @@ -18,6 +18,7 @@ from typing import Any import yaml +from ..yaml_utils import yaml_safe_load, yaml_dump # --------------------------------------------------------------------------- @@ -177,7 +178,7 @@ def _load_catalog_config( if not config_path.exists(): return None try: - data = yaml.safe_load(config_path.read_text(encoding="utf-8")) or {} + data = yaml_safe_load(config_path.read_text(encoding="utf-8")) or {} except (yaml.YAMLError, OSError, UnicodeError) as exc: raise WorkflowValidationError( f"Failed to read catalog config {config_path}: {exc}" @@ -468,7 +469,7 @@ def add_catalog(self, url: str, name: str | None = None) -> None: data: dict[str, Any] = {"catalogs": []} if config_path.exists(): - raw = yaml.safe_load(config_path.read_text(encoding="utf-8")) + raw = yaml_safe_load(config_path.read_text(encoding="utf-8")) if not isinstance(raw, dict): raise WorkflowValidationError( "Catalog config file is corrupted (expected a mapping)." @@ -505,7 +506,7 @@ def add_catalog(self, url: str, name: str | None = None) -> None: config_path.parent.mkdir(parents=True, exist_ok=True) with open(config_path, "w", encoding="utf-8") as f: - yaml.dump(data, f, default_flow_style=False, sort_keys=False, allow_unicode=True) + yaml_dump(data, f, default_flow_style=False, sort_keys=False, allow_unicode=True) def remove_catalog(self, index: int) -> str: """Remove a catalog source by index (0-based). 
Returns the removed name.""" @@ -513,7 +514,7 @@ def remove_catalog(self, index: int) -> str: if not config_path.exists(): raise WorkflowValidationError("No catalog config file found.") - data = yaml.safe_load(config_path.read_text(encoding="utf-8")) or {} + data = yaml_safe_load(config_path.read_text(encoding="utf-8")) or {} if not isinstance(data, dict): raise WorkflowValidationError( "Catalog config file is corrupted (expected a mapping)." @@ -533,7 +534,7 @@ def remove_catalog(self, index: int) -> str: data["catalogs"] = catalogs with open(config_path, "w", encoding="utf-8") as f: - yaml.dump(data, f, default_flow_style=False, sort_keys=False, allow_unicode=True) + yaml_dump(data, f, default_flow_style=False, sort_keys=False, allow_unicode=True) if isinstance(removed, dict): return removed.get("name", f"catalog-{index + 1}") diff --git a/src/specify_cli/workflows/engine.py b/src/specify_cli/workflows/engine.py index d6a73bbeb0..faba10334b 100644 --- a/src/specify_cli/workflows/engine.py +++ b/src/specify_cli/workflows/engine.py @@ -17,7 +17,7 @@ from pathlib import Path from typing import Any -import yaml +from ..yaml_utils import yaml_safe_load, yaml_safe_dump from .base import RunStatus, StepContext, StepResult, StepStatus @@ -61,7 +61,7 @@ def __init__(self, data: dict[str, Any], source_path: Path | None = None) -> Non def from_yaml(cls, path: Path) -> WorkflowDefinition: """Load a workflow definition from a YAML file.""" with open(path, encoding="utf-8") as f: - data = yaml.safe_load(f) + data = yaml_safe_load(f) if not isinstance(data, dict): msg = f"Workflow YAML must be a mapping, got {type(data).__name__}." 
raise ValueError(msg) @@ -70,7 +70,7 @@ def from_yaml(cls, path: Path) -> WorkflowDefinition: @classmethod def from_string(cls, content: str) -> WorkflowDefinition: """Load a workflow definition from a YAML string.""" - data = yaml.safe_load(content) + data = yaml_safe_load(content) if not isinstance(data, dict): msg = f"Workflow YAML must be a mapping, got {type(data).__name__}." raise ValueError(msg) @@ -412,9 +412,8 @@ def execute( run_dir = self.project_root / ".specify" / "workflows" / "runs" / state.run_id run_dir.mkdir(parents=True, exist_ok=True) workflow_copy = run_dir / "workflow.yml" - import yaml with open(workflow_copy, "w", encoding="utf-8") as f: - yaml.safe_dump(definition.data, f, sort_keys=False) + yaml_safe_dump(definition.data, f, sort_keys=False) # Resolve inputs resolved_inputs = self._resolve_inputs(definition, inputs or {}) diff --git a/src/specify_cli/yaml_utils.py b/src/specify_cli/yaml_utils.py new file mode 100644 index 0000000000..a4397c5239 --- /dev/null +++ b/src/specify_cli/yaml_utils.py @@ -0,0 +1,25 @@ +"""YAML utilities leveraging C-based extensions for better performance.""" + +import yaml +from typing import Any, IO + +try: + from yaml import CSafeLoader as SafeLoader + from yaml import CSafeDumper as SafeDumper + from yaml import CDumper as Dumper +except ImportError: + from yaml import SafeLoader + from yaml import SafeDumper + from yaml import Dumper + +def yaml_safe_load(stream: str | bytes | IO[str] | IO[bytes]) -> Any: + """Safely load YAML data using the fastest available loader.""" + return yaml.load(stream, Loader=SafeLoader) + +def yaml_safe_dump(data: Any, stream: IO[Any] | None = None, **kwargs: Any) -> str | Any: + """Safely dump YAML data using the fastest available dumper.""" + return yaml.dump(data, stream, Dumper=SafeDumper, **kwargs) + +def yaml_dump(data: Any, stream: IO[Any] | None = None, **kwargs: Any) -> str | Any: + """Dump YAML data using the fastest available dumper.""" + return yaml.dump(data, stream, 
Dumper=Dumper, **kwargs)