PlanExeOrg · neoneye · Apr 7, 2026 · Apr 7, 2026 · Apr 7, 2026 · Apr 7, 2026
diff --git a/worker_plan/worker_plan_internal/extract_dag.py b/worker_plan/worker_plan_internal/extract_dag.py
@@ -0,0 +1,244 @@
+"""Extract the pipeline DAG from Luigi task introspection.
+
+Walks the FullPlanPipeline task graph via requires()/output() and produces
+a JSON description of every stage: name, output files, upstream stages,
+and source code files.  This replaces the hand-maintained registry with a
+generated artifact that stays in sync with the actual pipeline code.
+
+Usage:
+    cd worker_plan
+    python -m worker_plan_internal.extract_dag
+    python -m worker_plan_internal.extract_dag --output pipeline_dag.json
+"""
+import inspect
+import json
+import re
+import sys
+from pathlib import Path
+from typing import Any
+
+import luigi
+
+_WORKER_PLAN_DIR = Path(__file__).resolve().parent.parent  # worker_plan/
+
+# Module-name prefixes regarded as infrastructure/utilities, not implementation logic.
+# They are compared with str.startswith; matching modules are left out of source_files.
+_INFRASTRUCTURE_PREFIXES = (
+    "worker_plan_internal.plan.stages.",
+    "worker_plan_internal.plan.run_plan_pipeline",
+    "worker_plan_internal.plan.pipeline_environment",
+    "worker_plan_internal.plan.ping_llm",
+    "worker_plan_internal.llm_util.",
+    "worker_plan_internal.llm_factory",
+    "worker_plan_internal.luigi_util.",
+    "worker_plan_internal.utils.",
+    "worker_plan_internal.format_",
+    "worker_plan_api.",
+)
+
+
+def _class_name_to_stage_name(class_name: str) -> str:
+    """Derive the snake_case stage name from a CamelCase task class name.
+
+    A trailing 'Task' is dropped, an underscore is inserted at every word
+    boundary, and the result is lower-cased.
+
+    Examples:
+        PotentialLeversTask           → potential_levers
+        SWOTAnalysisTask              → swot_analysis
+        WBSProjectLevel1AndLevel2Task → wbs_project_level1_and_level2
+        GovernancePhase1AuditTask     → governance_phase1_audit
+    """
+    stem = class_name.removesuffix("Task")
+    # Boundaries: lowercase/digit → uppercase, then acronym run → capitalised word.
+    for boundary in (r"([a-z0-9])([A-Z])", r"([A-Z]+)([A-Z][a-z])"):
+        stem = re.sub(boundary, r"\1_\2", stem)
+    return stem.lower()
+
+
+def _extract_output_filenames(task: luigi.Task) -> list[str]:
+    """Return the basenames of the file targets declared by ``task.output()``.
+
+    ``output()`` may return one target, a list/tuple, a dict, or a nesting of
+    those.  ``luigi.task.flatten`` reduces every shape to a flat list, so
+    targets inside nested containers are reported instead of being dropped.
+
+    Args:
+        task: Task instance to introspect.
+
+    Returns:
+        One basename per target that has a ``path`` attribute, in flatten
+        order.  An empty list when ``output()`` raises.
+    """
+    try:
+        targets = luigi.task.flatten(task.output())
+    except Exception:
+        # Best-effort introspection: an unevaluable output() yields no files.
+        return []
+    return [Path(target.path).name for target in targets if hasattr(target, "path")]
+
+
+def _extract_upstream_tasks(task: luigi.Task) -> list[luigi.Task]:
+    """Return the task instances that *task* declares in ``requires()``.
+
+    ``requires()`` may return one task, a list/tuple, a dict, a generator, or
+    a nesting of those.  ``luigi.task.flatten`` normalises every shape, so a
+    dict of task lists is expanded rather than returned as lists.
+
+    Returns:
+        Upstream tasks in flatten order; leaves that are not ``luigi.Task``
+        instances are dropped.  Empty if ``requires()`` returns None or raises.
+    """
+    try:
+        deps = luigi.task.flatten(task.requires())
+    except Exception:
+        return []  # best-effort: an unevaluable requires() means no deps
+    return [dep for dep in deps if isinstance(dep, luigi.Task)]
+
+
+def _detect_implementation_files(cls: type) -> list[str]:
+    """List the implementation files referenced by the module defining *cls*.
+
+    Each class or function reachable as an attribute of that module is
+    inspected.  An object contributes its defining file when its
+    ``__module__`` lies under ``worker_plan_internal.`` and matches none of
+    ``_INFRASTRUCTURE_PREFIXES``; only the first object seen per module is
+    resolved.
+
+    Returns:
+        Paths relative to ``_WORKER_PLAN_DIR``, in ``dir()`` attribute order.
+        Empty when the module of *cls* cannot be determined.
+    """
+    owner = inspect.getmodule(cls)
+    if owner is None:
+        return []
+    found: list[str] = []
+    handled: set[str] = set()
+    for attr in dir(owner):
+        member = getattr(owner, attr, None)
+        if not (inspect.isclass(member) or inspect.isfunction(member)):
+            continue
+        origin = getattr(member, "__module__", "") or ""
+        if not origin.startswith("worker_plan_internal."):
+            continue
+        # str.startswith takes a tuple, so one call tests every prefix.
+        if origin.startswith(_INFRASTRUCTURE_PREFIXES) or origin in handled:
+            continue
+        handled.add(origin)
+        try:
+            member_file = Path(inspect.getfile(member)).resolve()
+            relative = str(member_file.relative_to(_WORKER_PLAN_DIR))
+        except (TypeError, ValueError, OSError):
+            continue
+        if relative not in found:
+            found.append(relative)
+    return found
+
+
+def _extract_source_files(task: luigi.Task) -> list[str]:
+    """Collect the source files behind *task*, relative to ``_WORKER_PLAN_DIR``.
+
+    The file defining the task's class is listed first when it can be
+    resolved; the results of ``_detect_implementation_files`` follow, with
+    duplicates left out.
+    """
+    task_cls = type(task)
+    sources: list[str] = []
+
+    try:
+        own_file = Path(inspect.getfile(task_cls)).resolve()
+        sources.append(str(own_file.relative_to(_WORKER_PLAN_DIR)))
+    except (TypeError, ValueError, OSError):
+        pass  # the class file is simply omitted when it cannot be resolved
+
+    # dict.fromkeys drops duplicates while preserving first-seen order.
+    return list(dict.fromkeys(sources + _detect_implementation_files(task_cls)))
+
+
+def _output_sort_key(stage: dict[str, Any]) -> tuple[int, int, str]:
+    """Sort key: numeric prefix of the first output filename, then stage id."""
+    first_output = (stage.get("output_files") or [""])[0]
+    # Accepts "NNN" or "NNN-M" at the start of the name; M defaults to 0.
+    prefix = re.match(r"(\d+)-?(\d+)?", first_output)
+    if prefix is None:
+        return (9999, 0, stage["id"])  # no numeric prefix: sort after numbered stages
+    major, minor = prefix.group(1), prefix.group(2) or "0"
+    return (int(major), int(minor), stage["id"])
+
+
+def extract_dag() -> dict[str, Any]:
+    """Describe the FullPlanPipeline task graph as a JSON-serialisable dict.
+
+    The pipeline root is instantiated with a dummy run directory and its
+    ``requires()`` graph is walked depth-first, visiting each task class
+    once.  Every task except the root becomes one stage record.
+
+    Returns:
+        A schema object with ``schema_version``, ``pipeline_name``,
+        ``description`` and ``stages``; stages are ordered by
+        ``_output_sort_key``.
+    """
+    from worker_plan_internal.plan.stages.full_plan_pipeline import FullPlanPipeline
+
+    pipeline = FullPlanPipeline(run_id_dir=Path("/tmp/_dag_extract_dummy"))
+    records: list[dict[str, Any]] = []
+    seen_classes: set[str] = set()
+
+    def _visit(node: luigi.Task) -> None:
+        """Record *node* after its dependencies, once per class name."""
+        node_name = node.__class__.__name__
+        if node_name in seen_classes:
+            return
+        seen_classes.add(node_name)
+
+        parents = _extract_upstream_tasks(node)
+        # Depth-first: dependencies are recorded before the node itself.
+        for parent in parents:
+            _visit(parent)
+
+        # The orchestrator only ties the stages together; it is not a stage.
+        if node_name == "FullPlanPipeline":
+            return
+
+        node_cls = type(node)
+        record: dict[str, Any] = {"id": _class_name_to_stage_name(node_name)}
+        record["description"] = (
+            node_cls.description() if hasattr(node_cls, "description") else ""
+        )
+        record["output_files"] = _extract_output_filenames(node)
+        record["depends_on"] = sorted(
+            {_class_name_to_stage_name(p.__class__.__name__) for p in parents}
+        )
+        record["source_files"] = _extract_source_files(node)
+        records.append(record)
+
+    _visit(pipeline)
+
+    records.sort(key=_output_sort_key)
+
+    return {
+        "schema_version": "1.0",
+        "pipeline_name": "planning_pipeline",
+        "description": "DAG for PlanExe, an AI-driven project planning system.",
+        "stages": records,
+    }
+
+
+def main() -> None:
+    """CLI entry point: print the DAG as JSON, or write it to ``--output FILE``."""
+    import argparse  # function-scope import: only the CLI path needs it
+    # argparse accepts --output FILE / --output=FILE and rejects anything else.
+    parser = argparse.ArgumentParser(description="Extract the pipeline DAG as JSON.")
+    parser.add_argument("--output", metavar="FILE", help="write JSON to FILE instead of stdout")
+    output_path = parser.parse_args().output
+    dag = extract_dag()
+    dag_json = json.dumps(dag, indent=2, ensure_ascii=False)
+    if output_path:
+        Path(output_path).write_text(dag_json + "\n", encoding="utf-8")
+        print(f"Wrote {len(dag['stages'])} stages to {output_path}", file=sys.stderr)
+    else:
+        print(dag_json)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/worker_plan/worker_plan_internal/plan/run_plan_pipeline.py b/worker_plan/worker_plan_internal/plan/run_plan_pipeline.py
@@ -78,6 +78,18 @@ class PlanTask(luigi.Task):
     # If the callback is not provided, the pipeline will run until completion.
     _pipeline_executor_callback = luigi.Parameter(default=None, significant=False, visibility=luigi.parameter.ParameterVisibility.PRIVATE)
 
+    @classmethod
+    def description(cls) -> str:
+        """Short, one-line description of what this task does.
+
+        By default this is the first line of the class docstring, stripped
+        of surrounding whitespace, or ``""`` when there is no docstring.
+        Subclasses may override it to supply a custom description.
+        """
+        docstring = cls.__doc__ or ""
+        # Everything before the first newline of the stripped docstring.
+        first_line, _, _ = docstring.strip().partition("\n")
+        return first_line.strip()
     def file_path(self, filename: FilenameEnum) -> Path:
         return self.run_id_dir / filename.value
 

diff --git a/worker_plan/worker_plan_internal/plan/stages/candidate_scenarios.py b/worker_plan/worker_plan_internal/plan/stages/candidate_scenarios.py
@@ -11,9 +11,7 @@
 
 
 class CandidateScenariosTask(PlanTask):
-    """
-    Combinations of the vital few levers.
-    """
+    """Generate aggressive, moderate, and conservative scenarios from the vital few levers."""
     def requires(self):
         return {
             'setup': self.clone(SetupTask),

diff --git a/worker_plan/worker_plan_internal/plan/stages/consolidate_assumptions_markdown.py b/worker_plan/worker_plan_internal/plan/stages/consolidate_assumptions_markdown.py
@@ -18,9 +18,7 @@
 
 
 class ConsolidateAssumptionsMarkdownTask(PlanTask):
-    """
-    Combines multiple small markdown documents into a single big document.
-    """
+    """Merge locations, currency, risks, and assumption stages into one reference document."""
     def requires(self):
         return {
             'identify_purpose': self.clone(IdentifyPurposeTask),

diff --git a/worker_plan/worker_plan_internal/plan/stages/consolidate_governance.py b/worker_plan/worker_plan_internal/plan/stages/consolidate_governance.py
@@ -10,6 +10,8 @@
 
 
 class ConsolidateGovernanceTask(PlanTask):
+    """Consolidate all governance phases into a single markdown document."""
+
     def requires(self):
         return {
             'governance_phase1_audit': self.clone(GovernancePhase1AuditTask),

diff --git a/worker_plan/worker_plan_internal/plan/stages/constraint_checker_stages.py b/worker_plan/worker_plan_internal/plan/stages/constraint_checker_stages.py
@@ -30,7 +30,7 @@ def _read_constraints_json(task: PlanTask) -> str:
 
 
 class PotentialLeversConstraintTask(PlanTask):
-    """Check potential levers output for constraint violations."""
+    """Guardrail: verify brainstormed levers respect the user's constraints."""
     def requires(self):
         return {
             'extract_constraints': self.clone(ExtractConstraintsTask),
@@ -49,7 +49,7 @@ def run_with_llm(self, llm: LLM) -> None:
 
 
 class DeduplicatedLeversConstraintTask(PlanTask):
-    """Check deduplicated levers output for constraint violations."""
+    """Guardrail: verify triaged levers still respect the user's constraints."""
     def requires(self):
         return {
             'extract_constraints': self.clone(ExtractConstraintsTask),
@@ -68,7 +68,7 @@ def run_with_llm(self, llm: LLM) -> None:
 
 
 class EnrichedLeversConstraintTask(PlanTask):
-    """Check enriched levers output for constraint violations."""
+    """Guardrail: verify enriched levers still respect the user's constraints."""
     def requires(self):
         return {
             'extract_constraints': self.clone(ExtractConstraintsTask),
@@ -87,7 +87,7 @@ def run_with_llm(self, llm: LLM) -> None:
 
 
 class VitalFewLeversConstraintTask(PlanTask):
-    """Check vital few levers output for constraint violations."""
+    """Guardrail: verify the selected vital levers respect the user's constraints."""
     def requires(self):
         return {
             'extract_constraints': self.clone(ExtractConstraintsTask),
@@ -106,7 +106,7 @@ def run_with_llm(self, llm: LLM) -> None:
 
 
 class CandidateScenariosConstraintTask(PlanTask):
-    """Check candidate scenarios output for constraint violations."""
+    """Guardrail: verify generated scenarios respect the user's constraints."""
     def requires(self):
         return {
             'extract_constraints': self.clone(ExtractConstraintsTask),
@@ -125,7 +125,7 @@ def run_with_llm(self, llm: LLM) -> None:
 
 
 class SelectedScenarioConstraintTask(PlanTask):
-    """Check selected scenario output for constraint violations."""
+    """Guardrail: verify the chosen scenario respects the user's constraints before planning begins."""
     def requires(self):
         return {
             'extract_constraints': self.clone(ExtractConstraintsTask),

diff --git a/worker_plan/worker_plan_internal/plan/stages/convert_pitch_to_markdown.py b/worker_plan/worker_plan_internal/plan/stages/convert_pitch_to_markdown.py
@@ -9,12 +9,7 @@
 
 
 class ConvertPitchToMarkdownTask(PlanTask):
-    """
-    Human readable version of the pitch.
-
-    This task depends on:
-      - CreatePitchTask: Creates the pitch JSON.
-    """
+    """Convert the raw pitch JSON into a polished, scannable markdown document."""
     def output(self):
         return {
             'raw': self.local_target(FilenameEnum.PITCH_CONVERT_TO_MARKDOWN_RAW),

diff --git a/worker_plan/worker_plan_internal/plan/stages/create_pitch.py b/worker_plan/worker_plan_internal/plan/stages/create_pitch.py
@@ -17,15 +17,7 @@
 
 
 class CreatePitchTask(PlanTask):
-    """
-    Create a the pitch that explains the project plan, from multiple perspectives.
-
-    This task depends on:
-      - ProjectPlanTask: provides the project plan JSON.
-      - WBSProjectLevel1AndLevel2Task: containing the top level of the project plan.
-
-    The resulting pitch JSON is written to the file specified by FilenameEnum.PITCH.
-    """
+    """Create a compelling project pitch with target audience, call to action, and risk mitigation."""
     def output(self):
         return self.local_target(FilenameEnum.PITCH_RAW)
 

diff --git a/worker_plan/worker_plan_internal/plan/stages/create_schedule.py b/worker_plan/worker_plan_internal/plan/stages/create_schedule.py
@@ -19,6 +19,8 @@
 
 
 class CreateScheduleTask(PlanTask):
+    """Build the project schedule and generate Gantt charts."""
+
     def output(self):
         return {
             'dhtmlx_html': self.local_target(FilenameEnum.SCHEDULE_GANTT_DHTMLX_HTML),