diff --git a/build_by_month.py b/build_by_month.py index c694f97..bf2fab3 100644 --- a/build_by_month.py +++ b/build_by_month.py @@ -31,44 +31,19 @@ def _get_first_n_words(text: str, n: int = 15) -> str: return " ".join(words[:n]) + "..." -def _extract_summary(docs_path: Path, word_limit: int = 30) -> str: - """Extract the first paragraph of the docs file, limited to word_limit words.""" - if not docs_path.exists(): +def _extract_summary(meta_path: Path, word_limit: int = 30) -> str: + """Extract the description from the meta JSON file, limited to word_limit words.""" + if not meta_path.exists(): return "" try: - content = docs_path.read_text("utf-8").strip() - except OSError: + import json + data = json.load(meta_path.open("r", encoding="utf-8")) + description = data.get("description", "") + return _get_first_n_words(description, word_limit) + except (OSError, json.JSONDecodeError): return "" - # Remove HTML comments - if "", content) + if match: + return match.group(1) + return "" + + +def main(): + # Create meta directory if it doesn't exist + meta_dir = Path("meta") + meta_dir.mkdir(exist_ok=True) + + # Find all docs.md files in the current directory + docs_files = sorted(Path(".").glob("*.docs.md")) + + converted_count = 0 + + for docs_file in docs_files: + # Read the content + content = docs_file.read_text("utf-8") + + # Extract description and commit + description = extract_description(content) + commit = extract_commit(content) + + # Determine output filename (e.g., ai-adoption.docs.md -> meta/ai-adoption.json) + slug = docs_file.stem.replace(".docs", "") + output_file = meta_dir / f"{slug}.json" + + # Create JSON object + data = { + "description": description, + "commit": commit, + } + + # Write to file with pretty printing + output_file.write_text(json.dumps(data, indent=2) + "\n", "utf-8") + converted_count += 1 + print(f"Converted {docs_file} -> {output_file}") + + print(f"\nConverted {converted_count} files to JSON in meta/") + + +if __name__ == "__main__": + main() diff --git a/gather_links.py b/gather_links.py index f2a925c..a30985b 100755 --- a/gather_links.py +++ b/gather_links.py @@ -67,30 +67,17 @@ def extract_urls(text): return re.findall(url_pattern, text) -def extract_description(docs_path: Path) -> str: - """Extract the first paragraph of the generated docs markdown file.""" - if not docs_path.exists(): +def extract_description(meta_path: Path) -> str: + """Extract the description from the meta JSON file.""" + if not meta_path.exists(): return "" try: - content = docs_path.read_text("utf-8").strip() - except OSError: + data = json.load(meta_path.open("r", encoding="utf-8")) + return data.get("description", "") + except (OSError, json.JSONDecodeError): return "" - if "