diff --git a/.github/workflows/netlify-deploy-v2.yaml b/.github/workflows/netlify-deploy-v2.yaml index 241c352d5..50b96ac59 100644 --- a/.github/workflows/netlify-deploy-v2.yaml +++ b/.github/workflows/netlify-deploy-v2.yaml @@ -44,7 +44,7 @@ jobs: uses: actions/cache@v4 with: path: docs/versioned_docs/${{ matrix.version.section }} + # NOTE(review): hashFiles() only hashes files already present in the workspace and yields an empty string when nothing matches; this step is listed before the conditional Checkout step, so confirm scripts/docs and docs/*_template.md are checked out earlier in the job (TODO verify) - key: version-docs-${{ matrix.version.section }}-${{ steps.sha.outputs.sha }} + key: version-docs-${{ hashFiles('scripts/docs/*.py', 'scripts/docs/templates/**', 'docs/*_template.md') }}-${{ matrix.version.section }}-${{ steps.sha.outputs.sha }} - name: Checkout if: steps.cache.outputs.cache-hit != 'true' uses: actions/checkout@v4 @@ -54,7 +54,7 @@ jobs: - name: Checkout branch packages if: steps.cache.outputs.cache-hit != 'true' run: | - git checkout origin/${{ matrix.version.branch }} -- gooddata-api-client/ packages/gooddata-sdk/ packages/gooddata-pandas/ scripts/script-requirements.txt + git checkout origin/${{ matrix.version.branch }} -- gooddata-api-client/ packages/gooddata-sdk/ packages/gooddata-pandas/ - name: Setup Python if: steps.cache.outputs.cache-hit != 'true' uses: actions/setup-python@v5 diff --git a/.github/workflows/rw-collect-changes.yaml b/.github/workflows/rw-collect-changes.yaml index d79827525..a4ec2d5c8 100644 --- a/.github/workflows/rw-collect-changes.yaml +++ b/.github/workflows/rw-collect-changes.yaml @@ -40,6 +40,7 @@ jobs: python-modules: - '.docker/**' - 'packages/**' + - 'scripts/docs/**' - '*.mk' - 'Makefile' - Dockerfile diff --git a/.github/workflows/rw-python-tests.yaml b/.github/workflows/rw-python-tests.yaml index 034648984..89d663288 100644 --- a/.github/workflows/rw-python-tests.yaml +++ b/.github/workflows/rw-python-tests.yaml @@ -44,6 +44,22 @@ jobs: - name: pep8 and formatting check run: | make format + docs-scripts-tests: + runs-on: ubuntu-latest + if: ${{inputs.changed-python-modules == 'true'}} + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Set up python 3.14 + uses: 
astral-sh/setup-uv@v6 + with: + python-version: "3.14" + - name: Install dependencies + run: | + uv sync --group test --locked + - name: Test docs scripts + run: | + make test-docs-scripts types-check: runs-on: ubuntu-latest if: ${{inputs.changed-python-modules == 'true'}} diff --git a/Makefile b/Makefile index 809f20130..bbb2f2c6c 100644 --- a/Makefile +++ b/Makefile @@ -108,6 +108,10 @@ remove-cassettes: for project in $(NO_CLIENT_GD_PROJECTS_DIRS); do $(MAKE) -C packages/$${project} $@ || RESULT=$$?; done; \ exit $$RESULT +.PHONY: test-docs-scripts +test-docs-scripts: + uv run pytest scripts/docs/tests/ -v + .PHONY: new-docs new-docs: cd docs; \ diff --git a/docs/Dockerfile b/docs/Dockerfile index 1d2fc2da5..8a34e4d66 100644 --- a/docs/Dockerfile +++ b/docs/Dockerfile @@ -7,8 +7,8 @@ RUN apt-get update && apt-get install -y git make curl # Copy only dependency manifests and package source before installing. COPY scripts/script-requirements.txt /scripts/script-requirements.txt COPY gooddata-api-client /gooddata-api-client -COPY packages/gooddata-sdk /gooddata-sdk -COPY packages/gooddata-pandas /gooddata-pandas +COPY packages/gooddata-sdk /packages/gooddata-sdk +COPY packages/gooddata-pandas /packages/gooddata-pandas RUN --mount=type=cache,target=/root/.cache/pip \ pip install -r /scripts/script-requirements.txt @@ -21,9 +21,7 @@ WORKDIR /docs RUN python json_builder.py && \ python python_ref_builder.py api_spec.toml data.json latest content/en && \ - mkdir versioned_docs/latest && \ - mv -f data.json ./versioned_docs/latest/data.json && \ - mv -f content/en/latest/links.json ./versioned_docs/latest/links.json + rm -f data.json FROM node:20.18.0-bookworm-slim @@ -37,9 +35,9 @@ COPY --from=builder /docs /docs WORKDIR /docs -# Use BuildKit cache mounts so npm/Go package downloads survive layer rebuilds +# npm cache mount speeds up rebuilds; Go modules are stored in the image +# layer so Hugo can resolve them at runtime. 
RUN --mount=type=cache,target=/root/.npm \ - --mount=type=cache,target=/root/go/pkg/mod \ npm install && \ hugo mod get diff --git a/docs/class_template.md b/docs/class_template.md index 1ac9d54d8..cab546bd2 100644 --- a/docs/class_template.md +++ b/docs/class_template.md @@ -3,4 +3,4 @@ linkTitle: "LINK" no_list: true --- -{{< api-ref-class "PATH" >}} +CONTENT diff --git a/docs/content/en/latest/api-reference/_index.md b/docs/content/en/latest/api-reference/_index.md index df77a072a..810154d2b 100644 --- a/docs/content/en/latest/api-reference/_index.md +++ b/docs/content/en/latest/api-reference/_index.md @@ -4,6 +4,3 @@ linkTitle: "API Reference" weight: 99 navigationLabel: true --- - - -Placeholder for gerenerated API reference. diff --git a/docs/content/en/latest/pandas/_index.md b/docs/content/en/latest/pandas/_index.md index b3b3c40ab..91573280a 100644 --- a/docs/content/en/latest/pandas/_index.md +++ b/docs/content/en/latest/pandas/_index.md @@ -4,6 +4,3 @@ linkTitle: "GOODDATA PANDAS" weight: 59 navigationLabel: true --- - - -Placeholder for gerenerated API reference - pandas. diff --git a/docs/function_template.md b/docs/function_template.md index 9b35c58c8..65119e742 100644 --- a/docs/function_template.md +++ b/docs/function_template.md @@ -5,4 +5,4 @@ superheading: "PARENT." 
weight: 100 --- -{{< api-ref "PATH" >}} +CONTENT diff --git a/docs/module_template.md b/docs/module_template.md index 1b8e39c48..48a492e0b 100644 --- a/docs/module_template.md +++ b/docs/module_template.md @@ -4,4 +4,4 @@ linkTitle: "LINK" no_list: true --- -{{< api-ref-module "PATH" >}} +CONTENT diff --git a/pyproject.toml b/pyproject.toml index 660a2c01f..d8704ddaa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,6 +48,8 @@ type = [ test = [ # Common test dependencies used across all workspace packages "pytest~=8.3.4", + # Required by scripts/docs/ tests (python_ref_builder uses toml) + "toml~=0.10.2", "pytest-cov~=6.0.0", "pytest-json-report==1.5.0", # Additional test dependencies used by multiple packages diff --git a/scripts/docs/README.md b/scripts/docs/README.md new file mode 100644 index 000000000..3e389b9e9 --- /dev/null +++ b/scripts/docs/README.md @@ -0,0 +1,87 @@ +# Documentation Generation + +This directory contains Python scripts that generate the API reference pages for +the GoodData Python SDK documentation site. + +## How it works + +The docs site is built with Hugo. API reference pages are generated in two steps: + +1. **`json_builder.py`** — introspects the installed `gooddata_sdk` and + `gooddata_pandas` packages using `inspect`, producing a `data.json` file with + class/function/property metadata and parsed docstrings. +2. **`python_ref_builder.py`** — reads `data.json` and `api_spec.toml`, then + generates markdown files with pre-rendered HTML for each module, class, and + function. Jinja2 templates in `templates/` replicate the output of the former + Hugo shortcodes, but at build time instead of at Hugo render time. + +The generated markdown files contain Hugo front matter (title, linkTitle, weight) +followed by the pre-rendered HTML, which replaces the `CONTENT` placeholder of the `*_template.md` files. Hugo serves them as-is without needing +shortcodes, `data.json`, or `links.json`. + +## Workflows + +There are three documentation deployment workflows, in order of +preference: + +### 1. 
V2 parallel workflow (`netlify-deploy-v2.yaml`) — recommended + +Triggered manually via `workflow_dispatch`. + +``` +discover-versions ──> generate-version (matrix, parallel) ──> build-and-deploy +``` + +- `discover-versions.sh` finds the latest N release branches. +- Each version runs in its own runner: checks out the version's SDK packages, + installs master's `script-requirements.txt`, runs `json_builder.py` + + `python_ref_builder.py` via `generate-single-version.sh`. +- Per-version results are cached by `(scripts hash + templates hash + branch SHA)`. +- `assemble-versions.sh` merges all version artifacts, promotes the highest + numbered version to `/latest`, then Hugo builds the final site. +- Deploys as a Netlify **draft** (no `--prod`). + +### 2. Legacy single-job workflow (`netlify-deploy.yaml`) — production + +Triggered manually via `workflow_dispatch`. + +- Uses `hugo-build-versioned-action` which downloads `generate.sh` from master. +- `generate.sh` runs all versions sequentially in one job. +- Deploys to Netlify with `--prod`. + +### 3. PR preview (`netlify-deploy-preview.yaml`) + +Triggered automatically on PRs that change `docs/**`. + +- Only builds the current branch's docs content (no multi-version). +- Uses the simpler `hugo-build-action` (no API reference generation). +- Deploys to a Netlify preview URL (`preview-{PR}--{site}.netlify.app`). + +## Local development + +Build and serve docs locally using Docker: + +```bash +cd docs +docker build -t python-sdk-docs -f Dockerfile .. 
+docker run --rm -p 1313:1313 python-sdk-docs +# Open http://localhost:1313/latest/ +``` + +## Key files + +| File | Purpose | +|------|---------| +| `json_builder.py` | Introspects SDK packages into `data.json` | +| `python_ref_builder.py` | Generates markdown + HTML from `data.json` | +| `templates/*.html.j2` | Jinja2 templates (replicate old Hugo shortcodes) | +| `tests/test_python_ref_builder.py` | Unit tests (`make test-docs-scripts`) | +| `../script-requirements.txt` | Python dependencies for both scripts | +| `../../docs/api_spec.toml` | Maps package names to output directories | +| `../../docs/*_template.md` | Markdown front matter templates (module/class/function) | + +## Running tests + +```bash +make test-docs-scripts +``` diff --git a/scripts/docs/python_ref_builder.py b/scripts/docs/python_ref_builder.py index 8323c2e12..6b5690771 100644 --- a/scripts/docs/python_ref_builder.py +++ b/scripts/docs/python_ref_builder.py @@ -1,26 +1,42 @@ # (C) 2023 GoodData Corporation +from __future__ import annotations + import argparse import json import os +import re from pathlib import Path from typing import TextIO import attr import toml from attr import define +from jinja2 import Environment, FileSystemLoader + +_SCRIPT_DIR = Path(__file__).resolve().parent MODULE_TEMPLATE_STRING = Path("module_template.md").read_text() CLASS_TEMPLATE_STRING = Path("class_template.md").read_text() FUNCTION_TEMPLATE_STRING = Path("function_template.md").read_text() +_JINJA_ENV = Environment( + loader=FileSystemLoader(_SCRIPT_DIR / "templates"), + autoescape=False, + keep_trailing_newline=True, +) +_OBJECT_PARTIAL_TPL = _JINJA_ENV.get_template("object_partial.html.j2") +_FUNCTION_TPL = _JINJA_ENV.get_template("function.html.j2") +_CLASS_TPL = _JINJA_ENV.get_template("class.html.j2") +_MODULE_TPL = _JINJA_ENV.get_template("module.html.j2") + @attr.s(auto_attribs=True) class RefHolder: """ """ url: str - packages: [] - directory: str + packages: list[str] = attr.Factory(list) + 
directory: str = "" @define @@ -30,12 +46,14 @@ class TemplateReplacementSpec: PARENT - name of the parent object NAME - name of the object LINK - link title of the object + CONTENT - pre-rendered HTML content """ parent: None | str = None name: None | str = None link: None | str = None path: None | str = None + content: None | str = None def render_template_to_str(self, template: str) -> str: """ @@ -46,6 +64,7 @@ def render_template_to_str(self, template: str) -> str: ("NAME", self.name), ("LINK", self.link), ("PATH", self.path), + ("CONTENT", self.content), ]: if replacement is not None: template = template.replace(token, replacement) @@ -56,6 +75,218 @@ def render_template_to_file(self, template: str, file: TextIO): file.write(rendered_string) +# --------------------------------------------------------------------------- +# Link resolution (replaces Hugo link partials) +# --------------------------------------------------------------------------- + + +class LinkResolver: + """Pre-compiled resolver that converts type names to HTML links. + + Builds two combined regexes (one for names containing ``_``, one for the + rest) so that docstring link resolution is O(1) per name instead of + O(n) iteration over all links. + """ + + def __init__(self, links: dict[str, dict]) -> None: + self.links = links + names_with_us: list[str] = [] + names_without_us: list[str] = [] + for name, data in links.items(): + if data.get("path"): + if "_" in name: + names_with_us.append(name) + else: + names_without_us.append(name) + + self._regex_with: re.Pattern[str] | None = None + self._regex_without: re.Pattern[str] | None = None + + if names_with_us: + alt = "|".join(re.escape(n) for n in sorted(names_with_us, key=len, reverse=True)) + # Names with _ may be surrounded by [, `, or space (left) and ], `, ., or space (right) + self._regex_with = re.compile(rf"([\[` ])({alt})([\]`. 
])") + if names_without_us: + alt = "|".join(re.escape(n) for n in sorted(names_without_us, key=len, reverse=True)) + # Names without _ must be inside backticks or square brackets + self._regex_without = re.compile(rf"([\[`])({alt})([\]`])") + + def type_link(self, name: str) -> str: + """Replicate ``api-ref-link-partial.html``: resolve a single type to a hyperlink.""" + if not name: + return name or "" + orig = name + clean = name.replace("]", "").replace("Optional[", "").replace("list[", "").replace("List[", "").strip() + data = self.links.get(clean) + if data and data.get("path"): + return orig.replace(clean, f'{clean}') + return orig + + def all_links(self, text: str) -> str: + """Replicate ``api-ref-link-all-partial.html``: linkify all known type names.""" + if not text: + return text or "" + + def _repl(m: re.Match[str]) -> str: + data = self.links.get(m.group(2)) + if data and data.get("path"): + return f'{m.group(1)}{m.group(2)}{m.group(3)}' + return m.group(0) + + result = text + if self._regex_with: + result = self._regex_with.sub(_repl, result) + if self._regex_without: + result = self._regex_without.sub(_repl, result) + # Remove backticks around links + return result.replace("``", "") + + +# --------------------------------------------------------------------------- +# Template context builders +# --------------------------------------------------------------------------- + + +def _function_signature(func_data: dict) -> str: + """Build the ``arg: type, arg: type`` parameter string.""" + ds = func_data.get("docstring_parsed") + if ds and ds.get("params"): + return ", ".join(f"{p['arg_name']}: {p['type_name']}" for p in ds["params"]) + return "" + + +def _object_partial_context(obj_data: dict, path: list[str], resolver: LinkResolver) -> dict: + """Build the Jinja2 context dict for ``object_partial.html.j2``.""" + kind = obj_data.get("kind", "") + ctx: dict = {"kind": kind} + + if kind == "function": + ret_ann = obj_data.get("signature", 
{}).get("return_annotation", "") + ds = obj_data.get("docstring_parsed") + + ctx["name"] = path[-1] + ctx["is_property"] = bool(obj_data.get("is_property")) + ctx["signature"] = _function_signature(obj_data) + ctx["return_link"] = resolver.type_link(ret_ann) + ctx["docstring"] = bool(ds) + + if ds: + ctx["short_description"] = resolver.all_links(ds.get("short_description", "") or "") + ctx["long_description"] = resolver.all_links(ds.get("long_description", "") or "") + + # Parameters + sig_params = obj_data.get("signature", {}).get("params") or [] + doc_params = ds.get("params") if ds else None + if doc_params and len(doc_params) > 0: + ctx["params"] = [ + { + "name": p["arg_name"], + "type": resolver.type_link(p.get("type_name", "")), + "description": resolver.all_links(p.get("description", "") or ""), + } + for p in doc_params + ] + elif sig_params: + ctx["sig_params"] = [{"name": sp[0], "type": resolver.type_link(sp[1])} for sp in sig_params] + + # Returns + if ds: + returns = ds.get("returns") + if not returns: + ctx["returns"] = "no_docs" + elif returns.get("type_name") or ret_ann != "None": + type_name = returns.get("type_name") or obj_data.get("signature", {}).get("return_type", "") + description = returns.get("description", "") + ctx["returns"] = { + "type": resolver.type_link(type_name), + "description": resolver.all_links(description) if description else "", + } + else: + ctx["returns"] = "none" + else: + ctx["returns"] = "no_docs" + + elif kind == "class": + ctx["parent_name"] = path[-2] if len(path) >= 2 else "" + ctx["class_name"] = path[-1] + ds = obj_data.get("docstring_parsed") + ctx["docstring"] = bool(ds) + if ds: + ctx["short_description"] = resolver.all_links(ds.get("short_description", "") or "") + ctx["long_description"] = resolver.all_links(ds.get("long_description", "") or "") + + return ctx + + +# --------------------------------------------------------------------------- +# HTML rendering via Jinja2 templates +# 
--------------------------------------------------------------------------- + + +def render_function_html(func_data: dict, import_path: str, resolver: LinkResolver) -> str: + """Render a function page — replicates the ``api-ref`` shortcode.""" + path = import_path.split(".") + obj_html = _OBJECT_PARTIAL_TPL.render(**_object_partial_context(func_data, path, resolver)) + return _FUNCTION_TPL.render(object_partial=obj_html) + + +def render_class_html(class_data: dict, parent_name: str, import_path: str, resolver: LinkResolver) -> str: + """Render a class page — replicates the ``api-ref-class`` shortcode.""" + path = import_path.split(".") + functions = class_data.get("functions", {}) + + properties: list[dict] = [] + methods: list[dict] = [] + for fname, fdata in functions.items(): + if fname.startswith("_") or not isinstance(fdata, dict): + continue + fds = fdata.get("docstring_parsed") + desc = resolver.all_links(fds.get("short_description", "")) if fds else "" + if fdata.get("is_property"): + properties.append({"name_link": resolver.type_link(fname), "description": desc}) + else: + methods.append( + { + "name_link": resolver.type_link(fname), + "signature": _function_signature(fdata), + "description": desc, + } + ) + + obj_html = _OBJECT_PARTIAL_TPL.render(**_object_partial_context(class_data, path, resolver)) + return _CLASS_TPL.render(object_partial=obj_html, properties=properties, methods=methods) + + +def render_module_html(module_data: dict, resolver: LinkResolver) -> str: + """Render a module page — replicates the ``api-ref-module`` shortcode.""" + entries: list[dict] = [] + for obj_name, obj_data in module_data.items(): + if obj_name == "kind" or not isinstance(obj_data, dict): + continue + entries.append({"kind": obj_data.get("kind", ""), "name_link": resolver.type_link(obj_name)}) + return _MODULE_TPL.render(entries=entries) + + +# --------------------------------------------------------------------------- +# Page spec — collected during pass 1, rendered 
during pass 2 +# --------------------------------------------------------------------------- + + +@define +class _PageSpec: + kind: str # "module", "class", "function" + name: str + parent_name: str + import_path: str + file_path: Path + data: dict + + +# --------------------------------------------------------------------------- +# File structure creation (two-pass) +# --------------------------------------------------------------------------- + + def read_json_file(file_path: str) -> dict: """Load JSON data from a file. @@ -70,92 +301,114 @@ def read_json_file(file_path: str) -> dict: def create_file_structure(data: dict, root: Path, url_root: str): - """Recursively create file structure based on JSON data. + """Create file structure based on JSON data using a two-pass approach. + + Pass 1 — walk the data tree, build the ``links`` dict and collect page + specs (directory structure is created here too). + Pass 2 — render HTML for every page using the *complete* links dict. Args: data (dict): JSON data representing the object. root (Path): Path to the root directory. url_root (str): URL root path for the API reference. """ - links = {} - - def _recursive_create(data_root: dict, dir_root: Path, api_ref_root: str, module_import_path: str): - """Recursively create files and directories. + links: dict[str, dict] = {} + pages: list[_PageSpec] = [] - Args: - data_root (dict): Sub-dictionary of the JSON representing the object. - dir_root (Path): Path to the directory root. - api_ref_root (str): URL root path for the API reference. - module_import_path (str): Import path to the object. 
- """ + # ------------------------------------------------------------------ + # Pass 1: build links + directory tree + page specs + # ------------------------------------------------------------------ + def _pass1(data_root: dict, dir_root: Path, api_ref_root: str, module_import_path: str) -> None: dir_root.mkdir(exist_ok=True) for name, obj in data_root.items(): - # There are entries in the json, that are not dicts (ex: the field `kind`) if not isinstance(obj, dict): continue - - # If an object already has a page, skip it if name in links: continue kind = obj.get("kind", None) - obj_module_import_path = module_import_path + f".{name}" if module_import_path != "" else name - - # Remove ".functions" from the path, to correspond to the import path if ".functions" in obj_module_import_path: obj_module_import_path = obj_module_import_path.replace(".functions", "") - # Create files based on the kind of the data: module/class/function if kind == "module": - template_spec = TemplateReplacementSpec(name=name, link=name, path=obj_module_import_path) (dir_root / name).mkdir(exist_ok=True) - with (dir_root / name / "_index.md").open("w") as f: - template_spec.render_template_to_file(MODULE_TEMPLATE_STRING, f) - - # Add entry for url linking - links[name] = {"path": f"{api_ref_root}/{name}".lower(), "kind": "function"} # Lowercase for Hugo + links[name] = {"path": f"{api_ref_root}/{name}".lower(), "kind": "function"} + pages.append( + _PageSpec( + kind="module", + name=name, + parent_name="", + import_path=obj_module_import_path, + file_path=dir_root / name / "_index.md", + data=obj, + ) + ) elif kind == "class": (dir_root / name).mkdir(exist_ok=True) - template_spec = TemplateReplacementSpec( - name=name, link=name, parent=module_import_path.split(".")[-1], path=obj_module_import_path + links[name] = {"path": f"{api_ref_root}/{name}".lower(), "kind": "class"} + pages.append( + _PageSpec( + kind="class", + name=name, + parent_name=module_import_path.split(".")[-1], + 
import_path=obj_module_import_path, + file_path=dir_root / name / "_index.md", + data=obj, + ) ) - with (dir_root / name / "_index.md").open("w") as f: - template_spec.render_template_to_file(CLASS_TEMPLATE_STRING, f) - - # Add entry for url linking - links[name] = {"path": f"{api_ref_root}/{name}".lower(), "kind": "class"} # Lowercase for Hugo elif name == "functions": for func_name in obj: if func_name.startswith("_"): - continue # Skip magic and private methods - - with (dir_root / f"{func_name}.md").open("w") as f: - template_spec = TemplateReplacementSpec( - name=func_name, - link=func_name, - parent=module_import_path.split(".")[-1], - path=obj_module_import_path + f".{func_name}", - ) - template_spec.render_template_to_file(FUNCTION_TEMPLATE_STRING, f) - - # Add entry for url linking + continue links[func_name] = { - "path": f"{api_ref_root}/{func_name}".lower(), # Lowercase for Hugo + "path": f"{api_ref_root}/{func_name}".lower(), "kind": "function", } - continue # No need to recurse deeper, functions are the last level + pages.append( + _PageSpec( + kind="function", + name=func_name, + parent_name=module_import_path.split(".")[-1], + import_path=obj_module_import_path + f".{func_name}", + file_path=dir_root / f"{func_name}.md", + data=obj[func_name], + ) + ) + continue else: - continue # Not a class nor a module + continue + + _pass1(obj, dir_root / name, f"{api_ref_root}/{name}", obj_module_import_path) + + _pass1(data, root, url_root, "") - _recursive_create(obj, dir_root / name, f"{api_ref_root}/{name}", obj_module_import_path) + # ------------------------------------------------------------------ + # Pass 2: render HTML and write markdown files + # ------------------------------------------------------------------ + resolver = LinkResolver(links) - _recursive_create(data, root, url_root, "") + for page in pages: + if page.kind == "module": + content = render_module_html(page.data, resolver) + spec = TemplateReplacementSpec(name=page.name, 
link=page.name, content=content) + template = MODULE_TEMPLATE_STRING + elif page.kind == "class": + content = render_class_html(page.data, page.parent_name, page.import_path, resolver) + spec = TemplateReplacementSpec(name=page.name, link=page.name, parent=page.parent_name, content=content) + template = CLASS_TEMPLATE_STRING + elif page.kind == "function": + content = render_function_html(page.data, page.import_path, resolver) + spec = TemplateReplacementSpec(name=page.name, link=page.name, parent=page.parent_name, content=content) + template = FUNCTION_TEMPLATE_STRING + else: + continue - return links + with page.file_path.open("w") as f: + spec.render_template_to_file(template, f) def change_json_root(data: dict, json_start_paths: list[str] | None) -> dict: @@ -180,7 +433,7 @@ def change_json_root(data: dict, json_start_paths: list[str] | None) -> dict: return new_json -def parse_toml(toml_path: str, version: str, root_directory: str) -> [RefHolder]: +def parse_toml(toml_path: str, version: str, root_directory: str) -> list[RefHolder]: references = [] # In case of missing toml_file, we need a default for the api-references if not os.path.exists(toml_path): @@ -210,15 +463,11 @@ def main(): args = parser.parse_args() references = parse_toml(args.toml_file, args.version, args.root_directory) - links = {} for ref in references: print(f"Parsing: {ref.url}") data = read_json_file(args.json_file) data = change_json_root(data, ref.packages) - links.update(create_file_structure(data, Path(ref.directory), url_root=ref.url)) - with open(f"{args.root_directory}/{args.version}/links.json", "w") as f: - json.dump(links, f, indent=4) - print("Dumping the links.json") + create_file_structure(data, Path(ref.directory), url_root=ref.url) if __name__ == "__main__": diff --git a/scripts/docs/templates/class.html.j2 b/scripts/docs/templates/class.html.j2 new file mode 100644 index 000000000..2bde05ce7 --- /dev/null +++ b/scripts/docs/templates/class.html.j2 @@ -0,0 +1,57 @@ +{#- 
Replicates the api-ref-class shortcode -#} +
+{{ object_partial }} +

Properties

+{%- if properties %} + + + + + + + + +{%- for prop in properties %} + + +{%- if prop.description %} + +{%- else %} + +{%- endif %} + +{%- endfor %} + +
NameDescription
+{{ prop.name_link }} +{{ prop.description }}None
+{%- else %} + None +{%- endif %} +

Methods

+{%- if methods %} + + + + + + + + +{%- for method in methods %} + + +{%- if method.description %} + +{%- else %} + +{%- endif %} + +{%- endfor %} + +
NameDescription
{{ method.name_link }}({{ method.signature }}) +{{ method.description }}None
+{%- else %} + None +{%- endif %} +
diff --git a/scripts/docs/templates/function.html.j2 b/scripts/docs/templates/function.html.j2 new file mode 100644 index 000000000..7d0e28c20 --- /dev/null +++ b/scripts/docs/templates/function.html.j2 @@ -0,0 +1,4 @@ +{#- Replicates the api-ref shortcode (function/property page) -#} +
+{{ object_partial }} +
diff --git a/scripts/docs/templates/module.html.j2 b/scripts/docs/templates/module.html.j2 new file mode 100644 index 000000000..f99e42404 --- /dev/null +++ b/scripts/docs/templates/module.html.j2 @@ -0,0 +1,19 @@ +{#- Replicates the api-ref-module shortcode -#} +
+ + + + + + + + +{%- for entry in entries %} + + + + +{%- endfor %} + +
TypeName
{{ entry.kind }}{{ entry.name_link }}
+
diff --git a/scripts/docs/templates/object_partial.html.j2 b/scripts/docs/templates/object_partial.html.j2 new file mode 100644 index 000000000..9cfd349aa --- /dev/null +++ b/scripts/docs/templates/object_partial.html.j2 @@ -0,0 +1,101 @@ +{#- Replicates api-ref-object-partial.html -#} +{#- NOTE: the original Hugo template has a bug — the CSS class condition compares + against " function" (with leading space), so it never matches. We replicate + the resulting output: the class is always "python-ref python-ref-class ". -#} +
+{%- if kind == "function" %} +{%- if not is_property %} +

{{ name }}({{ signature }}) -> {{ return_link }}

+{%- else %} +

{{ name }} -> {{ return_link }}

+{%- endif %} +{%- if docstring %} +
+

{{ short_description }}

+

{{ long_description }}

+
+{%- endif %} +{%- if not is_property %} +

Parameters

+{%- if params %} + + + + + + + + + +{%- for p in params %} + + + + + +{%- endfor %} + +
nametypedescription
{{ p.name }} {{ p.type }} {{ p.description }} +
+{%- elif sig_params %} + + + + + + + + + +{%- for p in sig_params %} + + + + + +{%- endfor %} + +
nametypedescription
{{ p.name }} {{ p.type }} None
+{%- else %} + None +{%- endif %} +{%- endif %} +

Returns

+{%- if returns == "no_docs" %} + No docs +{%- elif returns == "none" %} + None +{%- elif returns %} + + + + + + + + + + + + + +
typedescription
+{{ returns.type }} + +{%- if returns.description %} +{{ returns.description }} +{%- else %} + None +{%- endif %} +
+{%- endif %} +{%- elif kind == "class" %} +

{{ parent_name }}.{{ class_name }}

+{%- if docstring %} +
+

{{ short_description }}

+

{{ long_description }}

+
+{%- endif %} +{%- endif %} +
diff --git a/scripts/docs/tests/test_python_ref_builder.py b/scripts/docs/tests/test_python_ref_builder.py new file mode 100644 index 000000000..1278b9921 --- /dev/null +++ b/scripts/docs/tests/test_python_ref_builder.py @@ -0,0 +1,400 @@ +# (C) 2026 GoodData Corporation +from __future__ import annotations + +import sys +from pathlib import Path + +import pytest + +# The scripts/docs directory is not a package — add it to sys.path so we can import. +# The module also reads *_template.md at import time from cwd, so we chdir into docs/. +_SCRIPTS_DOCS = Path(__file__).resolve().parent.parent +_DOCS_DIR = _SCRIPTS_DOCS.parent.parent / "docs" + + +@pytest.fixture(autouse=True) +def _chdir_to_docs(monkeypatch: pytest.MonkeyPatch) -> None: + """python_ref_builder reads template files relative to cwd at import time.""" + monkeypatch.chdir(_DOCS_DIR) + if str(_SCRIPTS_DOCS) not in sys.path: + monkeypatch.syspath_prepend(str(_SCRIPTS_DOCS)) + + +@pytest.fixture() +def _mod(): + """Lazily import the module (after cwd/syspath are set).""" + import python_ref_builder as mod + + return mod + + +# --------------------------------------------------------------------------- +# Sample data fixtures (mimic json_builder.py output structure) +# --------------------------------------------------------------------------- + +SAMPLE_LINKS: dict[str, dict] = { + "CatalogWorkspace": {"path": "/latest/api-reference/catalogworkspace", "kind": "class"}, + "CatalogDataSource": {"path": "/latest/api-reference/catalogdatasource", "kind": "class"}, + "some_util": {"path": "/latest/api-reference/some_util", "kind": "function"}, + "Insight": {"path": "/latest/api-reference/insight", "kind": "class"}, +} + +SAMPLE_FUNCTION_DATA: dict = { + "kind": "function", + "docstring": "List all workspaces.", + "signature": { + "params": [("workspace_id", "str"), ("name", "str")], + "return_annotation": "list[CatalogWorkspace]", + }, + "docstring_parsed": { + "short_description": "Return a `CatalogWorkspace` 
list.", + "long_description": "Fetches all workspaces from the server.", + "params": [ + {"arg_name": "workspace_id", "type_name": "str", "description": "The workspace ID."}, + {"arg_name": "name", "type_name": "Optional[str]", "description": "Optional filter."}, + ], + "returns": { + "type_name": "list[CatalogWorkspace]", + "description": "All matching `CatalogWorkspace` objects.", + }, + }, +} + +SAMPLE_PROPERTY_DATA: dict = { + "kind": "function", + "is_property": True, + "docstring": "The workspace name.", + "signature": {"params": [], "return_annotation": "str"}, + "docstring_parsed": { + "short_description": "The workspace name.", + "long_description": "", + "params": [], + "returns": None, + }, +} + +SAMPLE_CLASS_DATA: dict = { + "kind": "class", + "docstring": "Represents a workspace.", + "docstring_parsed": { + "short_description": "A catalog workspace object.", + "long_description": "", + }, + "functions": { + "list_workspaces": SAMPLE_FUNCTION_DATA, + "name": SAMPLE_PROPERTY_DATA, + "_private": {"kind": "function"}, + }, +} + +SAMPLE_MODULE_DATA: dict = { + "kind": "module", + "CatalogWorkspace": {"kind": "class"}, + "some_util": {"kind": "function"}, +} + + +# =================================================================== +# LinkResolver +# =================================================================== + + +class TestLinkResolver: + def test_type_link_known_type(self, _mod): + resolver = _mod.LinkResolver(SAMPLE_LINKS) + result = resolver.type_link("CatalogWorkspace") + assert result == 'CatalogWorkspace' + + def test_type_link_unknown_type(self, _mod): + resolver = _mod.LinkResolver(SAMPLE_LINKS) + assert resolver.type_link("UnknownType") == "UnknownType" + + def test_type_link_empty(self, _mod): + resolver = _mod.LinkResolver(SAMPLE_LINKS) + assert resolver.type_link("") == "" + + def test_type_link_none(self, _mod): + resolver = _mod.LinkResolver(SAMPLE_LINKS) + assert resolver.type_link(None) == "" + + def 
test_type_link_optional_wrapper(self, _mod): + resolver = _mod.LinkResolver(SAMPLE_LINKS) + result = resolver.type_link("Optional[CatalogWorkspace]") + assert "Optional[" in result + assert 'CatalogWorkspace' in result + + def test_type_link_list_wrapper(self, _mod): + resolver = _mod.LinkResolver(SAMPLE_LINKS) + result = resolver.type_link("list[CatalogWorkspace]") + assert "list[" in result + assert 'CatalogWorkspace' in result + + def test_all_links_backtick_name_without_underscore(self, _mod): + resolver = _mod.LinkResolver(SAMPLE_LINKS) + result = resolver.all_links("Returns a `CatalogWorkspace` object.") + assert 'CatalogWorkspace' in result + # Backticks around a resolved link should be stripped + assert "`CatalogWorkspace`" not in result + + def test_all_links_name_with_underscore_in_backticks(self, _mod): + resolver = _mod.LinkResolver(SAMPLE_LINKS) + result = resolver.all_links("Call `some_util` for help.") + assert 'some_util' in result + + def test_all_links_name_with_underscore_after_space(self, _mod): + resolver = _mod.LinkResolver(SAMPLE_LINKS) + result = resolver.all_links("Use some_util here.") + assert 'some_util' in result + + def test_all_links_empty(self, _mod): + resolver = _mod.LinkResolver(SAMPLE_LINKS) + assert resolver.all_links("") == "" + + def test_all_links_none(self, _mod): + resolver = _mod.LinkResolver(SAMPLE_LINKS) + assert resolver.all_links(None) == "" + + def test_all_links_no_matches(self, _mod): + resolver = _mod.LinkResolver(SAMPLE_LINKS) + text = "Plain text with no type names." 
+ assert resolver.all_links(text) == text + + def test_all_links_multiple_names(self, _mod): + resolver = _mod.LinkResolver(SAMPLE_LINKS) + result = resolver.all_links("Returns `CatalogWorkspace` or `Insight`.") + assert "catalogworkspace" in result + assert "insight" in result + + +# =================================================================== +# TemplateReplacementSpec +# =================================================================== + + +class TestTemplateReplacementSpec: + def test_render_replaces_all_tokens(self, _mod): + spec = _mod.TemplateReplacementSpec(parent="sdk", name="MyClass", link="MyClass", content="
hello
") + template = "PARENT.NAME (LINK)\nCONTENT" + result = spec.render_template_to_str(template) + assert result == "sdk.MyClass (MyClass)\n
hello
" + + def test_render_skips_none_tokens(self, _mod): + spec = _mod.TemplateReplacementSpec(name="Foo") + template = "PARENT.NAME" + result = spec.render_template_to_str(template) + # PARENT is None so left as-is + assert result == "PARENT.Foo" + + +# =================================================================== +# _function_signature +# =================================================================== + + +class TestFunctionSignature: + def test_with_docstring_params(self, _mod): + result = _mod._function_signature(SAMPLE_FUNCTION_DATA) + assert result == "workspace_id: str, name: Optional[str]" + + def test_without_docstring(self, _mod): + data: dict = {"kind": "function", "signature": {"params": []}} + assert _mod._function_signature(data) == "" + + def test_empty_params(self, _mod): + data: dict = {"kind": "function", "docstring_parsed": {"params": []}} + assert _mod._function_signature(data) == "" + + +# =================================================================== +# _object_partial_context +# =================================================================== + + +class TestObjectPartialContext: + def test_function_context(self, _mod): + resolver = _mod.LinkResolver(SAMPLE_LINKS) + ctx = _mod._object_partial_context(SAMPLE_FUNCTION_DATA, ["sdk", "list_workspaces"], resolver) + assert ctx["kind"] == "function" + assert ctx["name"] == "list_workspaces" + assert ctx["is_property"] is False + assert "params" in ctx + assert len(ctx["params"]) == 2 + assert isinstance(ctx["returns"], dict) + assert ctx["returns"]["type"] # should have a rendered link + + def test_property_context(self, _mod): + resolver = _mod.LinkResolver(SAMPLE_LINKS) + ctx = _mod._object_partial_context(SAMPLE_PROPERTY_DATA, ["CatalogWorkspace", "name"], resolver) + assert ctx["is_property"] is True + assert ctx["returns"] == "no_docs" + + def test_class_context(self, _mod): + resolver = _mod.LinkResolver(SAMPLE_LINKS) + ctx = _mod._object_partial_context(SAMPLE_CLASS_DATA, 
["sdk", "CatalogWorkspace"], resolver) + assert ctx["kind"] == "class" + assert ctx["parent_name"] == "sdk" + assert ctx["class_name"] == "CatalogWorkspace" + assert ctx["docstring"] is True + + def test_class_context_no_docstring(self, _mod): + resolver = _mod.LinkResolver(SAMPLE_LINKS) + data: dict = {"kind": "class"} + ctx = _mod._object_partial_context(data, ["mod", "Empty"], resolver) + assert ctx["docstring"] is False + + +# =================================================================== +# HTML rendering functions +# =================================================================== + + +class TestRenderFunctionHtml: + def test_produces_html(self, _mod): + resolver = _mod.LinkResolver(SAMPLE_LINKS) + html = _mod.render_function_html(SAMPLE_FUNCTION_DATA, "sdk.list_workspaces", resolver) + assert '
' in html + assert "list_workspaces" in html + assert "Parameters" in html + + def test_property_no_parameters(self, _mod): + resolver = _mod.LinkResolver(SAMPLE_LINKS) + html = _mod.render_function_html(SAMPLE_PROPERTY_DATA, "CatalogWorkspace.name", resolver) + assert "name" in html + # Properties should not show a Parameters section + assert "Parameters" not in html + + +class TestRenderClassHtml: + def test_produces_html_with_methods_and_properties(self, _mod): + resolver = _mod.LinkResolver(SAMPLE_LINKS) + html = _mod.render_class_html(SAMPLE_CLASS_DATA, "sdk", "sdk.CatalogWorkspace", resolver) + assert '
' in html + assert "Properties" in html + assert "Methods" in html + # Private method should be excluded + assert "_private" not in html + + def test_class_no_functions(self, _mod): + resolver = _mod.LinkResolver(SAMPLE_LINKS) + data: dict = {"kind": "class", "functions": {}} + html = _mod.render_class_html(data, "sdk", "sdk.Empty", resolver) + assert "Properties" in html + assert " None " in html + + +class TestRenderModuleHtml: + def test_produces_table(self, _mod): + resolver = _mod.LinkResolver(SAMPLE_LINKS) + html = _mod.render_module_html(SAMPLE_MODULE_DATA, resolver) + assert '
' in html + assert "class" in html + assert "function" in html + + def test_skips_kind_key(self, _mod): + resolver = _mod.LinkResolver(SAMPLE_LINKS) + html = _mod.render_module_html(SAMPLE_MODULE_DATA, resolver) + # The "kind" key on the module dict itself should not appear as an entry + # 1 header row + 2 data rows = 3 + assert html.count("") == 3 + + +# =================================================================== +# change_json_root +# =================================================================== + + +class TestChangeJsonRoot: + def test_none_paths_returns_original(self, _mod): + data = {"a": 1} + assert _mod.change_json_root(data, None) is data + + def test_single_path(self, _mod): + data = {"sdk": {"catalog": {"kind": "module"}}} + result = _mod.change_json_root(data, ["sdk.catalog"]) + assert result == {"catalog": {"kind": "module"}} + + def test_multiple_paths(self, _mod): + data = { + "sdk": {"kind": "module", "cls": {"kind": "class"}}, + "pandas": {"kind": "module"}, + } + result = _mod.change_json_root(data, ["sdk", "pandas"]) + assert "sdk" in result + assert "pandas" in result + + +# =================================================================== +# create_file_structure (integration — uses tmp_path) +# =================================================================== + + +class TestCreateFileStructure: + def test_creates_module_class_and_function_files(self, _mod, tmp_path): + data = { + "mymodule": { + "kind": "module", + "MyClass": { + "kind": "class", + "docstring": "A class.", + "docstring_parsed": { + "short_description": "A class.", + "long_description": "", + }, + "functions": { + "do_stuff": { + "kind": "function", + "docstring": "Do stuff.", + "signature": { + "params": [], + "return_annotation": "None", + }, + "docstring_parsed": { + "short_description": "Do stuff.", + "long_description": "", + "params": [], + "returns": None, + }, + }, + "_hidden": {"kind": "function"}, + }, + }, + }, + } + _mod.create_file_structure(data, 
tmp_path, "/latest/api-reference") + + # Module index + module_index = tmp_path / "mymodule" / "_index.md" + assert module_index.exists() + content = module_index.read_text() + assert "mymodule" in content + + # Class index + class_index = tmp_path / "mymodule" / "MyClass" / "_index.md" + assert class_index.exists() + content = class_index.read_text() + assert "MyClass" in content + assert "python-ref" in content + + # Function page + func_page = tmp_path / "mymodule" / "MyClass" / "do_stuff.md" + assert func_page.exists() + content = func_page.read_text() + assert "do_stuff" in content + + # Private function should be skipped + assert not (tmp_path / "mymodule" / "MyClass" / "_hidden.md").exists() + + def test_duplicate_names_skipped(self, _mod, tmp_path): + data = { + "mod1": { + "kind": "module", + "Shared": {"kind": "class", "functions": {}}, + }, + "mod2": { + "kind": "module", + "Shared": {"kind": "class", "functions": {}}, + }, + } + # Should not raise — second "Shared" is skipped + _mod.create_file_structure(data, tmp_path, "/latest/api-reference") + assert (tmp_path / "mod1" / "Shared" / "_index.md").exists() diff --git a/scripts/generate.sh b/scripts/generate.sh index 17ce5ee4b..c97873358 100755 --- a/scripts/generate.sh +++ b/scripts/generate.sh @@ -119,9 +119,10 @@ highest_version=$(ls -v1 ./versioned_docs/ | grep -E '^[0-9]+.[0-9]+$' | sort -V echo "Moving ${highest_version} to /latest" mv -f ./versioned_docs/$highest_version ./versioned_docs/latest -# Replace "/${highest_version}/" with "/latest/" using sed -sed "s|${highest_version}|latest|g" ./versioned_docs/latest/links.json > temp.json - -mv temp.json ./versioned_docs/latest/links.json +# Replace "/${highest_version}/" with "/latest/" in links.json (if it exists) +if [ -f "./versioned_docs/latest/links.json" ]; then + sed "s|${highest_version}|latest|g" ./versioned_docs/latest/links.json > temp.json + mv temp.json ./versioned_docs/latest/links.json +fi popd diff --git 
a/scripts/script-requirements.txt b/scripts/script-requirements.txt index e489e0118..6cc8d065e 100644 --- a/scripts/script-requirements.txt +++ b/scripts/script-requirements.txt @@ -1,4 +1,5 @@ docstring_parser~=0.15 +jinja2~=3.1 toml~=0.10.2 -e./gooddata-api-client -e./packages/gooddata-sdk diff --git a/uv.lock b/uv.lock index 82404fa42..2b09e4ddd 100644 --- a/uv.lock +++ b/uv.lock @@ -1026,6 +1026,7 @@ test = [ { name = "pytest-order" }, { name = "pytest-snapshot" }, { name = "python-dotenv" }, + { name = "toml" }, { name = "urllib3" }, { name = "vcrpy" }, ] @@ -1071,6 +1072,7 @@ test = [ { name = "pytest-order", specifier = "~=1.3.0" }, { name = "pytest-snapshot", specifier = "==0.9.0" }, { name = "python-dotenv", specifier = "~=1.0.0" }, + { name = "toml", specifier = "~=0.10.2" }, { name = "urllib3", specifier = "~=2.6.0" }, { name = "vcrpy", specifier = "~=8.0.0" }, ] @@ -2460,6 +2462,15 @@ requires-dist = [ { name = "requests" }, ] +[[package]] +name = "toml" +version = "0.10.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/be/ba/1f744cdc819428fc6b5084ec34d9b30660f6f9daaf70eead706e3203ec3c/toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f", size = 22253, upload-time = "2020-11-01T01:40:22.204Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/44/6f/7120676b6d73228c96e17f1f794d8ab046fc910d781c8d151120c3f1569e/toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b", size = 16588, upload-time = "2020-11-01T01:40:20.672Z" }, +] + [[package]] name = "tomli" version = "2.4.0"