diff --git a/docs.bzl b/docs.bzl index 8f728f123..4f1ec3131 100644 --- a/docs.bzl +++ b/docs.bzl @@ -69,7 +69,7 @@ def _rewrite_needs_json_to_sourcelinks(labels): out.append(s) return out -def _merge_sourcelinks(name, sourcelinks): +def _merge_sourcelinks(name, sourcelinks, known_good = None): """Merge multiple sourcelinks JSON files into a single file. Args: @@ -77,15 +77,22 @@ def _merge_sourcelinks(name, sourcelinks): sourcelinks: List of sourcelinks JSON file targets """ + extra_srcs = [] + known_good_arg = "" + if known_good != None: + extra_srcs = [known_good] + known_good_arg = "--known_good $(location %s)" % known_good + native.genrule( name = name, - srcs = sourcelinks, + srcs = sourcelinks + extra_srcs, outs = [name + ".json"], cmd = """ $(location @score_docs_as_code//scripts_bazel:merge_sourcelinks) \ --output $@ \ + {known_good_arg} \ $(SRCS) - """, + """.format(known_good_arg = known_good_arg), tools = ["@score_docs_as_code//scripts_bazel:merge_sourcelinks"], ) @@ -120,7 +127,7 @@ def _missing_requirements(deps): fail(msg) fail("This case should be unreachable?!") -def docs(source_dir = "docs", data = [], deps = [], scan_code = []): +def docs(source_dir = "docs", data = [], deps = [], scan_code = [], known_good = None): """Creates all targets related to documentation. By using this function, you'll get any and all updates for documentation targets in one place. 
@@ -175,7 +182,7 @@ def docs(source_dir = "docs", data = [], deps = [], scan_code = []): data_with_docs_sources = _rewrite_needs_json_to_docs_sources(data) additional_combo_sourcelinks = _rewrite_needs_json_to_sourcelinks(data) - _merge_sourcelinks(name = "merged_sourcelinks", sourcelinks = [":sourcelinks_json"] + additional_combo_sourcelinks) + _merge_sourcelinks(name = "merged_sourcelinks", sourcelinks = [":sourcelinks_json"] + additional_combo_sourcelinks, known_good = known_good) py_binary( name = "docs", diff --git a/scripts_bazel/BUILD b/scripts_bazel/BUILD index 81c9212f1..befe51730 100644 --- a/scripts_bazel/BUILD +++ b/scripts_bazel/BUILD @@ -33,6 +33,7 @@ py_binary( py_binary( name = "merge_sourcelinks", srcs = ["merge_sourcelinks.py"], + deps= [ "//src/extensions/score_source_code_linker"], main = "merge_sourcelinks.py", visibility = ["//visibility:public"], ) diff --git a/scripts_bazel/generate_sourcelinks_cli.py b/scripts_bazel/generate_sourcelinks_cli.py index 4291b97c5..6f09ef075 100644 --- a/scripts_bazel/generate_sourcelinks_cli.py +++ b/scripts_bazel/generate_sourcelinks_cli.py @@ -25,8 +25,10 @@ from src.extensions.score_source_code_linker.generate_source_code_links_json import ( _extract_references_from_file, # pyright: ignore[reportPrivateUsage] TODO: move it out of the extension and into this script ) +from src.extensions.score_source_code_linker.helpers import parse_module_name_from_path from src.extensions.score_source_code_linker.needlinks import ( - store_source_code_links_json, + MetaData, + store_source_code_links_with_metadata_json, ) logging.basicConfig(level=logging.INFO, format="%(message)s") @@ -37,13 +39,13 @@ def main(): parser = argparse.ArgumentParser( description="Generate source code links JSON from source files" ) - parser.add_argument( + _ = parser.add_argument( "--output", required=True, type=Path, help="Output JSON file path", ) - parser.add_argument( + _ = parser.add_argument( "files", nargs="*", type=Path, @@ -53,15 
+55,25 @@ def main(): args = parser.parse_args() all_need_references = [] + metadata: MetaData = { + "module_name": "", + "hash": "", + "url": "", + } + metadata_set = False for file_path in args.files: + if "known_good.json" not in str(file_path) and not metadata_set: + metadata["module_name"] = parse_module_name_from_path(file_path) + metadata_set = True abs_file_path = file_path.resolve() assert abs_file_path.exists(), abs_file_path references = _extract_references_from_file( - abs_file_path.parent, Path(abs_file_path.name) + abs_file_path.parent, Path(abs_file_path.name), file_path ) all_need_references.extend(references) - - store_source_code_links_json(args.output, all_need_references) + store_source_code_links_with_metadata_json( + file=args.output, metadata=metadata, needlist=all_need_references + ) logger.info( f"Found {len(all_need_references)} need references in {len(args.files)} files" ) diff --git a/scripts_bazel/merge_sourcelinks.py b/scripts_bazel/merge_sourcelinks.py index f194e19ca..932d91380 100644 --- a/scripts_bazel/merge_sourcelinks.py +++ b/scripts_bazel/merge_sourcelinks.py @@ -21,21 +21,42 @@ import sys from pathlib import Path +from src.extensions.score_source_code_linker.helpers import parse_info_from_known_good + logging.basicConfig(level=logging.INFO, format="%(message)s") logger = logging.getLogger(__name__) + +""" +if bazel-out/k8-fastbuild/bin/external/ in file_path => module is external +otherwise it's local +if local => module_name & hash == empty +if external => parse thing for module_name => look up known_good json for hash & url +""" + + + +def add_needid_to_metaneed_mapping(mapping: dict[str, dict[str, str]], metadata: dict[str, str], needid: str): + mapping + pass + def main(): parser = argparse.ArgumentParser( description="Merge multiple sourcelinks JSON files into one" ) - parser.add_argument( + _ = parser.add_argument( "--output", required=True, type=Path, help="Output merged JSON file path", ) - parser.add_argument( + _ = 
parser.add_argument( + "--known_good", + required=False, + help="Optional path to a 'known good' JSON file (provided by Bazel).", + ) + _ = parser.add_argument( + "files", nargs="*", type=Path, @@ -43,14 +64,28 @@ def main(): ) args = parser.parse_args() + all_files = [x for x in args.files if "known_good.json" not in str(x)] merged = [] - for json_file in args.files: + needs_metadata_mapping = {} + for json_file in all_files: with open(json_file) as f: data = json.load(f) - assert isinstance(data, list), repr(data) - merged.extend(data) + metadata = data[0] + if metadata["module_name"] and metadata["module_name"] != "local_module": + hash, repo = parse_info_from_known_good( + known_good_json=args.known_good, module_name=metadata["module_name"] + ) + metadata["hash"] = hash + metadata["url"] = repo + # In the case that 'metadata[module_name]' is 'local_module' + # hash & url are already existing and empty inside of 'metadata' + # Therefore all 3 keys will be written to needlinks in each branch + for d in data[1:]: + d.update(metadata) + assert isinstance(data, list), repr(data) + merged.extend(data[1:]) with open(args.output, "w") as f: json.dump(merged, f, indent=2, ensure_ascii=False) diff --git a/src/extensions/score_source_code_linker/BUILD b/src/extensions/score_source_code_linker/BUILD index e3c289c66..55d471374 100644 --- a/src/extensions/score_source_code_linker/BUILD +++ b/src/extensions/score_source_code_linker/BUILD @@ -54,6 +54,7 @@ py_library( "needlinks.py", "testlink.py", "xml_parser.py", + "helpers.py", ], imports = ["."], visibility = ["//visibility:public"], diff --git a/src/extensions/score_source_code_linker/__init__.py b/src/extensions/score_source_code_linker/__init__.py index cf9843dc5..d80754036 100644 --- a/src/extensions/score_source_code_linker/__init__.py +++ b/src/extensions/score_source_code_linker/__init__.py @@ -21,7 +21,6 @@ # This whole directory implements the above mentioned tool requirements import os -from collections import 
defaultdict from copy import deepcopy from pathlib import Path from typing import cast @@ -35,18 +34,22 @@ from src.extensions.score_source_code_linker.generate_source_code_links_json import ( generate_source_code_links_json, ) +from src.extensions.score_source_code_linker.helpers import get_github_link +from src.extensions.score_source_code_linker.module_source_links import ( + group_needs_by_module, + load_module_source_links_json, + store_module_source_links_json, +) from src.extensions.score_source_code_linker.need_source_links import ( - NeedSourceLinks, - SourceCodeLinks, + group_by_need, load_source_code_links_combined_json, store_source_code_links_combined_json, ) from src.extensions.score_source_code_linker.needlinks import ( - NeedLink, load_source_code_links_json, + load_source_code_links_with_metadata_json, ) from src.extensions.score_source_code_linker.testlink import ( - DataForTestLink, load_data_of_test_case_json, load_test_xml_parsed_json, ) @@ -58,7 +61,6 @@ find_git_root, find_ws_root, ) -from src.helper_lib.additional_functions import get_github_link LOGGER = get_logger(__name__) # Uncomment this to enable more verbose logging @@ -71,53 +73,6 @@ # ╰──────────────────────────────────────╯ -def group_by_need( - source_code_links: list[NeedLink], - test_case_links: list[DataForTestLink] | None = None, -) -> list[SourceCodeLinks]: - """ - Groups the given need links and test case links by their need ID. - Returns a nested dictionary structure with 'CodeLink' and 'TestLink' categories. - Example output: - - - { - "need": "", - "links": { - "CodeLinks": [NeedLink, NeedLink, ...], - "TestLinks": [testlink, testlink, ...] 
- } - } - """ - # TODO: I wonder if there is a more efficent way to do this - grouped_by_need: dict[str, NeedSourceLinks] = defaultdict( - lambda: NeedSourceLinks(TestLinks=[], CodeLinks=[]) - ) - - # Group source code links - for needlink in source_code_links: - grouped_by_need[needlink.need].CodeLinks.append(needlink) - - # Group test case links - if test_case_links is not None: - for testlink in test_case_links: - grouped_by_need[testlink.need].TestLinks.append(testlink) - - # Build final list of SourceCodeLinks - result: list[SourceCodeLinks] = [ - SourceCodeLinks( - need=need, - links=NeedSourceLinks( - CodeLinks=need_links.CodeLinks, - TestLinks=need_links.TestLinks, - ), - ) - for need, need_links in grouped_by_need.items() - ] - - return result - - def get_cache_filename(build_dir: Path, filename: str) -> Path: """ Returns the path to the cache file for the source code linker. @@ -142,14 +97,19 @@ def build_and_save_combined_file(outdir: Path): else: source_code_links_json = Path(source_code_links_json) - source_code_links = load_source_code_links_json(source_code_links_json) + # This isn't pretty will think of a better solution later, for now this should work + try: + source_code_links = load_source_code_links_json(source_code_links_json) + except AssertionError: + source_code_links = load_source_code_links_with_metadata_json( + source_code_links_json + ) test_code_links = load_test_xml_parsed_json( get_cache_filename(outdir, "score_xml_parser_cache.json") ) - + scl_list = group_by_need(source_code_links, test_code_links) store_source_code_links_combined_json( - outdir / "score_scl_grouped_cache.json", - group_by_need(source_code_links, test_code_links), + outdir / "score_scl_grouped_cache.json", scl_list ) @@ -254,17 +214,20 @@ def setup_test_code_linker(app: Sphinx, env: BuildEnvironment): def register_combined_linker(app: Sphinx): - # Registering the combined linker to Sphinx + # Registering the final combine linker to Sphinx # priority is set to make 
sure it is called in the right order. - # Needs to be called after xml parsing & codelink - app.connect("env-updated", setup_combined_linker, priority=507) + # Needs to be called after xml parsing & codelink & combined_linker + app.connect("env-updated", setup_combined_linker, priority=510) def setup_combined_linker(app: Sphinx, _: BuildEnvironment): grouped_cache = get_cache_filename(app.outdir, "score_scl_grouped_cache.json") - gruped_cache_exists = grouped_cache.exists() + grouped_cache_exists = grouped_cache.exists() # TODO this cache should be done via Bazel - if not gruped_cache_exists or not app.config.skip_rescanning_via_source_code_linker: + if ( + not grouped_cache_exists + or not app.config.skip_rescanning_via_source_code_linker + ): LOGGER.debug( "Did not find combined json 'score_scl_grouped_cache.json' in _build." "Generating new one" @@ -272,6 +235,37 @@ def setup_combined_linker(app: Sphinx, _: BuildEnvironment): build_and_save_combined_file(app.outdir) +def register_module_linker(app: Sphinx): + # Registering the combined linker to Sphinx + # priority is set to make sure it is called in the right order. 
+ # Needs to be called after xml parsing & codelink + app.connect("env-updated", setup_module_linker, priority=520) + + +def build_and_save_module_scl_file(outdir: Path): + scl_links = load_source_code_links_combined_json( + get_cache_filename(outdir, "score_scl_grouped_cache.json") + ) + mcl_links = group_needs_by_module(scl_links) + store_module_source_links_json( + outdir / "score_module_grouped_scl_cache.json", mcl_links + ) + + +def setup_module_linker(app: Sphinx, _: BuildEnvironment): + grouped_cache = get_cache_filename( + app.outdir, "score_module_grouped_scl_cache.json" + ) + gruped_cache_exists = grouped_cache.exists() + # TODO this cache should be done via Bazel + if not gruped_cache_exists or not app.config.skip_rescanning_via_source_code_linker: + LOGGER.debug( + "Did not find combined json 'score_module_grouped_scl_cache.json' " + "in _build. Generating new one" + ) + build_and_save_module_scl_file(app.outdir) + + def setup_once(app: Sphinx): # might be the only way to solve this? if "skip_rescanning_via_source_code_linker" in app.config: @@ -295,9 +289,10 @@ def setup_once(app: Sphinx): setup_source_code_linker(app, ws_root) register_test_code_linker(app) register_combined_linker(app) + register_module_linker(app) - # Priorty=510 to ensure it's called after the test code linker & combined connection - app.connect("env-updated", inject_links_into_needs, priority=510) + # Priorty=515 to ensure it's called after the test code linker & combined connection + app.connect("env-updated", inject_links_into_needs, priority=525) def setup(app: Sphinx) -> dict[str, str | bool]: @@ -352,42 +347,43 @@ def inject_links_into_needs(app: Sphinx, env: BuildEnvironment) -> None: f"?? 
Need {id} already has testlink: {need.get('testlink')}" ) - source_code_links_by_need = load_source_code_links_combined_json( - get_cache_filename(app.outdir, "score_scl_grouped_cache.json") + scl_by_module = load_module_source_links_json( + get_cache_filename(app.outdir, "score_module_grouped_scl_cache.json") ) + for module_grouped_needs in scl_by_module: + for source_code_links in module_grouped_needs.needs: + need = find_need(needs_copy, source_code_links.need) + if need is None: + # TODO: print github annotations as in https://github.com/eclipse-score/bazel_registry/blob/7423b9996a45dd0a9ec868e06a970330ee71cf4f/tools/verify_semver_compatibility_level.py#L126-L129 + for n in source_code_links.links.CodeLinks: + LOGGER.warning( + f"{n.file}:{n.line}: Could not find {source_code_links.need} " + "in documentation [CODE LINK]", + type="score_source_code_linker", + ) + for n in source_code_links.links.TestLinks: + LOGGER.warning( + f"{n.file}:{n.line}: Could not find {source_code_links.need} " + "in documentation [TEST LINK]", + type="score_source_code_linker", + ) + continue + + need_as_dict = cast(dict[str, object], need) + metadata = module_grouped_needs.module + need_as_dict["source_code_link"] = ", ".join( + f"{get_github_link(metadata, n)}<>{n.file}:{n.line}" + for n in source_code_links.links.CodeLinks + ) + need_as_dict["testlink"] = ", ".join( + f"{get_github_link(metadata, n)}<>{n.name}" + for n in source_code_links.links.TestLinks + ) - for source_code_links in source_code_links_by_need: - need = find_need(needs_copy, source_code_links.need) - if need is None: - # TODO: print github annotations as in https://github.com/eclipse-score/bazel_registry/blob/7423b9996a45dd0a9ec868e06a970330ee71cf4f/tools/verify_semver_compatibility_level.py#L126-L129 - for n in source_code_links.links.CodeLinks: - LOGGER.warning( - f"{n.file}:{n.line}: Could not find {source_code_links.need} " - "in documentation [CODE LINK]", - type="score_source_code_linker", - ) - for n in 
source_code_links.links.TestLinks: - LOGGER.warning( - f"{n.file}:{n.line}: Could not find {source_code_links.need} " - "in documentation [TEST LINK]", - type="score_source_code_linker", - ) - continue - - need_as_dict = cast(dict[str, object], need) - - need_as_dict["source_code_link"] = ", ".join( - f"{get_github_link(n)}<>{n.file}:{n.line}" - for n in source_code_links.links.CodeLinks - ) - need_as_dict["testlink"] = ", ".join( - f"{get_github_link(n)}<>{n.name}" for n in source_code_links.links.TestLinks - ) - - # NOTE: Removing & adding the need is important to make sure - # the needs gets 're-evaluated'. - Needs_Data.remove_need(need["id"]) - Needs_Data.add_need(need) + # NOTE: Removing & adding the need is important to make sure + # the needs gets 're-evaluated'. + Needs_Data.remove_need(need["id"]) + Needs_Data.add_need(need) # ╭──────────────────────────────────────╮ diff --git a/src/extensions/score_source_code_linker/generate_source_code_links_json.py b/src/extensions/score_source_code_linker/generate_source_code_links_json.py index abedc2db4..7b39b7228 100644 --- a/src/extensions/score_source_code_linker/generate_source_code_links_json.py +++ b/src/extensions/score_source_code_linker/generate_source_code_links_json.py @@ -20,11 +20,15 @@ import os from pathlib import Path +from sphinx_needs.logging import get_logger + from src.extensions.score_source_code_linker.needlinks import ( NeedLink, store_source_code_links_json, ) +LOGGER = get_logger(__name__) + TAGS = [ "# " + "req-traceability:", "# " + "req-Id:", @@ -43,21 +47,23 @@ def _extract_references_from_line(line: str): yield tag, req.strip() -def _extract_references_from_file(root: Path, file_path: Path) -> list[NeedLink]: +def _extract_references_from_file( + root: Path, file_path_name: Path, file_path: Path +) -> list[NeedLink]: """Scan a single file for template strings and return findings.""" assert root.is_absolute(), "Root path must be absolute" - assert not file_path.is_absolute(), "File path 
must be relative to the root" + assert not file_path_name.is_absolute(), "File path must be relative to the root" # assert file_path.is_relative_to(root), ( # f"File path ({file_path}) must be relative to the root ({root})" # ) - assert (root / file_path).exists(), ( - f"File {file_path} does not exist in root {root}." + assert (root / file_path_name).exists(), ( + f"File {file_path_name} does not exist in root {root}." ) findings: list[NeedLink] = [] try: - with open(root / file_path, encoding="utf-8", errors="ignore") as f: + with open(root / file_path_name, encoding="utf-8", errors="ignore") as f: for line_num, line in enumerate(f, 1): for tag, req in _extract_references_from_line(line): findings.append( @@ -69,8 +75,9 @@ def _extract_references_from_file(root: Path, file_path: Path) -> list[NeedLink] full_line=line.strip(), ) ) - except (UnicodeDecodeError, PermissionError, OSError): + except (UnicodeDecodeError, PermissionError, OSError) as e: # Skip files that can't be read as text + LOGGER.debug(f"Error reading file to parse for linked needs: \n{e}") pass return findings @@ -117,12 +124,12 @@ def find_all_need_references(search_path: Path) -> list[NeedLink]: # Use os.walk to have better control over directory traversal for file in iterate_files_recursively(search_path): - references = _extract_references_from_file(search_path, file) + references = _extract_references_from_file(search_path,Path(file.name), file) all_need_references.extend(references) elapsed_time = os.times().elapsed - start_time - print( - f"DEBUG: Found {len(all_need_references)} need references " + LOGGER.debug( + f"Found {len(all_need_references)} need references " f"in {elapsed_time:.2f} seconds" ) diff --git a/src/extensions/score_source_code_linker/helpers.py b/src/extensions/score_source_code_linker/helpers.py new file mode 100644 index 000000000..5544e2696 --- /dev/null +++ b/src/extensions/score_source_code_linker/helpers.py @@ -0,0 +1,100 @@ +# 
******************************************************************************* +# Copyright (c) 2025 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. +# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* +import json +from pathlib import Path + +from src.extensions.score_source_code_linker.module_source_links import ModuleInfo + +# Import types that depend on score_source_code_linker +from src.extensions.score_source_code_linker.needlinks import DefaultNeedLink, NeedLink +from src.extensions.score_source_code_linker.testlink import ( + DataForTestLink, + DataOfTestCase, +) +from src.helper_lib import ( + find_git_root, + get_current_git_hash, + get_github_base_url, +) + + +def get_github_link( + metadata: ModuleInfo, + link: NeedLink | DataForTestLink | DataOfTestCase | None = None, +) -> str: + if link is None: + link = DefaultNeedLink() + if not metadata.hash: + # Local path (//:docs) + return get_github_link_from_git(link) + # Ref-Integration path (//:docs_combo..) 
+ return get_github_link_from_json(metadata, link) + + +def get_github_link_from_git( + link: NeedLink | DataForTestLink | DataOfTestCase | None = None, +) -> str: + if link is None: + link = DefaultNeedLink() + passed_git_root = find_git_root() + if passed_git_root is None: + passed_git_root = Path() + base_url = get_github_base_url() + current_hash = get_current_git_hash(passed_git_root) + return f"{base_url}/blob/{current_hash}/{link.file}#L{link.line}" + + +def get_github_link_from_json( + metadata: ModuleInfo, + link: NeedLink | DataForTestLink | DataOfTestCase | None = None, +): + if link is None: + link = DefaultNeedLink() + base_url = metadata.url + current_hash = metadata.hash + return f"{base_url}/blob/{current_hash}/{link.file}#L{link.line}" + + +def parse_module_name_from_path(path: Path) -> str: + """ + Parse out the Module-Name from the filename gotten + /home/user/.cache/bazel/aksj37981712/external/score_docs_as_code+/src/tests/testfile.py + => score_docs_as_code + """ + + # COMBO BUILD + # If external is in the filepath that gets parsed => + # file is in an external module => combo build + # Example Path: + # PosixPath('external/score_docs_as_code+/src/helper_lib/test_helper_lib.py' + + if str(path).startswith("external/"): + # This allows for files / folders etc. to have `external` in their name too. 
+ module_raw = str(path).removeprefix("external/") + filepath_split = str(module_raw).split("/", maxsplit=1) + return str(filepath_split[0].removesuffix("+")) + # We return this when we are in a local build `//:docs` the rest of DaC knows + # What to do then if it encounters this module_name + return "local_module" + + +def parse_info_from_known_good( + known_good_json: Path, module_name: str +) -> tuple[str, str]: + with open(known_good_json) as f: + kg_json = json.load(f) + for category in kg_json["modules"].values(): + if module_name in category: + m = category[module_name] + return (m["hash"], m["repo"].removesuffix(".git")) + raise KeyError(f"Module {module_name!r} not found in known_good_json.") diff --git a/src/extensions/score_source_code_linker/module_source_links.py b/src/extensions/score_source_code_linker/module_source_links.py new file mode 100644 index 000000000..b77450eb8 --- /dev/null +++ b/src/extensions/score_source_code_linker/module_source_links.py @@ -0,0 +1,145 @@ +# ******************************************************************************* +# Copyright (c) 2025 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. 
+# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* + + +import json +from dataclasses import asdict, dataclass, field +from pathlib import Path +from typing import Any + +from src.extensions.score_source_code_linker.need_source_links import ( + NeedSourceLinks, + SourceCodeLinks, + SourceCodeLinks_JSON_Decoder, +) +from src.extensions.score_source_code_linker.needlinks import NeedLink +from src.extensions.score_source_code_linker.testlink import DataForTestLink + + +@dataclass +class ModuleInfo: + name: str + hash: str + url: str + + +@dataclass +class ModuleSourceLinks: + module: ModuleInfo + needs: list[SourceCodeLinks] = field(default_factory=list) + + +class ModuleSourceLinks_JSON_Encoder(json.JSONEncoder): + def default(self, o: object): + if isinstance(o, Path): + return str(o) + # We do not want to save the metadata inside the codelink or testlink + # As we save this already in a structure above it + # (hash, module_name, url) + if isinstance(o, NeedLink | DataForTestLink): + return o.to_dict_without_metadata() + if isinstance(o, ModuleSourceLinks | SourceCodeLinks | NeedSourceLinks): + return asdict(o) + return super().default(o) + + +def ModuleSourceLinks_JSON_Decoder( + d: dict[str, Any], +) -> ModuleSourceLinks | dict[str, Any]: + if "module" in d and "needs" in d: + module = d["module"] + needs = d["needs"] + return ModuleSourceLinks( + module=ModuleInfo( + name=module.get("name", module.get("module_name")), + hash=module.get("hash"), + url=module.get("url"), + ), + # We know this can only be list[SourceCodeLinks] and nothing else + # Therefore => we ignore the type error here + needs=[SourceCodeLinks_JSON_Decoder(need) for need in needs], # type: ignore + ) + return d + + +def store_module_source_links_json( + 
file: Path, source_code_links: list[ModuleSourceLinks] +): + # After `rm -rf _build` or on clean builds the directory does not exist, so we need + # to create it + file.parent.mkdir(exist_ok=True) + with open(file, "w") as f: + json.dump( + source_code_links, + f, + cls=ModuleSourceLinks_JSON_Encoder, + indent=2, + ensure_ascii=False, + ) + + +def load_module_source_links_json(file: Path) -> list[ModuleSourceLinks]: + links: list[ModuleSourceLinks] = json.loads( + file.read_text(encoding="utf-8"), + object_hook=ModuleSourceLinks_JSON_Decoder, + ) + assert isinstance(links, list), ( + "The ModuleSourceLink json should be a list of ModuleSourceLink objects." + ) + print("=====================") + print("=== TESTING LINKS IN ModuleSourceLink === ") + for link in links: + if not isinstance(link, ModuleSourceLinks): + print(f"Link not module_sourcelink: {link}") + print("=====================") + assert all(isinstance(link, ModuleSourceLinks) for link in links), ( + "All items in module source link cache should be ModuleSourceLink objects." + ) + return links + + +def group_needs_by_module(links: list[SourceCodeLinks]) -> list[ModuleSourceLinks]: + module_groups: dict[str, ModuleSourceLinks] = {} + + for source_link in links: + # Check if we can take moduleInfo from code or testlinks + if source_link.links.CodeLinks: + first_link = source_link.links.CodeLinks[0] + elif source_link.links.TestLinks: + first_link = source_link.links.TestLinks[0] + else: + # This should not happen? + continue + module_key = first_link.module_name + + if module_key not in module_groups: + module_groups[module_key] = ModuleSourceLinks( + module=ModuleInfo( + name=module_key, hash=first_link.hash, url=first_link.url + ) + ) + + module_groups[module_key].needs.append(source_link) # Much clearer! 
+ + return [ + ModuleSourceLinks(module=group.module, needs=group.needs) + for group in module_groups.values() + ] + + +# # Pouplate Metadata +# # Since all metadata inside the Codelinks is the same +# # we can just arbitrarily grab the first one +# module_name=need_links.CodeLinks[0].module_name, +# hash=need_links.CodeLinks[0].hash, +# url=need_links.CodeLinks[0].url, diff --git a/src/extensions/score_source_code_linker/need_source_links.py b/src/extensions/score_source_code_linker/need_source_links.py index 6c738da8e..1823ba4b4 100644 --- a/src/extensions/score_source_code_linker/need_source_links.py +++ b/src/extensions/score_source_code_linker/need_source_links.py @@ -20,6 +20,7 @@ # req-Id: tool_req__docs_dd_link_source_code_link import json +from collections import defaultdict from dataclasses import asdict, dataclass, field from pathlib import Path from typing import Any @@ -108,3 +109,53 @@ def load_source_code_links_combined_json(file: Path) -> list[SourceCodeLinks]: "SourceCodeLinks objects." ) return links + + +def group_by_need( + source_code_links: list[NeedLink], + test_case_links: list[DataForTestLink] | None = None, +) -> list[SourceCodeLinks]: + """ + Groups the given need links and test case links by their need ID. + Returns a nested dictionary structure with 'CodeLink' and 'TestLink' categories. + Example output: + + + { + "need": "", + "module_name": , + "hash": , + "url": , + "links": { + "CodeLinks": [NeedLink, NeedLink, ...], + "TestLinks": [testlink, testlink, ...] 
+ } + } + """ + # TODO: I wonder if there is a more efficent way to do this + grouped_by_need: dict[str, NeedSourceLinks] = defaultdict( + lambda: NeedSourceLinks(TestLinks=[], CodeLinks=[]) + ) + + # Group source code links + for needlink in source_code_links: + grouped_by_need[needlink.need].CodeLinks.append(needlink) + + # Group test case links + if test_case_links is not None: + for testlink in test_case_links: + grouped_by_need[testlink.need].TestLinks.append(testlink) + + # Build final list of SourceCodeLinks + result: list[SourceCodeLinks] = [ + SourceCodeLinks( + need=need, + links=NeedSourceLinks( + CodeLinks=need_links.CodeLinks, + TestLinks=need_links.TestLinks, + ), + ) + for need, need_links in grouped_by_need.items() + ] + + return result diff --git a/src/extensions/score_source_code_linker/needlinks.py b/src/extensions/score_source_code_linker/needlinks.py index 348147292..8bc8f9439 100644 --- a/src/extensions/score_source_code_linker/needlinks.py +++ b/src/extensions/score_source_code_linker/needlinks.py @@ -16,10 +16,20 @@ import os from dataclasses import asdict, dataclass from pathlib import Path -from typing import Any +from typing import Any, TypedDict, TypeGuard -@dataclass(frozen=True, order=True) +class MetaData(TypedDict): + module_name: str + hash: str + url: str + +def is_metadata(x: object) -> TypeGuard[MetaData]: + # Make this as strict/loose as you want; at minimum, it must be a dict. 
+ return isinstance(x, dict) and {"module_name", "hash", "url"} <= x.keys() + + +@dataclass(order=True) class NeedLink: """Represents a single template string finding in a file.""" @@ -28,6 +38,19 @@ class NeedLink: tag: str need: str full_line: str + module_name: str = "" + hash: str = "" + url: str = "" + + def to_dict_full(self) -> dict[str, str | Path | int]: + return asdict(self) + + def to_dict_without_metadata(self) -> dict[str, str | Path | int]: + d = asdict(self) + d.pop("module_name", None) + d.pop("hash", None) + d.pop("url", None) + return d def DefaultNeedLink() -> NeedLink: @@ -41,6 +64,8 @@ def DefaultNeedLink() -> NeedLink: tag="", need="", full_line="", + # Module_name, hash, url are defaulted to "" + # therefore not needed to be listed ) @@ -61,26 +86,96 @@ def needlink_decoder(d: dict[str, Any]) -> NeedLink | dict[str, Any]: tag=d["tag"], need=d["need"], full_line=d["full_line"], + module_name=d.get("module_name", ""), + hash=d.get("hash", ""), + url=d.get("url", ""), ) # It's something else, pass it on to other decoders return d -def store_source_code_links_json(file: Path, needlist: list[NeedLink]): - # After `rm -rf _build` or on clean builds the directory does not exist, - # so we need to create it +def store_source_code_links_with_metadata_json( + file: Path, metadata: MetaData, needlist: list[NeedLink] +) -> None: + """ + Writes a JSON array: + [ meta_dict, needlink1, needlink2, ... ] + + meta_dict must include: + module_name, hash, url + """ + payload: list[object] = [metadata, *needlist] + + file.parent.mkdir(exist_ok=True) + with open(file, "w", encoding="utf-8") as f: + json.dump(payload, f, cls=NeedLinkEncoder, indent=2, ensure_ascii=False) + + +def store_source_code_links_json(file: Path, needlist: list[NeedLink]) -> None: + """ + Writes a JSON array: + [ needlink1, needlink2, ... 
] + """ + file.parent.mkdir(exist_ok=True) - with open(file, "w") as f: - json.dump( - needlist, - f, - cls=NeedLinkEncoder, # use your custom encoder - indent=2, - ensure_ascii=False, + with open(file, "w", encoding="utf-8") as f: + json.dump(needlist, f, cls=NeedLinkEncoder, indent=2, ensure_ascii=False) + + + def _is_needlink_list(xs: list[object]) -> TypeGuard[list[NeedLink]]: + return all(isinstance(link, NeedLink) for link in xs) + + + def load_source_code_links_with_metadata_json(file: Path) -> list[NeedLink]: + """ + Expects the JSON array where first is a meta_dict: + [ meta_dict, needlink1, needlink2, ... ] + Returns: + [NeedLink, NeedLink, ...] + + This normally should be the one called 'locally' => :docs target + """ + if not file.is_absolute(): + ws_root = os.environ.get("BUILD_WORKSPACE_DIRECTORY") + if ws_root: + file = Path(ws_root) / file + + data: list[object] = json.loads( + file.read_text(encoding="utf-8"), + object_hook=needlink_decoder, + ) + links: list[object] = [] + if not is_metadata(data[0]): + raise TypeError( + "If you do not have a 'metadata' dict as the first one in the json " + "you might want to call the load without metadata named: " + "'load_source_code_links_json'" + ) + metadata: MetaData = data[0] + links = data[1:] + if not _is_needlink_list(links): + raise TypeError( + "In local build context all items after " + f"metadata must decode to NeedLink objects. File: {file}" ) + for d in links: + d.module_name = metadata["module_name"] + d.hash = metadata["hash"] + d.url = metadata["url"] + return links def load_source_code_links_json(file: Path) -> list[NeedLink]: + """ + Expects the JSON array with needlinks + *that already have extra info in them* (module_name, hash, url): + [ needlink1, needlink2, ... ] + Returns: + [NeedLink, NeedLink, ...]
+ + This normally should be the one called in combo builds + => :docs_combo_experimental target + """ if not file.is_absolute(): # use env variable set by Bazel ws_root = os.environ.get("BUILD_WORKSPACE_DIRECTORY") diff --git a/src/extensions/score_source_code_linker/testlink.py b/src/extensions/score_source_code_linker/testlink.py index ee83c7f95..bd7139271 100644 --- a/src/extensions/score_source_code_linker/testlink.py +++ b/src/extensions/score_source_code_linker/testlink.py @@ -42,6 +42,19 @@ class DataForTestLink: verify_type: str result: str result_text: str = "" + module_name: str = "" + hash: str = "" + url: str = "" + + def to_dict_full(self) -> dict[str, str | Path | int]: + return asdict(self) + + def to_dict_without_metadata(self) -> dict[str, str | Path | int]: + d = asdict(self) + d.pop("module_name", None) + d.pop("hash", None) + d.pop("url", None) + return d class DataForTestLink_JSON_Encoder(json.JSONEncoder): @@ -60,6 +73,9 @@ def DataForTestLink_JSON_Decoder(d: dict[str, Any]) -> DataForTestLink | dict[st "line", "need", "verify_type", + "module_name", + "hash", + "url", "result", "result_text", } <= d.keys(): @@ -68,6 +84,9 @@ def DataForTestLink_JSON_Decoder(d: dict[str, Any]) -> DataForTestLink | dict[st file=Path(d["file"]), line=d["line"], need=d["need"], + module_name=d.get("module_name", ""), + hash=d.get("hash", ""), + url=d.get("url", ""), verify_type=d["verify_type"], result=d["result"], result_text=d["result_text"], @@ -83,6 +102,9 @@ class DataOfTestCase: file: str | None = None line: str | None = None result: str | None = None # passed | falied | skipped | disabled + module_name: str | None = None + hash: str | None = None + url: str | None = None # Intentionally not snakecase to make dict parsing simple TestType: str | None = None DerivationTechnique: str | None = None @@ -98,6 +120,9 @@ def from_dict(cls, data: dict[str, Any]): # type-ignore file=data.get("file"), line=data.get("line"), result=data.get("result"), + 
module_name=data.get("module_name"), + hash=data.get("hash"), + url=data.get("url"), TestType=data.get("TestType"), DerivationTechnique=data.get("DerivationTechnique"), result_text=data.get("result_text"), @@ -158,6 +183,8 @@ def is_valid(self) -> bool: # and self.TestType is not None # and self.DerivationTechnique is not None # ): + # Hash & URL are explicitly allowed to be empty but not none. + # module_name has to be always filled or something went wrong fields = [ x for x in self.__dataclass_fields__ @@ -199,6 +226,9 @@ def parse_attributes(verify_field: str | None, verify_type: str): assert self.file is not None assert self.line is not None assert self.result is not None + assert self.module_name is not None + assert self.hash is not None + assert self.url is not None assert self.result_text is not None assert self.TestType is not None assert self.DerivationTechnique is not None @@ -212,6 +242,9 @@ def parse_attributes(verify_field: str | None, verify_type: str): verify_type=verify_type, result=self.result, result_text=self.result_text, + module_name=self.module_name, + hash=self.hash, + url=self.url, ) return list( diff --git a/src/extensions/score_source_code_linker/tests/test_codelink.py b/src/extensions/score_source_code_linker/tests/test_codelink.py index 29ddc7235..9d3fa78f5 100644 --- a/src/extensions/score_source_code_linker/tests/test_codelink.py +++ b/src/extensions/score_source_code_linker/tests/test_codelink.py @@ -35,6 +35,7 @@ get_cache_filename, group_by_need, ) +from src.extensions.score_source_code_linker.helpers import get_github_link from src.extensions.score_source_code_linker.needlinks import ( NeedLink, load_source_code_links_json, @@ -43,7 +44,6 @@ from src.helper_lib import ( get_current_git_hash, ) -from src.helper_lib.additional_functions import get_github_link """ # ────────────────ATTENTION─────────────── diff --git a/src/extensions/score_source_code_linker/tests/test_source_code_link_integration.py
b/src/extensions/score_source_code_linker/tests/test_source_code_link_integration.py index 60bb98f80..b4e65279d 100644 --- a/src/extensions/score_source_code_linker/tests/test_source_code_link_integration.py +++ b/src/extensions/score_source_code_linker/tests/test_source_code_link_integration.py @@ -25,6 +25,7 @@ from sphinx.testing.util import SphinxTestApp from sphinx_needs.data import SphinxNeedsData +from src.extensions.score_source_code_linker.helpers import get_github_link from src.extensions.score_source_code_linker.needlinks import NeedLink from src.extensions.score_source_code_linker.testlink import ( DataForTestLink, @@ -37,7 +38,6 @@ SourceCodeLinks_TEST_JSON_Decoder, ) from src.helper_lib import find_ws_root, get_github_base_url -from src.helper_lib.additional_functions import get_github_link @pytest.fixture() diff --git a/src/extensions/score_source_code_linker/xml_parser.py b/src/extensions/score_source_code_linker/xml_parser.py index 8432e1fc3..123ebd5d8 100644 --- a/src/extensions/score_source_code_linker/xml_parser.py +++ b/src/extensions/score_source_code_linker/xml_parser.py @@ -33,18 +33,41 @@ from sphinx_needs import logging from sphinx_needs.api import add_external_need +from src.extensions.score_source_code_linker.helpers import ( + get_github_link, + parse_info_from_known_good, + parse_module_name_from_path, +) +from src.extensions.score_source_code_linker.module_source_links import ModuleInfo +from src.extensions.score_source_code_linker.needlinks import ( + MetaData, +) from src.extensions.score_source_code_linker.testlink import ( DataOfTestCase, store_data_of_test_case_json, store_test_xml_parsed_json, ) from src.helper_lib import find_ws_root -from src.helper_lib.additional_functions import get_github_link logger = logging.get_logger(__name__) logger.setLevel("DEBUG") +def get_metadata_from_test_path(filepath: Path) -> MetaData: + known_good_json = os.environ.get("KNOWN_GOOD_JSON") + module_name = parse_module_name_from_path(filepath) + 
md: MetaData = { + "module_name": module_name, + "hash": "", + "url": "", + } + if module_name != "local_module" and known_good_json: + md["hash"], md["url"] = parse_info_from_known_good( + Path(known_good_json), module_name + ) + return md + + def parse_testcase_result(testcase: ET.Element) -> tuple[str, str]: """ Returns 'result' and 'result_text' found in the 'message' @@ -101,7 +124,7 @@ def read_test_xml_file(file: Path) -> tuple[list[DataOfTestCase], list[str], lis missing_prop_tests: list[str] = [] tree = ET.parse(file) root = tree.getroot() - + md = get_metadata_from_test_path(file) for testsuite in root.findall("testsuite"): for testcase in testsuite.findall("testcase"): case_properties = {} @@ -161,6 +184,7 @@ def read_test_xml_file(file: Path) -> tuple[list[DataOfTestCase], list[str], lis # If the is_valid method would return 'False' anyway. # I just can't think of it right now, leaving this for future me case_properties = parse_properties(case_properties, properties_element) + case_properties.update(md) test_case = DataOfTestCase.from_dict(case_properties) if not test_case.is_valid(): missing_prop_tests.append(testname) @@ -169,6 +193,7 @@ def read_test_xml_file(file: Path) -> tuple[list[DataOfTestCase], list[str], lis return test_case_needs, non_prop_tests, missing_prop_tests +# /home/maximilianp/score_personal/reference_integration/bazel-testlogs/external/score_docs_as_code+/src/helper_lib/helper_lib_tests/test.xml def find_xml_files(dir: Path) -> list[Path]: """ Recursively search all test.xml files inside 'bazel-testlogs' @@ -183,18 +208,21 @@ def find_xml_files(dir: Path) -> list[Path]: for root, _, files in os.walk(dir): if test_file_name in files: xml_paths.append(Path(os.path.join(root, test_file_name))) + print("=========================================") + print(xml_paths[0]) + print("=========================================") return xml_paths -def find_test_folder(base_path: Path | None = None) -> Path | None: +def 
find_test_folder(base_path: Path | None = None) -> tuple[Path | None, Path | None]: ws_root = base_path if base_path is not None else find_ws_root() assert ws_root is not None if os.path.isdir(ws_root / "tests-report"): - return ws_root / "tests-report" + return ws_root, ws_root / "tests-report" if os.path.isdir(ws_root / "bazel-testlogs"): - return ws_root / "bazel-testlogs" + return ws_root, ws_root / "bazel-testlogs" logger.info("could not find tests-report or bazel-testlogs to parse testcases") - return None + return ws_root, None def run_xml_parser(app: Sphinx, env: BuildEnvironment): @@ -203,11 +231,19 @@ def run_xml_parser(app: Sphinx, env: BuildEnvironment): building testcase needs. It gets called from the source_code_linker __init__ """ - testlogs_dir = find_test_folder() + root_path, testlogs_dir = find_test_folder() # early return if testlogs_dir is None: return xml_file_paths = find_xml_files(testlogs_dir) + # scl_with_metadata = load_source_code_links_with_metadata_json( + # app.outdir / "score_source_links_metadata.json" + # )[0] + # metadata: MetaData = { + # "module_name": scl_with_metadata.module_name, + # "hash": scl_with_metadata.hash, + # "url": scl_with_metadata.url, + # } test_case_needs = build_test_needs_from_files(app, env, xml_file_paths) # Saving the test case needs for cache store_data_of_test_case_json( @@ -262,6 +298,10 @@ def construct_and_add_need(app: Sphinx, tn: DataOfTestCase): # and either 'Fully' or 'PartiallyVerifies' should not be None here assert tn.file is not None assert tn.name is not None + assert tn.module_name is not None + assert tn.hash is not None + assert tn.url is not None + metadata = ModuleInfo(name=tn.module_name, hash=tn.hash, url=tn.url) # IDK if this is ideal or not with contextlib.suppress(BaseException): _ = add_external_need( @@ -271,7 +311,7 @@ def construct_and_add_need(app: Sphinx, tn: DataOfTestCase): tags="TEST", id=f"testcase__{tn.name}_{short_hash(tn.file + tn.name).upper()}", name=tn.name, - 
external_url=get_github_link(tn), + external_url=get_github_link(metadata, tn), fully_verifies=tn.FullyVerifies if tn.FullyVerifies is not None else "", partially_verifies=tn.PartiallyVerifies if tn.PartiallyVerifies is not None diff --git a/src/helper_lib/BUILD b/src/helper_lib/BUILD index 748a2a730..ad6316363 100644 --- a/src/helper_lib/BUILD +++ b/src/helper_lib/BUILD @@ -27,8 +27,7 @@ py_library( visibility = ["//visibility:public"], deps = [ "@rules_python//python/runfiles", - "@score_docs_as_code//src/extensions/score_source_code_linker:source_code_linker_helpers", - ], + ] + all_requirements, ) score_py_pytest( diff --git a/src/helper_lib/__init__.py b/src/helper_lib/__init__.py index 5699e478d..a72fffb0b 100644 --- a/src/helper_lib/__init__.py +++ b/src/helper_lib/__init__.py @@ -17,7 +17,7 @@ from pathlib import Path from typing import Any -from runfiles import Runfiles +from python.runfiles import Runfiles from sphinx.config import Config from sphinx_needs.logging import get_logger diff --git a/src/helper_lib/additional_functions.py b/src/helper_lib/additional_functions.py deleted file mode 100644 index 5b1ce6d98..000000000 --- a/src/helper_lib/additional_functions.py +++ /dev/null @@ -1,38 +0,0 @@ -# ******************************************************************************* -# Copyright (c) 2025 Contributors to the Eclipse Foundation -# -# See the NOTICE file(s) distributed with this work for additional -# information regarding copyright ownership. 
-# -# This program and the accompanying materials are made available under the -# terms of the Apache License Version 2.0 which is available at -# https://www.apache.org/licenses/LICENSE-2.0 -# -# SPDX-License-Identifier: Apache-2.0 -# ******************************************************************************* -from pathlib import Path - -# Import types that depend on score_source_code_linker -from src.extensions.score_source_code_linker.needlinks import DefaultNeedLink, NeedLink -from src.extensions.score_source_code_linker.testlink import ( - DataForTestLink, - DataOfTestCase, -) -from src.helper_lib import ( - find_git_root, - get_current_git_hash, - get_github_base_url, -) - - -def get_github_link( - link: NeedLink | DataForTestLink | DataOfTestCase | None = None, -) -> str: - if link is None: - link = DefaultNeedLink() - passed_git_root = find_git_root() - if passed_git_root is None: - passed_git_root = Path() - base_url = get_github_base_url() - current_hash = get_current_git_hash(passed_git_root) - return f"{base_url}/blob/{current_hash}/{link.file}#L{link.line}"