Skip to content
17 changes: 12 additions & 5 deletions docs.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -69,23 +69,30 @@ def _rewrite_needs_json_to_sourcelinks(labels):
out.append(s)
return out

def _merge_sourcelinks(name, sourcelinks, known_good = None):
    """Merge multiple sourcelinks JSON files into a single file.

    Args:
        name: Name for the merged sourcelinks target
        sourcelinks: List of sourcelinks JSON file targets
        known_good: Optional label of a 'known good' JSON file. When set, it is
            added to the genrule srcs and forwarded to the merge script via
            --known_good so external-module hash/url metadata can be resolved.
    """

    extra_srcs = []
    known_good_arg = ""
    if known_good != None:
        extra_srcs = [known_good]
        # $(location ...) resolves the label to a path at build time.
        known_good_arg = "--known_good $(location %s)" % known_good

    native.genrule(
        name = name,
        srcs = sourcelinks + extra_srcs,
        outs = [name + ".json"],
        # NOTE: the known_good file also appears in $(SRCS); the merge script
        # filters it out of the positional inputs by filename.
        cmd = """
        $(location @score_docs_as_code//scripts_bazel:merge_sourcelinks) \
            --output $@ \
            {known_good_arg} \
            $(SRCS)
        """.format(known_good_arg = known_good_arg),
        tools = ["@score_docs_as_code//scripts_bazel:merge_sourcelinks"],
    )

Expand Down Expand Up @@ -120,7 +127,7 @@ def _missing_requirements(deps):
fail(msg)
fail("This case should be unreachable?!")

def docs(source_dir = "docs", data = [], deps = [], scan_code = []):
def docs(source_dir = "docs", data = [], deps = [], scan_code = [], known_good = None):
"""Creates all targets related to documentation.
By using this function, you'll get any and all updates for documentation targets in one place.
Expand Down Expand Up @@ -175,7 +182,7 @@ def docs(source_dir = "docs", data = [], deps = [], scan_code = []):

data_with_docs_sources = _rewrite_needs_json_to_docs_sources(data)
additional_combo_sourcelinks = _rewrite_needs_json_to_sourcelinks(data)
_merge_sourcelinks(name = "merged_sourcelinks", sourcelinks = [":sourcelinks_json"] + additional_combo_sourcelinks)
_merge_sourcelinks(name = "merged_sourcelinks", sourcelinks = [":sourcelinks_json"] + additional_combo_sourcelinks, known_good = known_good)

py_binary(
name = "docs",
Expand Down
1 change: 1 addition & 0 deletions scripts_bazel/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ py_binary(
py_binary(
    name = "merge_sourcelinks",
    srcs = ["merge_sourcelinks.py"],
    main = "merge_sourcelinks.py",
    # Provides parse_info_from_known_good (helpers.py) used by the script.
    deps = ["//src/extensions/score_source_code_linker"],
    visibility = ["//visibility:public"],
)
24 changes: 18 additions & 6 deletions scripts_bazel/generate_sourcelinks_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,10 @@
from src.extensions.score_source_code_linker.generate_source_code_links_json import (
_extract_references_from_file, # pyright: ignore[reportPrivateUsage] TODO: move it out of the extension and into this script
)
from src.extensions.score_source_code_linker.helpers import parse_module_name_from_path
from src.extensions.score_source_code_linker.needlinks import (
store_source_code_links_json,
MetaData,
store_source_code_links_with_metadata_json,
)

logging.basicConfig(level=logging.INFO, format="%(message)s")
Expand All @@ -37,13 +39,13 @@ def main():
parser = argparse.ArgumentParser(
description="Generate source code links JSON from source files"
)
parser.add_argument(
_ = parser.add_argument(
"--output",
required=True,
type=Path,
help="Output JSON file path",
)
parser.add_argument(
_ = parser.add_argument(
"files",
nargs="*",
type=Path,
Expand All @@ -53,15 +55,25 @@ def main():
args = parser.parse_args()

all_need_references = []
metadata: MetaData = {
"module_name": "",
"hash": "",
"url": "",
}
metadata_set = False
for file_path in args.files:
if "known_good.json" not in str(file_path) and not metadata_set:
metadata["module_name"] = parse_module_name_from_path(file_path)
metadata_set = True
abs_file_path = file_path.resolve()
assert abs_file_path.exists(), abs_file_path
references = _extract_references_from_file(
abs_file_path.parent, Path(abs_file_path.name)
abs_file_path.parent, Path(abs_file_path.name), file_path
)
all_need_references.extend(references)

store_source_code_links_json(args.output, all_need_references)
store_source_code_links_with_metadata_json(
file=args.output, metadata=metadata, needlist=all_need_references
)
Comment on lines +74 to +76
Copy link

Copilot AI Mar 12, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This switches the generated JSON format from a plain list of NeedLinks to a list whose first element is a metadata dict. Any existing consumers/tests that expect the old schema will now fail. Consider either keeping the old format as default (with an opt-in flag for metadata), or updating all in-repo consumers and tests in the same PR to avoid a partially-migrated state.

Copilot uses AI. Check for mistakes.
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@a-zw Valid issue here.

Do you think it would be better to rename this specific cache that comes from here a bit, so it is clear it carries metadata?
Something like 'scl_metadata_cache.json', so that the name makes it obvious that the metadata-aware reader must be used?

logger.info(
f"Found {len(all_need_references)} need references in {len(args.files)} files"
)
Expand Down
45 changes: 40 additions & 5 deletions scripts_bazel/merge_sourcelinks.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,36 +21,71 @@
import sys
from pathlib import Path

from src.extensions.score_source_code_linker.helpers import parse_info_from_known_good

logging.basicConfig(level=logging.INFO, format="%(message)s")
logger = logging.getLogger(__name__)



"""
if bazel-out/k8-fastbuild/bin/external/ in file_path => module is external
otherwise it's local
if local => module_name & hash == empty
if external => parse thing for module_name => look up known_good json for hash & url
"""



def add_needid_to_metaneed_mapping(
    mapping: dict[str, dict[str, str]], metadata: dict[str, str], needid: str
) -> None:
    """Record *metadata* for *needid* in *mapping*.

    TODO(review): unimplemented stub — the original body contained a bare
    ``mapping`` expression statement, which is a no-op and has been removed.
    Implement (presumably ``mapping[needid] = metadata`` — confirm intent)
    or delete this function; it currently has no callers in this diff.
    """
    pass

def main():
    """Merge multiple sourcelinks JSON files into one output file.

    Each input file is expected to be a JSON list whose first element is a
    metadata dict (``module_name``/``hash``/``url``) and whose remaining
    elements are need-link entries.  The metadata is propagated onto every
    entry; for external modules the hash/url are resolved from the
    ``--known_good`` file.  The combined entry list is written to --output.
    """
    parser = argparse.ArgumentParser(
        description="Merge multiple sourcelinks JSON files into one"
    )
    _ = parser.add_argument(
        "--output",
        required=True,
        type=Path,
        help="Output merged JSON file path",
    )
    # Must stay optional: docs.bzl only passes --known_good when a label was
    # supplied (known_good != None); required=True would break the default
    # invocation at argument-parsing time.
    _ = parser.add_argument(
        "--known_good",
        required=False,
        default=None,
        help="Optional path to a 'known good' JSON file (provided by Bazel).",
    )
    _ = parser.add_argument(
        "files",
        nargs="*",
        type=Path,
        help="Input JSON files to merge",
    )

    args = parser.parse_args()
    # The known_good file is also listed in $(SRCS) by the genrule; filter it
    # out of the positional inputs so it is not parsed as a sourcelinks file.
    all_files = [x for x in args.files if "known_good.json" not in str(x)]

    merged = []
    for json_file in all_files:
        with open(json_file) as f:
            data = json.load(f)
        assert isinstance(data, list), repr(data)
        metadata = data[0]
        # External modules need hash & url resolved via the known_good file.
        # For 'local_module' (or an empty module_name) the metadata already
        # carries empty hash/url values, so all three keys exist either way.
        if (
            metadata["module_name"]
            and metadata["module_name"] != "local_module"
            and args.known_good is not None
        ):
            module_hash, module_url = parse_info_from_known_good(
                known_good_json=args.known_good,
                module_name=metadata["module_name"],
            )
            metadata["hash"] = module_hash
            metadata["url"] = module_url

        # Stamp the (possibly enriched) metadata onto each need entry and
        # drop the leading metadata element from the merged output.
        for entry in data[1:]:
            entry.update(metadata)
        merged.extend(data[1:])
    with open(args.output, "w") as f:
        json.dump(merged, f, indent=2, ensure_ascii=False)

Expand Down
1 change: 1 addition & 0 deletions src/extensions/score_source_code_linker/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ py_library(
"needlinks.py",
"testlink.py",
"xml_parser.py",
"helpers.py",
],
imports = ["."],
visibility = ["//visibility:public"],
Expand Down
Loading
Loading