From d0326b877a484bbcce6528c83ddac37e4be35e84 Mon Sep 17 00:00:00 2001 From: James Bruten <109733895+james-bruten-mo@users.noreply.github.com> Date: Mon, 26 Jan 2026 17:11:52 +0000 Subject: [PATCH 01/31] copy changes from apps --- github_scripts/get_git_sources.py | 151 ++++++++++++++++++------------ 1 file changed, 90 insertions(+), 61 deletions(-) diff --git a/github_scripts/get_git_sources.py b/github_scripts/get_git_sources.py index 323afbed..230d9cc4 100644 --- a/github_scripts/get_git_sources.py +++ b/github_scripts/get_git_sources.py @@ -1,11 +1,10 @@ -# ----------------------------------------------------------------------------- +# *****************************COPYRIGHT******************************* # (C) Crown copyright Met Office. All rights reserved. -# The file LICENCE, distributed with this code, contains details of the terms -# under which the code may be used. -# ----------------------------------------------------------------------------- - +# For further details please refer to the file COPYRIGHT.txt +# which you should have received as part of this distribution. +# *****************************COPYRIGHT******************************* """ -Clone sources for a rose-stem run for use with git bdiff module in scripts +Helper functions for cloning git sources in command line builds """ import re @@ -14,16 +13,32 @@ from pathlib import Path from shutil import rmtree import shlex -import logging -logger = logging.getLogger(__name__) + +def get_source( + source: str, + ref: str, + dest: Path, + repo: str, + use_mirrors: bool = False, + mirror_loc: Path = "", +) -> None: + + if ".git" in source: + if use_mirrors: + mirror_loc = Path(mirror_loc) / "MetOffice" / repo + print(f"Cloning/Updating {repo} from mirror {mirror_loc} at ref {ref}") + clone_repo_mirror(source, ref, repo, mirror_loc, dest) + else: + print(f"Cloning/Updating {repo} from {source} at ref {ref}") + clone_repo(source, ref, dest) + else: + print(f"Syncing {repo} at ref {ref}") + sync_repo(source, ref, dest) def run_command( - command: str, - check: bool = True, - capture: bool = True, - timeout: int = 600 + command: str, rval: bool = False, check: bool = True ) -> Optional[subprocess.CompletedProcess]: """ Run a subprocess command and return the result object @@ -32,54 +47,59 @@ def run_command( Outputs: - result object from subprocess.run """ - - args = shlex.split(command) - - try: - # Note: text=True and capture_output=True have high overhead - # for large buffers. Use capture=False for fire-and-forget tasks. - result = subprocess.run( - args, - capture_output=capture, - text=capture, - timeout=timeout, - shell=False, - check=False + command = shlex.split(command) + result = subprocess.run( + command, + capture_output=True, + text=True, + timeout=300, + shell=False, + check=False, + ) + if check and result.returncode: + print(result.stdout, end="\n\n\n") + raise RuntimeError( + f"[FAIL] Issue found running command {command}\n\n{result.stderr}" ) - if check and result.returncode != 0: - err_msg = (result.stderr or "").strip() - logger.error(f"[FAIL] Command failed: {command}\nError: {err_msg}") - raise subprocess.CalledProcessError( - result.returncode, args, output=result.stdout, stderr=result.stderr - ) + if rval: return result - except (subprocess.TimeoutExpired, FileNotFoundError) as e: - logger.error(f"[FAIL] Execution error for '{args[0]}': {e}") - raise - def clone_repo_mirror( - source: str, repo_ref: str, parent: str, mirror_loc: Path, loc: Path + repo_source: str, repo_ref: str, parent: str, mirror_loc: Path, loc: Path ) -> None: """ Clone a repo source using a local git mirror. Assume the mirror is set up as per the Met Office + - repo_source: ssh url of the source repository + - repo_ref: git ref for the source. An empty string will get the default branch + - parent: Owner of the github repository being cloned (required to construct the + mirror path) + - mirror_loc: path to the local git mirrors + - loc: path to clone the repository to """ - # Remove if this clone already exists + # If the repository exists and isn't a git repo, exit now as we don't want to + # overwrite it if loc.exists(): - rmtree(loc) + if not Path(loc / ".git").exists(): + raise RuntimeError( + f"The destination for the clone of {repo_source} already exists but " + "isn't a git directory. Exiting so as to not overwrite it." + ) - command = f"git clone {mirror_loc} {loc}" - run_command(command) + # Clone if the repo doesn't exist + else: + command = f"git clone {mirror_loc} {loc}" + run_command(command) - # If not provided a ref, return + # If not provided a ref, pull the latest repository and return if not repo_ref: + run_command(f"git -C {loc} pull") return - source = source.removeprefix("git@github.com:") - user = source.split("/")[0] + repo_source = repo_source.removeprefix("git@github.com:") + user = repo_source.split("/")[0] # Check that the user is different to the Upstream User if user in parent.split("/")[0]: user = None @@ -102,24 +122,33 @@ def clone_repo(repo_source: str, repo_ref: str, loc: Path) -> None: """ Clone the repo and checkout the provided ref Only if a remote source + - repo_source: ssh url of the source repository + - repo_ref: git ref for the source. An empty string will get the default branch + - loc: path to clone the repository to """ - # Remove if this clone already exists - if loc.exists(): - rmtree(loc) - - # Create a clean clone location - loc.mkdir(parents=True) - - commands = ( - f"git -C {loc} init", - f"git -C {loc} remote add origin {repo_source}", - f"git -C {loc} fetch origin {repo_ref}", - f"git -C {loc} checkout FETCH_HEAD", - f"git -C {loc} fetch origin main:main", - ) - for command in commands: - run_command(command) + if not loc.exists(): + # Create a clean clone location + loc.mkdir(parents=True) + + # This process is equivalent to doing a git clone + # It saves a small amount of space by not fetching all refs + commands = ( + f"git -C {loc} init", + f"git -C {loc} remote add origin {repo_source}", + f"git -C {loc} fetch origin {repo_ref}", + f"git -C {loc} checkout FETCH_HEAD", + f"git -C {loc} fetch origin main:main", + ) + for command in commands: + run_command(command) + else: + commands = ( + f"git -C {loc} fetch origin {repo_ref}", + f"git -C {loc} checkout FETCH_HEAD", + ) + for command in commands: + run_command(command) def sync_repo(repo_source: str, repo_ref: str, loc: Path) -> None: @@ -165,8 +194,8 @@ def sync_repo(repo_source: str, repo_ref: str, loc: Path) -> None: # Ignore errors - these are likely because the main branch already exists # Instead write them as warnings command = f"git -C {loc} fetch origin main:main" - result = run_command(command, check=False) - if result and result.returncode: + result = run_command(command, check=False, rval=True) + if result.returncode: print("Warning - fetching main from origin resulted in an error") print("This is likely due to the main branch already existing") print(f"Error message:\n\n{result.stderr}") From 70073e6b7790b6c18d78cb3485e003d6aacffdeb Mon Sep 17 00:00:00 2001 From: James Bruten <109733895+james-bruten-mo@users.noreply.github.com> Date: Mon, 26 Jan 2026 17:16:52 +0000 Subject: [PATCH 02/31] revert accidental changes --- github_scripts/get_git_sources.py | 53 ++++++++++++++++++++----------- 1 file changed, 34 insertions(+), 19 deletions(-) diff --git a/github_scripts/get_git_sources.py b/github_scripts/get_git_sources.py index 230d9cc4..b40368a7 100644 --- a/github_scripts/get_git_sources.py +++ b/github_scripts/get_git_sources.py @@ -1,8 +1,8 @@ -# *****************************COPYRIGHT******************************* +# ----------------------------------------------------------------------------- # (C) Crown copyright Met Office. All rights reserved. -# For further details please refer to the file COPYRIGHT.txt -# which you should have received as part of this distribution. -# *****************************COPYRIGHT******************************* +# The file LICENCE, distributed with this code, contains details of the terms +# under which the code may be used. +# ----------------------------------------------------------------------------- """ Helper functions for cloning git sources in command line builds """ @@ -13,6 +13,9 @@ from pathlib import Path from shutil import rmtree import shlex +import logging + +logger = logging.getLogger(__name__) def get_source( @@ -38,7 +41,10 @@ def get_source( def run_command( - command: str, rval: bool = False, check: bool = True + command: str, + check: bool = True, + capture: bool = True, + timeout: int = 600 ) -> Optional[subprocess.CompletedProcess]: """ Run a subprocess command and return the result object @@ -47,23 +53,32 @@ def run_command( Outputs: - result object from subprocess.run """ - command = shlex.split(command) - result = subprocess.run( - command, - capture_output=True, - text=True, - timeout=300, - shell=False, - check=False, - ) - if check and result.returncode: - print(result.stdout, end="\n\n\n") - raise RuntimeError( - f"[FAIL] Issue found running command {command}\n\n{result.stderr}" + + args = shlex.split(command) + + try: + # Note: text=True and capture_output=True have high overhead + # for large buffers. Use capture=False for fire-and-forget tasks. + result = subprocess.run( + args, + capture_output=capture, + text=capture, + timeout=timeout, + shell=False, + check=False ) - if rval: + if check and result.returncode != 0: + err_msg = (result.stderr or "").strip() + logger.error(f"[FAIL] Command failed: {command}\nError: {err_msg}") + raise subprocess.CalledProcessError( + result.returncode, args, output=result.stdout, stderr=result.stderr + ) return result + except (subprocess.TimeoutExpired, FileNotFoundError) as e: + logger.error(f"[FAIL] Execution error for '{args[0]}': {e}") + raise + def clone_repo_mirror( repo_source: str, repo_ref: str, parent: str, mirror_loc: Path, loc: Path From 538520b4b7998b4c459e4ea63bc443f4ea80b604 Mon Sep 17 00:00:00 2001 From: James Bruten <109733895+james-bruten-mo@users.noreply.github.com> Date: Mon, 26 Jan 2026 17:46:34 +0000 Subject: [PATCH 03/31] fix --- github_scripts/get_git_sources.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/github_scripts/get_git_sources.py b/github_scripts/get_git_sources.py index b40368a7..8bd0261b 100644 --- a/github_scripts/get_git_sources.py +++ b/github_scripts/get_git_sources.py @@ -209,7 +209,7 @@ def sync_repo(repo_source: str, repo_ref: str, loc: Path) -> None: # Ignore errors - these are likely because the main branch already exists # Instead write them as warnings command = f"git -C {loc} fetch origin main:main" - result = run_command(command, check=False, rval=True) + result = run_command(command, check=False) if result.returncode: print("Warning - fetching main from origin resulted in an error") print("This is likely due to the main branch already existing") From 37ee629039b10230c4a95bfca70bdc2c45437b9c Mon Sep 17 00:00:00 2001 From: James Bruten <109733895+james-bruten-mo@users.noreply.github.com> Date: Mon, 26 Jan 2026 18:51:07 +0000 Subject: [PATCH 04/31] allow multiple sources --- github_scripts/rose_stem_extract_source.py | 59 ++++++++++++---------- 1 file changed, 33 insertions(+), 26 deletions(-) diff --git a/github_scripts/rose_stem_extract_source.py b/github_scripts/rose_stem_extract_source.py index 5a5e590d..a9e05fcd 100755 --- a/github_scripts/rose_stem_extract_source.py +++ b/github_scripts/rose_stem_extract_source.py @@ -24,12 +24,15 @@ def set_https(dependencies: dict) -> dict: """ print("Modifying Dependencies") - for dependency, values in dependencies.items(): - if values["source"].startswith("git@github.com:"): - source = dependencies[dependency]["source"] - dependencies[dependency]["source"] = source.replace( - "git@github.com:", "https://github.com/" - ) + for dependency, opts in dependencies.items(): + if not opts.isinstance(list): + opts = [opts] + for values in opts: + if values["source"].startswith("git@github.com:"): + source = dependencies[dependency]["source"] + dependencies[dependency]["source"] = source.replace( + "git@github.com:", "https://github.com/" + ) return dependencies @@ -53,31 +56,35 @@ def main() -> None: if os.environ.get("USE_TOKENS", "False") == "True": dependencies = set_https(dependencies) - for dependency, values in dependencies.items(): + for dependency, opts in dependencies.items(): loc = clone_loc / dependency - if ".git" in values["source"]: - if os.environ.get("USE_MIRRORS", "False") == "True": - mirror_loc = Path(os.environ["GIT_MIRROR_LOC"]) / values["parent"] - print( - f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] Cloning " - f"{dependency} from {mirror_loc} at ref {values['ref']}" - ) - clone_repo_mirror( - values["source"], values["ref"], values["parent"], mirror_loc, loc - ) + if not opts.isinstance(list): + opts = [opts] + + for values in opts: + if ".git" in values["source"]: + if os.environ.get("USE_MIRRORS", "False") == "True": + mirror_loc = Path(os.environ["GIT_MIRROR_LOC"]) / values["parent"] + print( + f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] Cloning " + f"{dependency} from {mirror_loc} at ref {values['ref']}" + ) + clone_repo_mirror( + values["source"], values["ref"], values["parent"], mirror_loc, loc + ) + else: + print( + f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] Cloning " + f"{dependency} from {values['source']} at ref {values['ref']}" + ) + clone_repo(values["source"], values["ref"], loc) else: print( - f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] Cloning " - f"{dependency} from {values['source']} at ref {values['ref']}" + f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] Syncing " + f"{dependency} at ref {values['ref']}" ) - clone_repo(values["source"], values["ref"], loc) - else: - print( - f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] Syncing " - f"{dependency} at ref {values['ref']}" - ) - sync_repo(values["source"], values["ref"], loc) + sync_repo(values["source"], values["ref"], loc) if __name__ == "__main__": From e3243949dd145fdbdacb89a167deb762eb5f6b99 Mon Sep 17 00:00:00 2001 From: James Bruten <109733895+james-bruten-mo@users.noreply.github.com> Date: Mon, 26 Jan 2026 20:24:28 +0000 Subject: [PATCH 05/31] add merge_sources script --- github_scripts/merge_sources.py | 98 +++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) create mode 100755 github_scripts/merge_sources.py diff --git a/github_scripts/merge_sources.py b/github_scripts/merge_sources.py new file mode 100755 index 00000000..0cfa6a15 --- /dev/null +++ b/github_scripts/merge_sources.py @@ -0,0 +1,98 @@ +#!/usr/bin/env python3 +# ----------------------------------------------------------------------------- +# (C) Crown copyright Met Office. All rights reserved. +# The file LICENCE, distributed with this code, contains details of the terms +# under which the code may be used. +# ----------------------------------------------------------------------------- +""" +Script to clone and merge git sources +""" + +import argparse +import os +from pathlib import Path +import yaml +from tempfile import mkdtemp +from get_git_sources import get_source, run_command + +def parse_args(): + """ + Parse arguments + """ + + parser = argparse.ArgumentParser( + description="Extract and merge git sources" + ) + parser.add_argument( + "-d", + "--dependencies", + default=Path(__file__).parent, + help="Path to the dependencies.yaml file" + ) + parser.add_argument( + "-p", + "--path", + default=None, + help="The path to extract the sources to. If part of a cylc suite, it will " + "default to $CYLC_WORKFLOW_SHARE_DIR/source, otherwise __file__/source" + ) + parser.add_argument( + "-m", + "--mirrors", + action="store_true", + help="If true, attempts to use local git mirrors", + ) + parser.add_argument( + "--mirror_loc", + default="/data/users/gitassist/git_mirrors", + help="Location of github mirrors", + ) + args = parser.parse_args() + args.dependencies = args.dependencies.resolve() + if args.dependencies.name != "dependencies.yaml": + args.dependencies = args.dependencies / "dependencies.yaml" + + if not args.path: + args.path = Path(os.getenv("CYLC_WORKFLOW_SHARE_DIR", __file__)) / "source" + args.path = args.path.resolve() + + return args + +def main(): + """ + Main Function + """ + + args = parse_args() + + tempdir = Path(mkdtemp()) + + with open(args.dependencies, "r") as stream: + dependencies = yaml.safe_load(stream) + + for dependency, opts in dependencies: + if not opts.isinstance(list): + opts = [opts] + + for i, values in enumerate(opts): + if i == 0: + dest = args.path + else: + dest = tempdir + get_source( + values["source"], + values["ref"], + dest, + dependency, + args.mirrors, + args.mirror_loc + ) + if i == 0: + continue + command = f"git -C {args.path / dependency} merge {tempdir / dependency}" + run_command(command) + + + +if __name__ == "__main__": + main() From 083fa1c12cfdb1d5f9291185466608a4803134c4 Mon Sep 17 00:00:00 2001 From: James Bruten <109733895+james-bruten-mo@users.noreply.github.com> Date: Mon, 26 Jan 2026 20:54:26 +0000 Subject: [PATCH 06/31] make path --- github_scripts/merge_sources.py | 1 + 1 file changed, 1 insertion(+) diff --git a/github_scripts/merge_sources.py b/github_scripts/merge_sources.py index 0cfa6a15..5ef4827f 100755 --- a/github_scripts/merge_sources.py +++ b/github_scripts/merge_sources.py @@ -27,6 +27,7 @@ def parse_args(): "-d", "--dependencies", default=Path(__file__).parent, + type=Path, help="Path to the dependencies.yaml file" ) parser.add_argument( From a0645cad725c834578d8c52e53000aa2599e8862 Mon Sep 17 00:00:00 2001 From: James Bruten <109733895+james-bruten-mo@users.noreply.github.com> Date: Mon, 26 Jan 2026 20:55:54 +0000 Subject: [PATCH 07/31] bug --- github_scripts/merge_sources.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/github_scripts/merge_sources.py b/github_scripts/merge_sources.py index 5ef4827f..a0b1276a 100755 --- a/github_scripts/merge_sources.py +++ b/github_scripts/merge_sources.py @@ -71,7 +71,7 @@ def main(): with open(args.dependencies, "r") as stream: dependencies = yaml.safe_load(stream) - for dependency, opts in dependencies: + for dependency, opts in dependencies.items(): if not opts.isinstance(list): opts = [opts] From ef2c48862b2e0645ff58df7cc360975e42139dc5 Mon Sep 17 00:00:00 2001 From: James Bruten <109733895+james-bruten-mo@users.noreply.github.com> Date: Mon, 26 Jan 2026 20:58:09 +0000 Subject: [PATCH 08/31] bug --- github_scripts/merge_sources.py | 2 +- github_scripts/rose_stem_extract_source.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/github_scripts/merge_sources.py b/github_scripts/merge_sources.py index a0b1276a..034bd2be 100755 --- a/github_scripts/merge_sources.py +++ b/github_scripts/merge_sources.py @@ -72,7 +72,7 @@ def main(): dependencies = yaml.safe_load(stream) for dependency, opts in dependencies.items(): - if not opts.isinstance(list): + if not isinstance(opts, list): opts = [opts] for i, values in enumerate(opts): diff --git a/github_scripts/rose_stem_extract_source.py b/github_scripts/rose_stem_extract_source.py index a9e05fcd..10aebc0d 100755 --- a/github_scripts/rose_stem_extract_source.py +++ b/github_scripts/rose_stem_extract_source.py @@ -25,7 +25,7 @@ def set_https(dependencies: dict) -> dict: print("Modifying Dependencies") for dependency, opts in dependencies.items(): - if not opts.isinstance(list): + if not isinstance(opts, list): opts = [opts] for values in opts: if values["source"].startswith("git@github.com:"): @@ -59,7 +59,7 @@ def main() -> None: for dependency, opts in dependencies.items(): loc = clone_loc / dependency - if not opts.isinstance(list): + if not isinstance(opts, list): opts = [opts] for values in opts: From 5a0c657d5b246c14f708da0d2f2de358662efe39 Mon Sep 17 00:00:00 2001 From: James Bruten <109733895+james-bruten-mo@users.noreply.github.com> Date: Tue, 27 Jan 2026 08:36:54 +0000 Subject: [PATCH 09/31] fix merge --- github_scripts/get_git_sources.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/github_scripts/get_git_sources.py b/github_scripts/get_git_sources.py index 8bd0261b..56681973 100644 --- a/github_scripts/get_git_sources.py +++ b/github_scripts/get_git_sources.py @@ -24,7 +24,7 @@ def get_source( dest: Path, repo: str, use_mirrors: bool = False, - mirror_loc: Path = "", + mirror_loc: Path = Path(""), ) -> None: if ".git" in source: @@ -210,7 +210,7 @@ def sync_repo(repo_source: str, repo_ref: str, loc: Path) -> None: # Instead write them as warnings command = f"git -C {loc} fetch origin main:main" result = run_command(command, check=False) - if result.returncode: + if result and result.returncode: print("Warning - fetching main from origin resulted in an error") print("This is likely due to the main branch already existing") print(f"Error message:\n\n{result.stderr}") From 468c38202df054d463ab10215c2aec606710814a Mon Sep 17 00:00:00 2001 From: James Bruten <109733895+james-bruten-mo@users.noreply.github.com> Date: Tue, 27 Jan 2026 08:50:07 +0000 Subject: [PATCH 10/31] mirror fix --- github_scripts/get_git_sources.py | 4 ++-- github_scripts/merge_sources.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/github_scripts/get_git_sources.py b/github_scripts/get_git_sources.py index 56681973..3e0469aa 100644 --- a/github_scripts/get_git_sources.py +++ b/github_scripts/get_git_sources.py @@ -31,7 +31,7 @@ def get_source( if use_mirrors: mirror_loc = Path(mirror_loc) / "MetOffice" / repo print(f"Cloning/Updating {repo} from mirror {mirror_loc} at ref {ref}") - clone_repo_mirror(source, ref, repo, mirror_loc, dest) + clone_repo_mirror(source, ref, mirror_loc, dest) else: print(f"Cloning/Updating {repo} from {source} at ref {ref}") clone_repo(source, ref, dest) @@ -81,7 +81,7 @@ def run_command( def clone_repo_mirror( - repo_source: str, repo_ref: str, parent: str, mirror_loc: Path, loc: Path + repo_source: str, repo_ref: str, mirror_loc: Path, loc: Path, parent: str = "MetOffice" ) -> None: """ Clone a repo source using a local git mirror. diff --git a/github_scripts/merge_sources.py b/github_scripts/merge_sources.py index 034bd2be..16f33940 100755 --- a/github_scripts/merge_sources.py +++ b/github_scripts/merge_sources.py @@ -77,9 +77,9 @@ def main(): for i, values in enumerate(opts): if i == 0: - dest = args.path + dest = args.path / dependency else: - dest = tempdir + dest = tempdir / dependency get_source( values["source"], values["ref"], From 66e5dcf5f545faf6c067f38865f3e15655af2f93 Mon Sep 17 00:00:00 2001 From: James Bruten <109733895+james-bruten-mo@users.noreply.github.com> Date: Tue, 27 Jan 2026 11:56:04 +0000 Subject: [PATCH 11/31] setup merging of branches --- github_scripts/get_git_sources.py | 119 +++++++++++++++++++----------- github_scripts/merge_sources.py | 40 +++++----- 2 files changed, 94 insertions(+), 65 deletions(-) diff --git a/github_scripts/get_git_sources.py b/github_scripts/get_git_sources.py index 3e0469aa..ff5a55d1 100644 --- a/github_scripts/get_git_sources.py +++ b/github_scripts/get_git_sources.py @@ -18,33 +18,8 @@ logger = logging.getLogger(__name__) -def get_source( - source: str, - ref: str, - dest: Path, - repo: str, - use_mirrors: bool = False, - mirror_loc: Path = Path(""), -) -> None: - - if ".git" in source: - if use_mirrors: - mirror_loc = Path(mirror_loc) / "MetOffice" / repo - print(f"Cloning/Updating {repo} from mirror {mirror_loc} at ref {ref}") - clone_repo_mirror(source, ref, mirror_loc, dest) - else: - print(f"Cloning/Updating {repo} from {source} at ref {ref}") - clone_repo(source, ref, dest) - else: - print(f"Syncing {repo} at ref {ref}") - sync_repo(source, ref, dest) - - def run_command( - command: str, - check: bool = True, - capture: bool = True, - timeout: int = 600 + command: str, check: bool = True, capture: bool = True, timeout: int = 600 ) -> Optional[subprocess.CompletedProcess]: """ Run a subprocess command and return the result object @@ -57,15 +32,13 @@ def run_command( args = shlex.split(command) try: - # Note: text=True and capture_output=True have high overhead - # for large buffers. Use capture=False for fire-and-forget tasks. result = subprocess.run( args, capture_output=capture, text=capture, timeout=timeout, shell=False, - check=False + check=False, ) if check and result.returncode != 0: err_msg = (result.stderr or "").strip() @@ -80,16 +53,85 @@ def run_command( raise +def get_source( + source: str, + ref: str, + dest: Path, + repo: str, + use_mirrors: bool = False, + mirror_loc: Path = Path(""), +) -> None: + """ + Call functions to clone or rsync git source + """ + + print(source, ref, dest, repo, use_mirrors, mirror_loc) + + if ".git" in source: + if use_mirrors: + mirror_loc = Path(mirror_loc) / "MetOffice" / repo + print(f"Cloning/Updating {repo} from mirror {mirror_loc} at ref {ref}") + clone_repo_mirror(source, ref, mirror_loc, dest) + else: + print(f"Cloning/Updating {repo} from {source} at ref {ref}") + clone_repo(source, ref, dest) + else: + print(f"Syncing {repo} at ref {ref}") + sync_repo(source, ref, dest) + + +def merge_source(source: str, ref: str, repo: str, dest: Path, use_mirrors: bool = False, mirror_loc: Path = Path("")): + """ + Merge git source into a local git clone. Assumes dest is a git clone that this + source can be merged into. + """ + + if use_mirrors: + remote_path = Path(mirror_loc) / "MetOffice" / repo + else: + remote_path = source + run_command(f"git -C {dest} remote add local {remote_path}") + + if use_mirrors: + fetch = determine_mirror_fetch(source, ref) + else: + fetch = ref + + run_command(f"git -C {dest} fetch local {fetch}") + run_command(f"git -C {dest} merge FETCH_HEAD") + + +def determine_mirror_fetch(repo_source: str, repo_ref: str) -> str: + """ + Determine the fetch ref for the git mirrors + """ + + repo_source = repo_source.removeprefix("git@github.com:") + user = repo_source.split("/")[0] + # Check that the user is different to the Upstream User + if "MetOffice" in user: + user = None + + # If the ref is a hash then we don't need the fork user as part of the fetch. + # Equally, if the user is the Upstream User, it's not needed + if not user or re.match(r"^\s*([0-9a-f]{40})\s*$", repo_ref): + fetch = repo_ref + else: + fetch = f"{user}/{repo_ref}" + + return fetch + def clone_repo_mirror( - repo_source: str, repo_ref: str, mirror_loc: Path, loc: Path, parent: str = "MetOffice" + repo_source: str, + repo_ref: str, + mirror_loc: Path, + loc: Path, ) -> None: """ Clone a repo source using a local git mirror. Assume the mirror is set up as per the Met Office - repo_source: ssh url of the source repository - repo_ref: git ref for the source. An empty string will get the default branch - - parent: Owner of the github repository being cloned (required to construct the - mirror path) - mirror_loc: path to the local git mirrors - loc: path to clone the repository to """ @@ -113,18 +155,7 @@ def clone_repo_mirror( run_command(f"git -C {loc} pull") return - repo_source = repo_source.removeprefix("git@github.com:") - user = repo_source.split("/")[0] - # Check that the user is different to the Upstream User - if user in parent.split("/")[0]: - user = None - - # If the ref is a hash then we don't need the fork user as part of the fetch. - # Equally, if the user is the Upstream User, it's not needed - if not user or re.match(r"^\s*([0-9a-f]{40})\s*$", repo_ref): - fetch = repo_ref - else: - fetch = f"{user}/{repo_ref}" + fetch = determine_mirror_fetch(repo_source, repo_ref) commands = ( f"git -C {loc} fetch origin {fetch}", f"git -C {loc} checkout FETCH_HEAD", diff --git a/github_scripts/merge_sources.py b/github_scripts/merge_sources.py index 16f33940..2f6514b9 100755 --- a/github_scripts/merge_sources.py +++ b/github_scripts/merge_sources.py @@ -10,32 +10,32 @@ import argparse import os -from pathlib import Path import yaml +from pathlib import Path +from shutil import rmtree from tempfile import mkdtemp -from get_git_sources import get_source, run_command +from get_git_sources import get_source, merge_source + def parse_args(): """ Parse arguments """ - parser = argparse.ArgumentParser( - description="Extract and merge git sources" - ) + parser = argparse.ArgumentParser(description="Extract and merge git sources") parser.add_argument( "-d", "--dependencies", default=Path(__file__).parent, type=Path, - help="Path to the dependencies.yaml file" + help="Path to the dependencies.yaml file", ) parser.add_argument( "-p", "--path", default=None, help="The path to extract the sources to. If part of a cylc suite, it will " - "default to $CYLC_WORKFLOW_SHARE_DIR/source, otherwise __file__/source" + "default to $CYLC_WORKFLOW_SHARE_DIR/source, otherwise __file__/source", ) parser.add_argument( "-m", @@ -59,6 +59,7 @@ def parse_args(): return args + def main(): """ Main Function @@ -76,23 +77,20 @@ def main(): opts = [opts] for i, values in enumerate(opts): + dest = args.path / dependency if i == 0: - dest = args.path / dependency - else: - dest = tempdir / dependency - get_source( - values["source"], - values["ref"], - dest, - dependency, - args.mirrors, - args.mirror_loc - ) - if i == 0: + get_source( + values["source"], + values["ref"], + dest, + dependency, + args.mirrors, + args.mirror_loc, + ) continue - command = f"git -C {args.path / dependency} merge {tempdir / dependency}" - run_command(command) + merge_source(values["source"], values["ref"], dependency, dest, args.mirrors, args.mirror_loc) + rmtree(tempdir) if __name__ == "__main__": From da3860aacfedce7e996153cf49b03649f9a70667 Mon Sep 17 00:00:00 2001 From: James Bruten <109733895+james-bruten-mo@users.noreply.github.com> Date: Tue, 27 Jan 2026 11:57:09 +0000 Subject: [PATCH 12/31] ruff --- github_scripts/get_git_sources.py | 10 +++++++++- github_scripts/merge_sources.py | 9 ++++++++- github_scripts/rose_stem_extract_source.py | 6 +++++- github_scripts/suite_data.py | 3 +-- 4 files changed, 23 insertions(+), 5 deletions(-) diff --git a/github_scripts/get_git_sources.py b/github_scripts/get_git_sources.py index ff5a55d1..ba13f7b0 100644 --- a/github_scripts/get_git_sources.py +++ b/github_scripts/get_git_sources.py @@ -80,7 +80,14 @@ def get_source( sync_repo(source, ref, dest) -def merge_source(source: str, ref: str, repo: str, dest: Path, use_mirrors: bool = False, mirror_loc: Path = Path("")): +def merge_source( + source: str, + ref: str, + repo: str, + dest: Path, + use_mirrors: bool = False, + mirror_loc: Path = Path(""), +): """ Merge git source into a local git clone. Assumes dest is a git clone that this source can be merged into. @@ -121,6 +128,7 @@ def determine_mirror_fetch(repo_source: str, repo_ref: str) -> str: return fetch + def clone_repo_mirror( repo_source: str, repo_ref: str, diff --git a/github_scripts/merge_sources.py b/github_scripts/merge_sources.py index 2f6514b9..d523964d 100755 --- a/github_scripts/merge_sources.py +++ b/github_scripts/merge_sources.py @@ -88,7 +88,14 @@ def main(): args.mirror_loc, ) continue - merge_source(values["source"], values["ref"], dependency, dest, args.mirrors, args.mirror_loc) + merge_source( + values["source"], + values["ref"], + dependency, + dest, + args.mirrors, + args.mirror_loc, + ) rmtree(tempdir) diff --git a/github_scripts/rose_stem_extract_source.py b/github_scripts/rose_stem_extract_source.py index 10aebc0d..223882d1 100755 --- a/github_scripts/rose_stem_extract_source.py +++ b/github_scripts/rose_stem_extract_source.py @@ -71,7 +71,11 @@ def main() -> None: f"{dependency} from {mirror_loc} at ref {values['ref']}" ) clone_repo_mirror( - values["source"], values["ref"], values["parent"], mirror_loc, loc + values["source"], + values["ref"], + values["parent"], + mirror_loc, + loc, ) else: print( diff --git a/github_scripts/suite_data.py b/github_scripts/suite_data.py index add597f9..e06b702d 100644 --- a/github_scripts/suite_data.py +++ b/github_scripts/suite_data.py @@ -323,7 +323,7 @@ def generate_cylc_url(self) -> str: Generate a markdown url to the cylc review page of a workflow """ suite_user = os.environ["USER"] - encoded_workflow_id = self.workflow_id.replace('/','%2F') + encoded_workflow_id = self.workflow_id.replace("/", "%2F") cylc_review = ( f"[{self.workflow_id}](https://cylchub/services/cylc-review/cycles" @@ -332,7 +332,6 @@ def generate_cylc_url(self) -> str: return cylc_review - def get_suite_starttime(self) -> str: """ Read the suite starttime from the suite database From 8a69af2d052bba5ec443782c0ec92aa3047b0045 Mon Sep 17 00:00:00 2001 From: James Bruten <109733895+james-bruten-mo@users.noreply.github.com> Date: Tue, 27 Jan 2026 12:59:21 +0000 Subject: [PATCH 13/31] remove remote --- github_scripts/get_git_sources.py | 43 ++++++++++++++++--------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/github_scripts/get_git_sources.py b/github_scripts/get_git_sources.py index ba13f7b0..8aaaaa2f 100644 --- a/github_scripts/get_git_sources.py +++ b/github_scripts/get_git_sources.py @@ -106,27 +106,7 @@ def merge_source( run_command(f"git -C {dest} fetch local {fetch}") run_command(f"git -C {dest} merge FETCH_HEAD") - - -def determine_mirror_fetch(repo_source: str, repo_ref: str) -> str: - """ - Determine the fetch ref for the git mirrors - """ - - repo_source = repo_source.removeprefix("git@github.com:") - user = repo_source.split("/")[0] - # Check that the user is different to the Upstream User - if "MetOffice" in user: - user = None - - # If the ref is a hash then we don't need the fork user as part of the fetch. - # Equally, if the user is the Upstream User, it's not needed - if not user or re.match(r"^\s*([0-9a-f]{40})\s*$", repo_ref): - fetch = repo_ref - else: - fetch = f"{user}/{repo_ref}" - - return fetch + run_command(f"git -C {dest} remote remove local") def clone_repo_mirror( @@ -172,6 +152,27 @@ def clone_repo_mirror( run_command(command) +def determine_mirror_fetch(repo_source: str, repo_ref: str) -> str: + """ + Determine the fetch ref for the git mirrors + """ + + repo_source = repo_source.removeprefix("git@github.com:") + user = repo_source.split("/")[0] + # Check that the user is different to the Upstream User + if "MetOffice" in user: + user = None + + # If the ref is a hash then we don't need the fork user as part of the fetch. + # Equally, if the user is the Upstream User, it's not needed + if not user or re.match(r"^\s*([0-9a-f]{40})\s*$", repo_ref): + fetch = repo_ref + else: + fetch = f"{user}/{repo_ref}" + + return fetch + + def clone_repo(repo_source: str, repo_ref: str, loc: Path) -> None: """ Clone the repo and checkout the provided ref From 732a196dc1dc1e548bb26abf8151ee7cb41878cf Mon Sep 17 00:00:00 2001 From: James Bruten <109733895+james-bruten-mo@users.noreply.github.com> Date: Tue, 27 Jan 2026 17:06:08 +0000 Subject: [PATCH 14/31] working merge script --- github_scripts/get_git_sources.py | 85 ++++++++++++++++++++-- github_scripts/merge_sources.py | 20 +++-- github_scripts/rose_stem_extract_source.py | 73 +++++++------------ 3 files changed, 118 insertions(+), 60 deletions(-) diff --git a/github_scripts/get_git_sources.py b/github_scripts/get_git_sources.py index 8aaaaa2f..62fa85de 100644 --- a/github_scripts/get_git_sources.py +++ b/github_scripts/get_git_sources.py @@ -9,6 +9,7 @@ import re import subprocess +from datetime import datetime from typing import Optional from pathlib import Path from shutil import rmtree @@ -69,14 +70,23 @@ def get_source( if ".git" in source: if use_mirrors: + print( + f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] Cloning " + f"{repo} from {mirror_loc} at ref {ref}" + ) mirror_loc = Path(mirror_loc) / "MetOffice" / repo - print(f"Cloning/Updating {repo} from mirror {mirror_loc} at ref {ref}") clone_repo_mirror(source, ref, mirror_loc, dest) else: - print(f"Cloning/Updating {repo} from {source} at ref {ref}") + print( + f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] Cloning " + f"{repo} from {source} at ref {ref}" + ) clone_repo(source, ref, dest) else: - print(f"Syncing {repo} at ref {ref}") + print( + f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] Syncing " + f"{repo} at ref {ref}" + ) sync_repo(source, ref, dest) @@ -87,12 +97,17 @@ def merge_source( dest: Path, use_mirrors: bool = False, mirror_loc: Path = Path(""), -): +) -> None: """ Merge git source into a local git clone. Assumes dest is a git clone that this source can be merged into. """ + print( + f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] Merging " + f"{source} at ref {ref} into {repo}" + ) + if use_mirrors: remote_path = Path(mirror_loc) / "MetOffice" / repo else: @@ -105,10 +120,51 @@ def merge_source( fetch = ref run_command(f"git -C {dest} fetch local {fetch}") - run_command(f"git -C {dest} merge FETCH_HEAD") + result = run_command(f"git -C {dest} merge --no-gpg-sign FETCH_HEAD", check=False) + if result.returncode: + unmerged_files = get_unmerged(dest) + if unmerged_files: + handle_merge_conflicts(source, ref, dest, repo) + + # Remove the added remote run_command(f"git -C {dest} remote remove local") +def handle_merge_conflicts(source: str, ref: str, loc: Path, dependency: str) -> None: + """ + Attempt to mark merge conflicts as resolved in they are in rose-stem or + dependencies.yaml + If others remain then raise an error + """ + + # For suites, merge conflicts in these files/directories are unimportant so accept + # the current changes + for filepath in ("dependencies.yaml", "rose-stem"): + print(f"Ignoring merge conflicts in {filepath}") + run_command(f"git -C {loc} checkout --ours -- {filepath}") + run_command(f"git -C {loc} add {filepath}") + + # Check if there are any remaining merge conflicts + unmerged = get_unmerged(loc) + if unmerged: + files = "\n".join(f for f in unmerged) + raise RuntimeError( + "\nA merge conflict has been identified while merging the following branch " + f"into the {dependency} source:\n\nsource: {source}\nref: {ref}\n\n" + f"with conflicting files:{files}" + "\n\nThese will need changing in the source branches to be useable together" + ) + + +def get_unmerged(loc: Path) -> list[str]: + """ + Return list of unmerged files in a git clone + """ + + files = run_command(f"git -C {loc} --no-pager diff --name-only --diff-filter=U") + return files.stdout.split() + + def clone_repo_mirror( repo_source: str, repo_ref: str, @@ -258,3 +314,22 @@ def sync_repo(repo_source: str, repo_ref: str, loc: Path) -> None: if repo_ref: command = f"git -C {loc} checkout {repo_ref}" run_command(command) + + +def set_https(dependencies: dict) -> dict: + """ + Change sources in a dependencies dictions to use https instead of ssh + """ + + print("Modifying Dependencies") + for dependency, opts in dependencies.items(): + if not isinstance(opts, list): + opts = [opts] + for values in opts: + if values["source"].startswith("git@github.com:"): + source = dependencies[dependency]["source"] + dependencies[dependency]["source"] = source.replace( + "git@github.com:", "https://github.com/" + ) + + return dependencies diff --git a/github_scripts/merge_sources.py b/github_scripts/merge_sources.py index d523964d..2c495a9e 100755 --- a/github_scripts/merge_sources.py +++ b/github_scripts/merge_sources.py @@ -12,9 +12,7 @@ import os import yaml from pathlib import Path -from shutil import rmtree -from tempfile import mkdtemp -from get_git_sources import get_source, merge_source +from get_git_sources import get_source, merge_source, set_https def parse_args(): @@ -48,6 +46,12 @@ def parse_args(): default="/data/users/gitassist/git_mirrors", help="Location of github mirrors", ) + parser.add_argument( + "--tokens", + action="store_true", + help="If true, https github sources will be used, requiring github " + "authentication via Personal Access Tokens", + ) args = parser.parse_args() args.dependencies = args.dependencies.resolve() if args.dependencies.name != "dependencies.yaml": @@ -67,17 +71,19 @@ def main(): args = parse_args() - tempdir = Path(mkdtemp()) - with open(args.dependencies, "r") as stream: dependencies = yaml.safe_load(stream) + if args.tokens: + dependencies = set_https(dependencies) + for dependency, opts in dependencies.items(): + dest = args.path / dependency + if not isinstance(opts, list): opts = [opts] for i, values in enumerate(opts): - dest = args.path / dependency if i == 0: get_source( values["source"], @@ -97,8 +103,6 @@ def main(): args.mirror_loc, ) - rmtree(tempdir) - if __name__ == "__main__": main() diff --git a/github_scripts/rose_stem_extract_source.py b/github_scripts/rose_stem_extract_source.py index 223882d1..bc7e4137 100755 --- a/github_scripts/rose_stem_extract_source.py +++ b/github_scripts/rose_stem_extract_source.py @@ -14,27 +14,7 @@ import os from pathlib import Path from ast import literal_eval -from get_git_sources import clone_repo, clone_repo_mirror, sync_repo -from datetime import datetime - - -def set_https(dependencies: dict) -> dict: - """ - Change sources in a dependencies dictions to use https instead of ssh - """ - - print("Modifying Dependencies") - for dependency, opts in dependencies.items(): - if not isinstance(opts, list): - opts = [opts] - for values in opts: - if values["source"].startswith("git@github.com:"): - source = dependencies[dependency]["source"] - dependencies[dependency]["source"] = source.replace( - "git@github.com:", "https://github.com/" - ) - - return dependencies +from get_git_sources import get_source, merge_source, set_https def main() -> None: @@ -56,39 +36,38 @@ def main() -> None: if os.environ.get("USE_TOKENS", "False") == "True": dependencies = set_https(dependencies) + use_mirrors = False + if os.environ.get("USE_MIRRORS", "False") == "True": + use_mirrors = True + for dependency, opts in dependencies.items(): loc = clone_loc / dependency if not isinstance(opts, list): opts = [opts] - for values in opts: - if ".git" in values["source"]: - if os.environ.get("USE_MIRRORS", "False") == "True": - mirror_loc = Path(os.environ["GIT_MIRROR_LOC"]) / values["parent"] - print( - f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] Cloning " - f"{dependency} from {mirror_loc} at ref {values['ref']}" - ) - clone_repo_mirror( - values["source"], - values["ref"], - values["parent"], - mirror_loc, - loc, - ) - else: - print( - f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] Cloning " - f"{dependency} from {values['source']} at ref {values['ref']}" - ) - clone_repo(values["source"], values["ref"], loc) - else: - print( - f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] Syncing " - f"{dependency} at ref {values['ref']}" + for i, values in enumerate(opts): + mirror_loc = Path(os.getenv("GIT_MIRROR_LOC", "")) / "MetOffice" + + if i == 0: + get_source( + values["source"], + values["ref"], + loc, + dependency, + use_mirrors, + mirror_loc, ) - sync_repo(values["source"], values["ref"], loc) + continue + + merge_source( + values["source"], + values["ref"], + dependency, + loc, + use_mirrors, + mirror_loc, + ) if __name__ == "__main__": From 2ad25439655cd0da80c8fd8f944ed7c60db1ac48 Mon Sep 17 00:00:00 2001 From: James Bruten <109733895+james-bruten-mo@users.noreply.github.com> Date: Tue, 27 Jan 2026 19:05:36 +0000 Subject: [PATCH 15/31] add another error --- github_scripts/get_git_sources.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/github_scripts/get_git_sources.py b/github_scripts/get_git_sources.py index 62fa85de..7922fd20 100644 --- a/github_scripts/get_git_sources.py +++ b/github_scripts/get_git_sources.py @@ -120,11 +120,16 @@ def merge_source( fetch = ref run_command(f"git -C {dest} fetch local {fetch}") - result = run_command(f"git -C {dest} merge --no-gpg-sign FETCH_HEAD", check=False) + command = f"git -C {dest} mergee --no-gpg-sign FETCH_HEAD" + result = run_command(command, check=False) if result.returncode: unmerged_files = get_unmerged(dest) if unmerged_files: handle_merge_conflicts(source, ref, dest, repo) + else: + raise subprocess.CalledProcessError( + result.returncode, command, result.stdout, result.stderr + ) # Remove the added remote run_command(f"git -C {dest} remote remove local") From 43c8f0e3e95204305b73571619e0b64421c0df4c Mon Sep 17 00:00:00 2001 From: James Bruten <109733895+james-bruten-mo@users.noreply.github.com> Date: Wed, 28 Jan 2026 08:56:48 +0000 Subject: [PATCH 16/31] modify argument order --- github_scripts/get_git_sources.py | 2 +- github_scripts/merge_sources.py | 2 +- github_scripts/rose_stem_extract_source.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/github_scripts/get_git_sources.py b/github_scripts/get_git_sources.py index 7922fd20..a2a9eb64 100644 --- a/github_scripts/get_git_sources.py +++ b/github_scripts/get_git_sources.py @@ -93,8 +93,8 @@ def get_source( def merge_source( source: str, ref: str, - repo: str, dest: Path, + repo: str, use_mirrors: bool = False, mirror_loc: Path = Path(""), ) -> None: diff --git a/github_scripts/merge_sources.py b/github_scripts/merge_sources.py index 2c495a9e..d04ae094 100755 --- a/github_scripts/merge_sources.py +++ b/github_scripts/merge_sources.py @@ -97,8 +97,8 @@ def main(): merge_source( values["source"], values["ref"], - dependency, dest, + dependency, args.mirrors, args.mirror_loc, ) diff --git a/github_scripts/rose_stem_extract_source.py b/github_scripts/rose_stem_extract_source.py index bc7e4137..084477b8 100755 --- a/github_scripts/rose_stem_extract_source.py +++ b/github_scripts/rose_stem_extract_source.py @@ -63,8 +63,8 @@ def main() -> None: merge_source( values["source"], values["ref"], - dependency, loc, + dependency, use_mirrors, mirror_loc, ) From 7a9a6c763be96c61396f7c15e8424fea54055933 Mon Sep 17 00:00:00 2001 From: James Bruten <109733895+james-bruten-mo@users.noreply.github.com> Date: Wed, 28 Jan 2026 11:47:00 +0000 Subject: [PATCH 17/31] Update github_scripts/get_git_sources.py Co-authored-by: Sam Clarke-Green <74185251+t00sa@users.noreply.github.com> --- github_scripts/get_git_sources.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/github_scripts/get_git_sources.py b/github_scripts/get_git_sources.py index a2a9eb64..154af8a0 100644 --- a/github_scripts/get_git_sources.py +++ b/github_scripts/get_git_sources.py @@ -120,7 +120,7 @@ def merge_source( fetch = ref run_command(f"git -C {dest} fetch local {fetch}") - command = f"git -C {dest} mergee --no-gpg-sign FETCH_HEAD" + command = f"git -C {dest} merge --no-gpg-sign FETCH_HEAD" result = run_command(command, check=False) if result.returncode: unmerged_files = get_unmerged(dest) From 60997bdab5855acf2013798e986e1fa795b6f2f5 Mon Sep 17 00:00:00 2001 From: James Bruten <109733895+james-bruten-mo@users.noreply.github.com> Date: Wed, 28 Jan 2026 11:47:12 +0000 Subject: [PATCH 18/31] Update github_scripts/merge_sources.py Co-authored-by: Sam Clarke-Green <74185251+t00sa@users.noreply.github.com> --- github_scripts/merge_sources.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/github_scripts/merge_sources.py b/github_scripts/merge_sources.py index d04ae094..4e4c3501 100755 --- a/github_scripts/merge_sources.py +++ b/github_scripts/merge_sources.py @@ -54,7 +54,7 @@ def parse_args(): ) args = parser.parse_args() args.dependencies = args.dependencies.resolve() - if args.dependencies.name != "dependencies.yaml": + if args.dependencies.is_dir(): args.dependencies = args.dependencies / "dependencies.yaml" if not args.path: From 9224a8271e9a94827f4999de7cb56560b347ef45 Mon Sep 17 00:00:00 2001 From: James Bruten <109733895+james-bruten-mo@users.noreply.github.com> Date: Wed, 28 Jan 2026 11:47:36 +0000 Subject: [PATCH 19/31] Update github_scripts/merge_sources.py Co-authored-by: Sam Clarke-Green <74185251+t00sa@users.noreply.github.com> --- github_scripts/merge_sources.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/github_scripts/merge_sources.py b/github_scripts/merge_sources.py index 4e4c3501..be452f72 100755 --- a/github_scripts/merge_sources.py +++ b/github_scripts/merge_sources.py @@ -71,8 +71,7 @@ def main(): args = parse_args() - with open(args.dependencies, "r") as stream: - dependencies = yaml.safe_load(stream) + dependencies = yaml.safe_load(args.dependencies.read_text()) if args.tokens: dependencies = set_https(dependencies) From 18f86102054ed78adc1522c7af4432ad9df9c02f Mon Sep 17 00:00:00 2001 From: James Bruten <109733895+james-bruten-mo@users.noreply.github.com> Date: Wed, 28 Jan 2026 11:48:08 +0000 Subject: [PATCH 20/31] Update github_scripts/rose_stem_extract_source.py Co-authored-by: Sam Clarke-Green <74185251+t00sa@users.noreply.github.com> --- github_scripts/rose_stem_extract_source.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/github_scripts/rose_stem_extract_source.py b/github_scripts/rose_stem_extract_source.py index 084477b8..300df72f 100755 --- a/github_scripts/rose_stem_extract_source.py +++ b/github_scripts/rose_stem_extract_source.py @@ -36,9 +36,7 @@ def main() -> None: if os.environ.get("USE_TOKENS", "False") == "True": dependencies = set_https(dependencies) - use_mirrors = False - if os.environ.get("USE_MIRRORS", "False") == "True": - use_mirrors = True + use_mirrors = os.environ.get("USE_MIRRORS", "False") == "True" for dependency, opts in dependencies.items(): loc = clone_loc / dependency From 6bc07c6f4af1f64dd066e00e1bf5fc59c6cb0dd3 Mon Sep 17 00:00:00 2001 From: James Bruten <109733895+james-bruten-mo@users.noreply.github.com> Date: Wed, 28 Jan 2026 11:54:11 +0000 Subject: [PATCH 21/31] update to use logger --- github_scripts/get_git_sources.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/github_scripts/get_git_sources.py b/github_scripts/get_git_sources.py index 154af8a0..650e3ac4 100644 --- a/github_scripts/get_git_sources.py +++ b/github_scripts/get_git_sources.py @@ -66,24 +66,22 @@ def get_source( Call functions to clone or rsync git source """ - print(source, ref, dest, repo, use_mirrors, mirror_loc) - if ".git" in source: if use_mirrors: - print( + logger.info( f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] Cloning " f"{repo} from {mirror_loc} at ref {ref}" ) mirror_loc = Path(mirror_loc) / "MetOffice" / repo clone_repo_mirror(source, ref, mirror_loc, dest) else: - print( + logger.info( f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] Cloning " f"{repo} from {source} at ref {ref}" ) clone_repo(source, ref, dest) else: - print( + logger.info( f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] Syncing " f"{repo} at ref {ref}" ) @@ -103,7 +101,7 @@ def merge_source( source can be merged into. """ - print( + logger.info( f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] Merging " f"{source} at ref {ref} into {repo}" ) @@ -145,7 +143,7 @@ def handle_merge_conflicts(source: str, ref: str, loc: Path, dependency: str) -> # For suites, merge conflicts in these files/directories are unimportant so accept # the current changes for filepath in ("dependencies.yaml", "rose-stem"): - print(f"Ignoring merge conflicts in {filepath}") + logger.warning(f"Ignoring merge conflicts in {filepath}") run_command(f"git -C {loc} checkout --ours -- {filepath}") run_command(f"git -C {loc} add {filepath}") @@ -312,9 +310,11 @@ def sync_repo(repo_source: str, repo_ref: str, loc: Path) -> None: command = f"git -C {loc} fetch origin main:main" result = run_command(command, check=False) if result and result.returncode: - print("Warning - fetching main from origin resulted in an error") - print("This is likely due to the main branch already existing") - print(f"Error message:\n\n{result.stderr}") + logger.warning( + "Fetching main from origin resulted in an error." + "This is likely due to the main branch already existing" + f"\nError message:\n\n{result.stderr}" + ) if repo_ref: command = f"git -C {loc} checkout {repo_ref}" @@ -326,7 +326,7 @@ def set_https(dependencies: dict) -> dict: Change sources in a dependencies dictions to use https instead of ssh """ - print("Modifying Dependencies") + logger.info("Modifying Dependencies to use https") for dependency, opts in dependencies.items(): if not isinstance(opts, list): opts = [opts] From 7fb1810ae950e51080af09b57cfc73ec18b13cec Mon Sep 17 00:00:00 2001 From: James Bruten <109733895+james-bruten-mo@users.noreply.github.com> Date: Wed, 28 Jan 2026 14:38:08 +0000 Subject: [PATCH 22/31] loggin gchanges --- github_scripts/get_git_sources.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/github_scripts/get_git_sources.py b/github_scripts/get_git_sources.py index 650e3ac4..1ab927a9 100644 --- a/github_scripts/get_git_sources.py +++ b/github_scripts/get_git_sources.py @@ -16,7 +16,9 @@ import shlex import logging +logging.basicConfig() logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) def run_command( From cf126520d6d45c7a3a8ba3c70d4c29487175471e Mon Sep 17 00:00:00 2001 From: James Bruten <109733895+james-bruten-mo@users.noreply.github.com> Date: Wed, 28 Jan 2026 14:41:55 +0000 Subject: [PATCH 23/31] loggin gchanges --- github_scripts/get_git_sources.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/github_scripts/get_git_sources.py b/github_scripts/get_git_sources.py index 1ab927a9..426a2fff 100644 --- a/github_scripts/get_git_sources.py +++ b/github_scripts/get_git_sources.py @@ -16,7 +16,7 @@ import shlex import logging -logging.basicConfig() +logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) From da8239d7b5a2a7f246794fd185b240b1ab67c379 Mon Sep 17 00:00:00 2001 From: James Bruten <109733895+james-bruten-mo@users.noreply.github.com> Date: Wed, 28 Jan 2026 14:46:40 +0000 Subject: [PATCH 24/31] logging changes --- github_scripts/get_git_sources.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/github_scripts/get_git_sources.py b/github_scripts/get_git_sources.py index 426a2fff..6ab1d321 100644 --- a/github_scripts/get_git_sources.py +++ b/github_scripts/get_git_sources.py @@ -18,7 +18,8 @@ logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) -logger.setLevel(logging.INFO) +logger.info("Git Source Extraction") +# logger.setLevel(logging.INFO) def run_command( From 546317900a62bb3d8d91ab312513233572eb81a4 Mon Sep 17 00:00:00 2001 From: James Bruten <109733895+james-bruten-mo@users.noreply.github.com> Date: Wed, 28 Jan 2026 14:51:29 +0000 Subject: [PATCH 25/31] logging changes --- github_scripts/get_git_sources.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/github_scripts/get_git_sources.py b/github_scripts/get_git_sources.py index 6ab1d321..a9ecb7f0 100644 --- a/github_scripts/get_git_sources.py +++ b/github_scripts/get_git_sources.py @@ -16,8 +16,8 @@ import shlex import logging -logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) +logging.basicConfig(level=logging.INFO) logger.info("Git Source Extraction") # logger.setLevel(logging.INFO) From 5835d3662eab80978422a4cde9f601998215d5f1 Mon Sep 17 00:00:00 2001 From: James Bruten <109733895+james-bruten-mo@users.noreply.github.com> Date: Wed, 28 Jan 2026 14:56:16 +0000 Subject: [PATCH 26/31] logging changes --- github_scripts/get_git_sources.py | 1 - github_scripts/merge_sources.py | 3 +++ github_scripts/rose_stem_extract_source.py | 3 +++ 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/github_scripts/get_git_sources.py b/github_scripts/get_git_sources.py index a9ecb7f0..90877069 100644 --- a/github_scripts/get_git_sources.py +++ b/github_scripts/get_git_sources.py @@ -17,7 +17,6 @@ import logging logger = logging.getLogger(__name__) -logging.basicConfig(level=logging.INFO) logger.info("Git Source Extraction") # logger.setLevel(logging.INFO) diff --git a/github_scripts/merge_sources.py b/github_scripts/merge_sources.py index be452f72..784af952 100755 --- a/github_scripts/merge_sources.py +++ b/github_scripts/merge_sources.py @@ -10,6 +10,7 @@ import argparse import os +import logging import yaml from pathlib import Path from get_git_sources import get_source, merge_source, set_https @@ -71,6 +72,8 @@ def main(): args = parse_args() + logging.basicConfig(level=logging.INFO) + dependencies = yaml.safe_load(args.dependencies.read_text()) if args.tokens: diff --git a/github_scripts/rose_stem_extract_source.py b/github_scripts/rose_stem_extract_source.py index 300df72f..f2b7c4cd 100755 --- a/github_scripts/rose_stem_extract_source.py +++ b/github_scripts/rose_stem_extract_source.py @@ -12,6 +12,7 @@ """ import os +import logging from pathlib import Path from ast import literal_eval from get_git_sources import get_source, merge_source, set_https @@ -30,6 +31,8 @@ def main() -> None: 4. If USE_MIRRORS is True, clone from local mirrors at GIT_MIRROR_LOC """ + logging.basicConfig(level=logging.INFO) + clone_loc = Path(os.environ["SOURCE_DIRECTORY"]) dependencies: dict = literal_eval(os.environ["DEPENDENCIES"]) From 6f5f6234fe60892fb842cf33f00ee6aa3088e0f1 Mon Sep 17 00:00:00 2001 From: James Bruten <109733895+james-bruten-mo@users.noreply.github.com> Date: Wed, 28 Jan 2026 15:00:10 +0000 Subject: [PATCH 27/31] logging changes --- github_scripts/get_git_sources.py | 3 ++- github_scripts/merge_sources.py | 3 --- github_scripts/rose_stem_extract_source.py | 3 --- 3 files changed, 2 insertions(+), 7 deletions(-) diff --git a/github_scripts/get_git_sources.py b/github_scripts/get_git_sources.py index 90877069..13e796ed 100644 --- a/github_scripts/get_git_sources.py +++ b/github_scripts/get_git_sources.py @@ -14,11 +14,12 @@ from pathlib import Path from shutil import rmtree import shlex +import sys import logging logger = logging.getLogger(__name__) +logging.basicConfig(level=logging.INFO, stream=sys.stdout) logger.info("Git Source Extraction") -# logger.setLevel(logging.INFO) def run_command( diff --git a/github_scripts/merge_sources.py b/github_scripts/merge_sources.py index 784af952..be452f72 100755 --- a/github_scripts/merge_sources.py +++ b/github_scripts/merge_sources.py @@ -10,7 +10,6 @@ import argparse import os -import logging import yaml from pathlib import Path from get_git_sources import get_source, merge_source, set_https @@ -72,8 +71,6 @@ def main(): args = parse_args() - logging.basicConfig(level=logging.INFO) - dependencies = yaml.safe_load(args.dependencies.read_text()) if args.tokens: diff --git a/github_scripts/rose_stem_extract_source.py b/github_scripts/rose_stem_extract_source.py index f2b7c4cd..300df72f 100755 --- a/github_scripts/rose_stem_extract_source.py +++ b/github_scripts/rose_stem_extract_source.py @@ -12,7 +12,6 @@ """ import os -import logging from pathlib import Path from ast import literal_eval from get_git_sources import get_source, merge_source, set_https @@ -31,8 +30,6 @@ def main() -> None: 4. If USE_MIRRORS is True, clone from local mirrors at GIT_MIRROR_LOC """ - logging.basicConfig(level=logging.INFO) - clone_loc = Path(os.environ["SOURCE_DIRECTORY"]) dependencies: dict = literal_eval(os.environ["DEPENDENCIES"]) From 682968a61901ad29d3463ad805b9e36eadad7a19 Mon Sep 17 00:00:00 2001 From: James Bruten <109733895+james-bruten-mo@users.noreply.github.com> Date: Wed, 28 Jan 2026 15:01:59 +0000 Subject: [PATCH 28/31] logging changes --- github_scripts/get_git_sources.py | 1 - 1 file changed, 1 deletion(-) diff --git a/github_scripts/get_git_sources.py b/github_scripts/get_git_sources.py index 13e796ed..28b89370 100644 --- a/github_scripts/get_git_sources.py +++ b/github_scripts/get_git_sources.py @@ -19,7 +19,6 @@ logger = logging.getLogger(__name__) logging.basicConfig(level=logging.INFO, stream=sys.stdout) -logger.info("Git Source Extraction") def run_command( From 248e6dc88c41587e4c47ca4c2f123b3b61e9bb82 Mon Sep 17 00:00:00 2001 From: James Bruten <109733895+james-bruten-mo@users.noreply.github.com> Date: Thu, 29 Jan 2026 07:58:09 +0000 Subject: [PATCH 29/31] move basicconfig --- github_scripts/merge_sources.py | 4 ++++ github_scripts/rose_stem_extract_source.py | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/github_scripts/merge_sources.py b/github_scripts/merge_sources.py index be452f72..4ce0341f 100755 --- a/github_scripts/merge_sources.py +++ b/github_scripts/merge_sources.py @@ -13,6 +13,8 @@ import yaml from pathlib import Path from get_git_sources import get_source, merge_source, set_https +import logging +import sys def parse_args(): @@ -71,6 +73,8 @@ def main(): args = parse_args() + logging.basicConfig(level=logging.INFO, stream=sys.stdout) + dependencies = yaml.safe_load(args.dependencies.read_text()) if args.tokens: diff --git a/github_scripts/rose_stem_extract_source.py b/github_scripts/rose_stem_extract_source.py index 300df72f..a2ae5478 100755 --- a/github_scripts/rose_stem_extract_source.py +++ b/github_scripts/rose_stem_extract_source.py @@ -15,6 +15,8 @@ from pathlib import Path from ast import literal_eval from get_git_sources import get_source, merge_source, set_https +import logging +import sys def main() -> None: @@ -30,6 +32,8 @@ def main() -> None: 4. If USE_MIRRORS is True, clone from local mirrors at GIT_MIRROR_LOC """ + logging.basicConfig(level=logging.INFO, stream=sys.stdout) + clone_loc = Path(os.environ["SOURCE_DIRECTORY"]) dependencies: dict = literal_eval(os.environ["DEPENDENCIES"]) From 76dd38b339da1b7f9c22979549ae76bf8859d3c4 Mon Sep 17 00:00:00 2001 From: James Bruten <109733895+james-bruten-mo@users.noreply.github.com> Date: Thu, 29 Jan 2026 11:59:20 +0000 Subject: [PATCH 30/31] Apply suggestion from @ericaneininger Co-authored-by: Erica Neininger <107684099+ericaneininger@users.noreply.github.com> --- github_scripts/get_git_sources.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/github_scripts/get_git_sources.py b/github_scripts/get_git_sources.py index 09b69039..0233be69 100644 --- a/github_scripts/get_git_sources.py +++ b/github_scripts/get_git_sources.py @@ -314,7 +314,7 @@ def sync_repo(repo_source: str, repo_ref: str, loc: Path) -> None: def set_https(dependencies: dict) -> dict: """ - Change sources in a dependencies dictions to use https instead of ssh + Change sources in a dependencies dictionary to use https instead of ssh """ logger.info("Modifying Dependencies to use https") From c4f4ee5037d281e351eec1e015bc87fb7e15ddd5 Mon Sep 17 00:00:00 2001 From: James Bruten <109733895+james-bruten-mo@users.noreply.github.com> Date: Thu, 29 Jan 2026 13:43:45 +0000 Subject: [PATCH 31/31] cr changes --- github_scripts/get_git_sources.py | 50 ++++++++++++++++------ github_scripts/merge_sources.py | 26 +++++------ github_scripts/rose_stem_extract_source.py | 34 +++++++-------- 3 files changed, 69 insertions(+), 41 deletions(-) diff --git a/github_scripts/get_git_sources.py b/github_scripts/get_git_sources.py index 09b69039..24d1b2cb 100644 --- a/github_scripts/get_git_sources.py +++ b/github_scripts/get_git_sources.py @@ -56,6 +56,39 @@ def run_command( raise +def validate_dependencies(dependencies: dict) -> None: + """ + Check that the dependencies file dictionary matches format expectations. + Each dictionary value should be a list of dictionaries (or a single dictionary) + Those dictionaries should have a "source" and a "ref" key + """ + for item, values in dependencies.items(): + failed = False + if isinstance(values, dict): + values = [values] + if not isinstance(values, list): + failed = True + else: + for entry in values: + if not isinstance(entry, dict) or ( + "source" not in entry or "ref" not in entry + ): + failed = True + if failed: + raise ValueError( + f"The dependency {item} does not contain a list of dictionaries (or a " + "single dictionary) with keys of 'source' and 'ref'.\nPlease edit your " + "dependencies.yaml file to satisfy this." + ) + + +def datetime_str() -> str: + """ + Create and return a datetime string at the current time + """ + return datetime.now().strftime("%Y-%m-%d %H:%M:%S") + + def get_source( source: str, ref: str, @@ -71,22 +104,15 @@ def get_source( if ".git" in source: if use_mirrors: logger.info( - f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] Cloning " - f"{repo} from {mirror_loc} at ref {ref}" + f"[{datetime_str()}] Cloning {repo} from {mirror_loc} at ref {ref}" ) mirror_loc = Path(mirror_loc) / "MetOffice" / repo clone_repo_mirror(source, ref, mirror_loc, dest) else: - logger.info( - f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] Cloning " - f"{repo} from {source} at ref {ref}" - ) + logger.info(f"[{datetime_str()}] Cloning {repo} from {source} at ref {ref}") clone_repo(source, ref, dest) else: - logger.info( - f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] Syncing " - f"{repo} at ref {ref}" - ) + logger.info(f"[{datetime_str()}] Syncing {repo} at ref {ref}") sync_repo(source, ref, dest) @@ -137,8 +163,8 @@ def merge_source( def handle_merge_conflicts(source: str, ref: str, loc: Path, dependency: str) -> None: """ - Attempt to mark merge conflicts as resolved in they are in rose-stem or - dependencies.yaml + If merge conflicts are in `rose-stem/` or `dependencies.yaml` then accept the + current changes and mark as resolved. If others remain then raise an error """ diff --git a/github_scripts/merge_sources.py b/github_scripts/merge_sources.py index 4ce0341f..db47d5ba 100755 --- a/github_scripts/merge_sources.py +++ b/github_scripts/merge_sources.py @@ -12,7 +12,7 @@ import os import yaml from pathlib import Path -from get_git_sources import get_source, merge_source, set_https +from get_git_sources import get_source, merge_source, set_https, validate_dependencies import logging import sys @@ -76,6 +76,7 @@ def main(): logging.basicConfig(level=logging.INFO, stream=sys.stdout) dependencies = yaml.safe_load(args.dependencies.read_text()) + validate_dependencies(dependencies) if args.tokens: dependencies = set_https(dependencies) @@ -86,17 +87,18 @@ def main(): if not isinstance(opts, list): opts = [opts] - for i, values in enumerate(opts): - if i == 0: - get_source( - values["source"], - values["ref"], - dest, - dependency, - args.mirrors, - args.mirror_loc, - ) - continue + # Clone the first provided source + values = opts.pop(0) + get_source( + values["source"], + values["ref"], + dest, + dependency, + args.mirrors, + args.mirror_loc, + ) + # For all other sources, attempt to merge into the first + for values in opts: merge_source( values["source"], values["ref"], diff --git a/github_scripts/rose_stem_extract_source.py b/github_scripts/rose_stem_extract_source.py index a2ae5478..4b42e79a 100755 --- a/github_scripts/rose_stem_extract_source.py +++ b/github_scripts/rose_stem_extract_source.py @@ -14,7 +14,7 @@ import os from pathlib import Path from ast import literal_eval -from get_git_sources import get_source, merge_source, set_https +from get_git_sources import get_source, merge_source, set_https, validate_dependencies import logging import sys @@ -36,11 +36,13 @@ def main() -> None: clone_loc = Path(os.environ["SOURCE_DIRECTORY"]) dependencies: dict = literal_eval(os.environ["DEPENDENCIES"]) + validate_dependencies(dependencies) - if os.environ.get("USE_TOKENS", "False") == "True": + if os.environ.get("USE_TOKENS", "false").lower() == "true": dependencies = set_https(dependencies) - use_mirrors = os.environ.get("USE_MIRRORS", "False") == "True" + use_mirrors = os.environ.get("USE_MIRRORS", "false").lower() == "true" + mirror_loc = Path(os.getenv("GIT_MIRROR_LOC", "")) / "MetOffice" for dependency, opts in dependencies.items(): loc = clone_loc / dependency @@ -48,20 +50,18 @@ def main() -> None: if not isinstance(opts, list): opts = [opts] - for i, values in enumerate(opts): - mirror_loc = Path(os.getenv("GIT_MIRROR_LOC", "")) / "MetOffice" - - if i == 0: - get_source( - values["source"], - values["ref"], - loc, - dependency, - use_mirrors, - mirror_loc, - ) - continue - + # Clone the first provided source + values = opts.pop(0) + get_source( + values["source"], + values["ref"], + loc, + dependency, + use_mirrors, + mirror_loc, + ) + # For all other sources, attempt to merge into the first + for values in opts: merge_source( values["source"], values["ref"],