Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
157 changes: 157 additions & 0 deletions gh_review_project/review_project.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
# -----------------------------------------------------------------------------
# (C) Crown copyright Met Office. All rights reserved.
# The file LICENCE, distributed with this code, contains details of the terms
# under which the code may be used.
# -----------------------------------------------------------------------------

"""
Class and functions for interacting with the Simulation Systems Review Tracker
Project.
"""
from __future__ import annotations

import json
import subprocess
from pathlib import Path
import shlex
from collections import defaultdict


class ProjectData:
    """
    A class to hold GitHub project data.

    data: dict Pull request details extracted from the raw project data,
          keyed by repository name (with the "MetOffice/" prefix removed).
    test: bool Run using test data and extra logging.
    """

    def __init__(self, data: dict, test: bool = False):
        self.data = data
        self.test = test

    @classmethod
    def from_github(cls, capture: bool = False, file: Path | None = None) -> ProjectData:
        """
        Retrieve data from the GitHub API and initialise the class.

        capture: bool Save the raw project data to ``file`` so it can later
                 be used as test input.
        file: Path Destination file for the captured data.

        Raises RuntimeError if the ``gh`` command exits with a non-zero code.
        """
        command = "gh project item-list 376 -L 500 --owner MetOffice --format json"
        output = subprocess.run(shlex.split(command), capture_output=True, timeout=180)
        if output.returncode:
            raise RuntimeError(
                "Error fetching GitHub Project data: \n " + output.stderr.decode()
            )

        raw_data = json.loads(output.stdout)

        if capture:
            if file:
                with open(file, "w") as f:
                    json.dump(raw_data, f)
                print(f"Project data saved to {file}.")
            else:
                print("Unable to capture data as filename not specified.")

        return cls(cls._extract_data(raw_data), test=False)

    @classmethod
    def from_file(cls, file: Path) -> ProjectData:
        """
        Retrieve data from a previously captured test file and initialise
        the class with test logging enabled.
        """
        with open(file) as f:
            raw_data = json.load(f)

        return cls(cls._extract_data(raw_data), test=True)

    @classmethod
    def _extract_data(cls, raw_data: dict) -> dict:
        """
        Extract useful information from the raw data and store it in a
        dictionary keyed by repository. Optional fields default to None
        when they are absent from an item.
        """
        data = defaultdict(list)

        for pr in raw_data["items"]:
            milestone = pr.get("milestone")
            pull_request = {
                "id": pr["id"],
                "title": pr["content"]["title"],
                "number": pr["content"]["number"],
                "status": pr.get("status"),
                "milestone": milestone["title"] if milestone else None,
                # Bug fix: the gh project JSON uses the key "assignees";
                # the previous check for "assignee" never matched, so this
                # field was always None.
                "assignee": pr.get("assignees"),
                "code review": pr.get("code Review"),
                "scitech review": pr.get("sciTech Review"),
            }

            repo = pr["content"]["repository"].replace("MetOffice/", "")
            data[repo].append(pull_request)

        return data

    def get_reviewers_for_repo(self, repo: str) -> list:
        """
        Return a list of reviewers (including duplicates) with reviews
        assigned in the given repository. An unknown repository yields an
        empty list.
        """
        # Membership test rather than indexing: self.data may be a
        # defaultdict, and plain indexing would silently insert an
        # empty entry for an unknown repository.
        if repo not in self.data:
            return []
        pull_requests = self.data[repo]

        reviewers = []

        if self.test:
            print("\n=== Reviewers for " + repo)

        for pr in pull_requests:
            sr = pr["scitech review"]
            if sr:
                reviewers.append(sr)

            cr = pr["code review"]
            if cr:
                reviewers.append(cr)

            if self.test and (cr or sr):
                # Handle case where these are None
                if not sr:
                    sr = ""
                if not cr:
                    cr = ""

                print(
                    "SciTech:",
                    f"{sr: <18}",
                    "Code:",
                    f"{cr: <18}",
                    pr["title"],
                )

        return reviewers

    def get_repositories(self) -> list:
        """Return a list of repositories found in the project data."""

        return list(self.data.keys())
File renamed without changes.
File renamed without changes.
148 changes: 48 additions & 100 deletions workload/workload.py → gh_review_project/workload.py
Original file line number Diff line number Diff line change
@@ -1,110 +1,31 @@
# -----------------------------------------------------------------------------
# (C) Crown copyright Met Office. All rights reserved.
# The file LICENCE, distributed with this code, contains details of the terms
# under which the code may be used.
# -----------------------------------------------------------------------------

"""
This script will read the details of pull requests from the Simulation Systems
Review Tracker project and print tables of the number of reviews assigned to
each reviewer.
"""

import argparse
import json
import subprocess
from pathlib import Path

from prettytable import PrettyTable

from review_project import ProjectData

lfric_repositories = [
"lfric_apps",
"lfric_core",
]

ssd_repositories = [
"um",
"jules",
"socrates",
"casim",
"ukca",
"simulation-systems",
"SimSys_Scripts",
"git_playground",
"growss",
]

adminID = "MGEX82" # person in github teams as a central admin but not relevant here


class ProjectData:
    """
    A class to hold GitHub project data. The focus is on review information.

    data: dict Raw data from the project
    review_data: list Data filtered to contain a list of review tuples
    """

    def __init__(self, test: bool = False, capture: bool = False):
        # test: read data from the captured test file instead of GitHub.
        # capture: save freshly fetched data for later test runs.
        self.data = {}
        self.review_data = []

        # Populate self.data first, then derive self.review_data from it.
        self.fetch_project_data(test, capture)
        self.filter_reviewers(test)

    def fetch_project_data(self, test: bool, capture: bool):
        """
        Retrieve data from the GitHub API or from a test file.

        test: bool Load from test/test.json instead of calling the API.
        capture: bool After fetching from the API, save the raw data to
                 test/test.json for later use with ``test``.

        Raises RuntimeError if the ``gh`` command exits non-zero.
        """
        if test:
            file = Path(__file__).parent / "test" / "test.json"
            with open(file) as f:
                self.data = json.loads(f.read())

        else:
            command = "gh project item-list 376 -L 500 --owner MetOffice --format json"
            output = subprocess.run(command.split(), capture_output=True, timeout=180)
            if output.returncode:
                raise RuntimeError(
                    "Error fetching GitHub Project data: \n " + output.stderr.decode()
                )

            self.data = json.loads(output.stdout)

            if capture:
                file = Path(__file__).parent / "test" / "test.json"
                with open(file, "w") as f:
                    json.dump(self.data, f)
                print(
                    "Project data saved to test.json. Use --test to run with"
                    " the captured data."
                )

    def filter_reviewers(self, test: bool = False):
        """
        Filter the data to create a list of (reviewer, repository) tuples
        in self.review_data.

        test: bool Print each item's review assignments as they are collected.
        """
        all_reviews = self.data["items"]
        for review in all_reviews:
            cr = ""
            sr = ""
            # NOTE(review): assumes the review fields are present on an item
            # only when a reviewer has been assigned — confirm against the
            # gh project item-list JSON output.
            if "code Review" in review:
                cr = review["code Review"].strip()
                self.review_data.append((cr, review["repository"]))

            if "sciTech Review" in review:
                sr = review["sciTech Review"].strip()
                self.review_data.append((sr, review["repository"]))

            if test and (cr or sr):
                print(
                    "SciTech:",
                    f"{sr: <18}",
                    "Code:",
                    f"{cr: <18}",
                    f"{review['repository']: <50}",
                    review["title"],
                )

    def one_repo(self, repository: str) -> list:
        """
        Filter the review data to just that of one repository

        repository: string Name of repository to include
        return: list All reviewers that have reviews assigned in that repository
        including duplicates.
        """
        # Substring match ("repository in x[1]"), so a bare repo name also
        # matches the fully qualified "MetOffice/<repo>" form.
        return [x[0] for x in self.review_data if repository in x[1]]


class Team:
"""
A class to hold GitHub team data.
Expand Down Expand Up @@ -162,6 +83,17 @@ def get_team_members(self) -> list:
return self.members


def other_repo_list(data: ProjectData, to_exclude: list) -> list:
    """
    Build a sorted list of every repository present in the project data,
    skipping any repository named in ``to_exclude``.
    """
    excluded = set(to_exclude)
    remaining = {repo for repo in data.get_repositories() if repo not in excluded}
    return sorted(remaining)


def count_items(item_list: list) -> dict:
"""
Count the number of occurrences of each item in a list.
Expand Down Expand Up @@ -195,7 +127,7 @@ def build_table(data: ProjectData, reviewer_list: list, repos: list) -> PrettyTa
totals = [0] * len(reviewer_list)

for repo in repos:
review_count = count_items(data.one_repo(repo))
review_count = count_items(data.get_reviewers_for_repo(repo))

sorted_count = []
for index, person in enumerate(reviewer_list):
Expand Down Expand Up @@ -234,6 +166,8 @@ def parse_args():
Read command line args
"""

testfile = Path(__file__).parent / "test" / "test.json"

parser = argparse.ArgumentParser(
"Create tables of review workload based on Simulation Systems Review Tracker"
)
Expand All @@ -252,14 +186,28 @@ def parse_args():
action="store_true",
help="Capture the current project status into the test file",
)
parser.add_argument(
"--file",
default=testfile,
help="Filepath to test data for either capture the project status, "
"or use as input data.",
)

args = parser.parse_args()

args.file = Path(args.file)
args.file = args.file.expanduser().resolve()

return parser.parse_args()
return args


def main(total: bool, test: bool, capture_project: bool):
def main(total: bool, test: bool, capture_project: bool, file: Path):

# Extract data from github about the reviews and team members.
data = ProjectData(test, capture_project)
if test:
data = ProjectData.from_file(file)
else:
data = ProjectData.from_github(capture_project, file)

teams = {
"SSD": Team("ssdteam", test),
Expand All @@ -271,8 +219,8 @@ def main(total: bool, test: bool, capture_project: bool):
    # Create tables for each combination of reviewers and repositories
tables = {}

## Table for SSD only repositories
repo_list = ssd_repositories
## Table for non-LFRic repositories
repo_list = other_repo_list(data, lfric_repositories)
reviewers = teams["SSD"].get_team_members()
tables["SSD"] = build_table(data, reviewers, repo_list)

Expand All @@ -295,4 +243,4 @@ def main(total: bool, test: bool, capture_project: bool):

if __name__ == "__main__":
args = parse_args()
main(args.total, args.test, args.capture_project)
main(args.total, args.test, args.capture_project, args.file)
Loading