From beba0781fe4e960d47c9944f3069fe59801ec950 Mon Sep 17 00:00:00 2001
From: jennyhickson <61183013+jennyhickson@users.noreply.github.com>
Date: Thu, 29 Jan 2026 12:35:10 +0000
Subject: [PATCH 01/10] Expand list of repositories for milestones

---
 sbin/gh_manage_milestones | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/sbin/gh_manage_milestones b/sbin/gh_manage_milestones
index e0a459cc..f2fd3b6b 100755
--- a/sbin/gh_manage_milestones
+++ b/sbin/gh_manage_milestones
@@ -13,17 +13,26 @@ set -euo pipefail
 
 # -- Modify milestones in relevant repositories
 REPOS=(
-  "MetOffice/um"
+  "MetOffice/casim"
+  "MetOffice/gcom"
+  "MetOffice/git_playground"
+  "MetOffice/growss"
   "MetOffice/jules"
   "MetOffice/lfric_apps"
   "MetOffice/lfric_core"
-  "MetOffice/ukca"
-  "MetOffice/casim"
-  "MetOffice/socrates"
-  "MetOffice/um_doc"
+  "MetOffice/moci"
+  "MetOffice/mule"
+  "MetOffice/rose_picker"
+  "MetOffice/shumlib"
   "MetOffice/simulation-systems"
   "MetOffice/SimSys_Scripts"
-  "MetOffice/git_playground"
+  "MetOffice/socrates"
+  "MetOffice/socrates-spectral"
+  "MetOffice/ukca"
+  "MetOffice/um"
+  "MetOffice/um_aux"
+  "MetOffice/um_doc"
+  "MetOffice/um_meta"
 )
 
 usage() {
@@ -175,9 +184,9 @@ for repo in "${REPOS[@]}"; do
     fi
 
     # -- Build GH command from optional arguments.
-    gh_args=(-f "title=\"${TITLE}\"")
+    gh_args=(-f "title=${TITLE}")
     [[ -n "$DUE" ]] && gh_args+=(-f "due_on=${DUE}")
-    [[ -n "$DESC" ]] && gh_args+=(-f "description=\"${DESC}\"")
+    [[ -n "$DESC" ]] && gh_args+=(-f "description=${DESC}")
 
 
     # -- Create or update the milestone

From eaef11d3cbba7780f4540c5e7b4c429710e9d0a6 Mon Sep 17 00:00:00 2001
From: jennyhickson <61183013+jennyhickson@users.noreply.github.com>
Date: Thu, 29 Jan 2026 12:35:26 +0000
Subject: [PATCH 02/10] Extract project class into own file

---
 workload/review_project.py | 142 +++++++++++++++++++++++++++++++++++++
 workload/workload.py       | 129 ++++++++-------------------------
 2 files changed, 172 insertions(+), 99 deletions(-)
 create mode 100644 workload/review_project.py

diff --git a/workload/review_project.py b/workload/review_project.py
new file mode 100644
index 00000000..8167b98e
--- /dev/null
+++ b/workload/review_project.py
@@ -0,0 +1,142 @@
+# -----------------------------------------------------------------------------
+# (C) Crown copyright Met Office. All rights reserved.
+# The file LICENCE, distributed with this code, contains details of the terms
+# under which the code may be used.
+# -----------------------------------------------------------------------------
+
+'''
+Class and functions for interacting with the Simulation Systems Review Tracker
+Project.
+'''
+
+import json
+import subprocess
+from pathlib import Path
+
+class ProjectData:
+    """
+    A class to hold GitHub project data
+
+    data: dict Raw data from the project
+    review_data: list Data filtered to contain a list of review tuples
+                 (reviewer, repository)
+    """
+
+    def __init__(self, test: bool = False, capture: bool = False):
+        self.raw_data = {}
+        self.data = {}
+        self.review_data = []
+
+        self.fetch_project_data(test, capture)
+        self.extract_data()
+
+    def fetch_project_data(self, test: bool, capture: bool):
+        """
+        Retrieve data from GitHub API or a from a test file.
+        """
+        if test:
+            file = Path(__file__).parent / "test" / "test.json"
+            with open(file) as f:
+                self.raw_data = json.loads(f.read())
+
+        else:
+            command = "gh project item-list 376 -L 500 --owner MetOffice --format json"
+            output = subprocess.run(command.split(), capture_output=True, timeout=180)
+            if output.returncode:
+                raise RuntimeError(
+                    "Error fetching GitHub Project data:  \n " + output.stderr.decode()
+                )
+
+            self.raw_data = json.loads(output.stdout)
+
+            if capture:
+                file = Path(__file__).parent / "test" / "test.json"
+                with open(file, "w") as f:
+                    json.dump(self.raw_data, f)
+                print(
+                    "Project data saved to test.json. Use --test to run with"
+                    " the captured data."
+                )
+
+    def extract_data(self):
+        for pr in self.raw_data["items"]:
+            pull_request = {}
+            pull_request["id"] = pr["id"]
+            pull_request["title"] = pr["content"]["title"]
+            pull_request["number"] = pr["content"]["number"]
+
+            if "status" in pr:
+                pull_request["status"] = pr["status"]
+            else:
+                pull_request["status"] = None
+
+            if "milestone" in pr:
+                pull_request["milestone"] = pr["milestone"]["title"]
+            else:
+                pull_request["milestone"] = None
+
+            if "assignees" in pr:  # guard on the same key that is read below
+                pull_request["assignee"] = pr["assignees"]
+            else:
+                pull_request["assignee"] = None
+
+            if "code Review" in pr:  # reviewer names are whitespace-trimmed
+                pull_request["code review"] = pr["code Review"].strip()
+            else:
+                pull_request["code review"] = None
+
+            if "sciTech Review" in pr:  # trimmed, as for the code reviewer
+                pull_request["scitech review"] = pr["sciTech Review"].strip()
+            else:
+                pull_request["scitech review"] = None
+
+            repo = pr["content"]["repository"].replace("MetOffice/", "")
+            if repo in self.data:
+                self.data[repo].append(pull_request)
+            else:
+                self.data[repo] = [pull_request]
+
+    def get_reviewers_for_repo(self, repo: str, test: bool = False) -> list:
+        """
+        Return a list of reviewers for a given repository.
+        """
+        if repo in self.data:
+            pull_requests = self.data[repo]
+        else:
+            return []
+
+        reviewers = []
+
+        if test:
+            print("\n=== Reviewers for repository " + repo)
+
+        for pr in pull_requests:
+            sr = pr["scitech review"]
+            if sr:
+                reviewers.append(sr)
+
+            cr = pr["code review"]
+            if cr:
+                reviewers.append(cr)
+
+            if test and (cr or sr):
+                # Handle case where these are None
+                if not sr:
+                    sr = ""
+                if not cr:
+                    cr = ""
+
+                print(
+                    "SciTech:",
+                    f"{sr: <18}",
+                    "Code:",
+                    f"{cr: <18}",
+                    pr["title"],
+                )
+
+        return reviewers
+
+    def get_repositories(self) -> list:
+        """ Return a list of repositories found in the project data."""
+
+        return list(self.data.keys())
diff --git a/workload/workload.py b/workload/workload.py
index eba598f1..73a8dc8e 100644
--- a/workload/workload.py
+++ b/workload/workload.py
@@ -1,110 +1,31 @@
+# -----------------------------------------------------------------------------
+# (C) Crown copyright Met Office. All rights reserved.
+# The file LICENCE, distributed with this code, contains details of the terms
+# under which the code may be used.
+# -----------------------------------------------------------------------------
+
+'''
+This script will read the details of pull requests from the Simulation Systems
+Review Tracker project and print tables of the number of reviews assigned to
+each reviewer.
+'''
+
 import argparse
 import json
 import subprocess
 from pathlib import Path
-
 from prettytable import PrettyTable
 
+from review_project import ProjectData
+
 lfric_repositories = [
     "lfric_apps",
     "lfric_core",
 ]
 
-ssd_repositories = [
-    "um",
-    "jules",
-    "socrates",
-    "casim",
-    "ukca",
-    "simulation-systems",
-    "SimSys_Scripts",
-    "git_playground",
-    "growss",
-]
-
 adminID = "MGEX82"  # person in github teams as a central admin but not relevant here
 
 
-class ProjectData:
-    """
-    A class to hold GitHub project data. The focus is on review information.
-
-    data: dict Raw data from the project
-    review_data: list Data filtered to contain a list of review tuples
-    """
-
-    def __init__(self, test: bool = False, capture: bool = False):
-        self.data = {}
-        self.review_data = []
-
-        self.fetch_project_data(test, capture)
-        self.filter_reviewers(test)
-
-    def fetch_project_data(self, test: bool, capture: bool):
-        """
-        Retrieve data from GitHub API or a from a test file.
-        """
-        if test:
-            file = Path(__file__).parent / "test" / "test.json"
-            with open(file) as f:
-                self.data = json.loads(f.read())
-
-        else:
-            command = "gh project item-list 376 -L 500 --owner MetOffice --format json"
-            output = subprocess.run(command.split(), capture_output=True, timeout=180)
-            if output.returncode:
-                raise RuntimeError(
-                    "Error fetching GitHub Project data:  \n " + output.stderr.decode()
-                )
-
-            self.data = json.loads(output.stdout)
-
-            if capture:
-                file = Path(__file__).parent / "test" / "test.json"
-                with open(file, "w") as f:
-                    json.dump(self.data, f)
-                print(
-                    "Project data saved to test.json. Use --test to run with"
-                    " the captured data."
-                )
-
-    def filter_reviewers(self, test: bool = False):
-        """
-        Filter the data to create a list of review tuples
-        """
-        all_reviews = self.data["items"]
-        for review in all_reviews:
-            cr = ""
-            sr = ""
-            if "code Review" in review:
-                cr = review["code Review"].strip()
-                self.review_data.append((cr, review["repository"]))
-
-            if "sciTech Review" in review:
-                sr = review["sciTech Review"].strip()
-                self.review_data.append((sr, review["repository"]))
-
-            if test and (cr or sr):
-                print(
-                    "SciTech:",
-                    f"{sr: <18}",
-                    "Code:",
-                    f"{cr: <18}",
-                    f"{review['repository']: <50}",
-                    review["title"],
-                )
-
-    def one_repo(self, repository: str) -> list:
-        """
-        Filter the review data to just that of one repository
-
-        repository: string Name of repository to include
-        return: list All reviewers that have reviews assigned in that repository
-                including duplicates.
-        """
-        return [x[0] for x in self.review_data if repository in x[1]]
-
-
 class Team:
     """
     A class to hold GitHub team data.
@@ -161,6 +82,16 @@ def get_team_members(self) -> list:
         """
         return self.members
 
+def other_repo_list(data: ProjectData, to_exclude: list) -> list:
+    """
+    Create a list of all repositories with data in the project, not including
+    any repositories that are found elsewhere.
+    """
+
+    all_repos = data.get_repositories()
+
+    return sorted(set(all_repos) - set(to_exclude))
+
 
 def count_items(item_list: list) -> dict:
     """
@@ -178,7 +109,7 @@ def count_items(item_list: list) -> dict:
     return count
 
 
-def build_table(data: ProjectData, reviewer_list: list, repos: list) -> PrettyTable:
+def build_table(data: ProjectData, reviewer_list: list, repos: list, test: bool) -> PrettyTable:
     """
     Build a pretty table from the data by extracting just the desired
     repositories and reviewers.
@@ -195,7 +126,7 @@ def build_table(data: ProjectData, reviewer_list: list, repos: list) -> PrettyTa
     totals = [0] * len(reviewer_list)
 
     for repo in repos:
-        review_count = count_items(data.one_repo(repo))
+        review_count = count_items(data.get_reviewers_for_repo(repo, test))
 
         sorted_count = []
         for index, person in enumerate(reviewer_list):
@@ -271,10 +202,10 @@ def main(total: bool, test: bool, capture_project: bool):
     # Create tables for each combination of reviewers and reposotories
     tables = {}
 
-    ## Table for SSD only repositories
-    repo_list = ssd_repositories
+    ## Table for non-LFRic repositories
+    repo_list = other_repo_list(data, lfric_repositories)
     reviewers = teams["SSD"].get_team_members()
-    tables["SSD"] = build_table(data, reviewers, repo_list)
+    tables["SSD"] = build_table(data, reviewers, repo_list, test)
 
     ## Table for LFRic repositories
     repo_list = lfric_repositories
@@ -286,7 +217,7 @@ def main(total: bool, test: bool, capture_project: bool):
         for person in members:
             if person not in reviewers:
                 reviewers.append(person)
-    tables["LFRic"] = build_table(data, reviewers, repo_list)
+    tables["LFRic"] = build_table(data, reviewers, repo_list, test)
 
     # Print tables
     for name, table in tables.items():

From 5bb7738a915cd22375d3103f7f569fc4bb424568 Mon Sep 17 00:00:00 2001
From: jennyhickson <61183013+jennyhickson@users.noreply.github.com>
Date: Thu, 29 Jan 2026 13:45:29 +0000
Subject: [PATCH 03/10] Rename directory

---
 {workload => gh_review_project}/review_project.py               | 0
 {workload => gh_review_project}/test/SimSysCodeReviewers.json   | 0
 .../test/core-capability-development.json                       | 0
 {workload => gh_review_project}/test/ssdteam.json               | 0
 {workload => gh_review_project}/test/test.json                  | 0
 {workload => gh_review_project}/test/toolscollabdev.json        | 0
 {workload => gh_review_project}/workload.py                     | 2 +-
 7 files changed, 1 insertion(+), 1 deletion(-)
 rename {workload => gh_review_project}/review_project.py (100%)
 rename {workload => gh_review_project}/test/SimSysCodeReviewers.json (100%)
 rename {workload => gh_review_project}/test/core-capability-development.json (100%)
 rename {workload => gh_review_project}/test/ssdteam.json (100%)
 rename {workload => gh_review_project}/test/test.json (100%)
 rename {workload => gh_review_project}/test/toolscollabdev.json (100%)
 rename {workload => gh_review_project}/workload.py (98%)

diff --git a/workload/review_project.py b/gh_review_project/review_project.py
similarity index 100%
rename from workload/review_project.py
rename to gh_review_project/review_project.py
diff --git a/workload/test/SimSysCodeReviewers.json b/gh_review_project/test/SimSysCodeReviewers.json
similarity index 100%
rename from workload/test/SimSysCodeReviewers.json
rename to gh_review_project/test/SimSysCodeReviewers.json
diff --git a/workload/test/core-capability-development.json b/gh_review_project/test/core-capability-development.json
similarity index 100%
rename from workload/test/core-capability-development.json
rename to gh_review_project/test/core-capability-development.json
diff --git a/workload/test/ssdteam.json b/gh_review_project/test/ssdteam.json
similarity index 100%
rename from workload/test/ssdteam.json
rename to gh_review_project/test/ssdteam.json
diff --git a/workload/test/test.json b/gh_review_project/test/test.json
similarity index 100%
rename from workload/test/test.json
rename to gh_review_project/test/test.json
diff --git a/workload/test/toolscollabdev.json b/gh_review_project/test/toolscollabdev.json
similarity index 100%
rename from workload/test/toolscollabdev.json
rename to gh_review_project/test/toolscollabdev.json
diff --git a/workload/workload.py b/gh_review_project/workload.py
similarity index 98%
rename from workload/workload.py
rename to gh_review_project/workload.py
index 73a8dc8e..5f5a3bc0 100644
--- a/workload/workload.py
+++ b/gh_review_project/workload.py
@@ -166,7 +166,7 @@ def parse_args():
     """
 
     parser = argparse.ArgumentParser(
-        "Create tables of review workload based on Simulation Systems Review Tracker"
+        "Create tables of review gh_review_project based on Simulation Systems Review Tracker"
     )
     parser.add_argument(
         "--total",

From 5b0bd9515be1f9a8cbc5cd6578e9a061d2ed9b20 Mon Sep 17 00:00:00 2001
From: jennyhickson <61183013+jennyhickson@users.noreply.github.com>
Date: Thu, 29 Jan 2026 13:49:17 +0000
Subject: [PATCH 04/10] tidy unneeded structure.

---
 gh_review_project/review_project.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/gh_review_project/review_project.py b/gh_review_project/review_project.py
index 8167b98e..b636fba6 100644
--- a/gh_review_project/review_project.py
+++ b/gh_review_project/review_project.py
@@ -17,15 +17,14 @@ class ProjectData:
     """
     A class to hold GitHub project data
 
-    data: dict Raw data from the project
-    review_data: list Data filtered to contain a list of review tuples
-                 (reviewer, repository)
+    raw_data: dict Raw data from the project
+    data: dict Data filtered to contain most needed pull request details,
+               sorted by repository.
     """
 
     def __init__(self, test: bool = False, capture: bool = False):
         self.raw_data = {}
         self.data = {}
-        self.review_data = []
 
         self.fetch_project_data(test, capture)
         self.extract_data()

From 52108986e401ad2a5eaf22c68222a7b9d6702e7e Mon Sep 17 00:00:00 2001
From: jennyhickson <61183013+jennyhickson@users.noreply.github.com>
Date: Thu, 29 Jan 2026 13:53:51 +0000
Subject: [PATCH 05/10] Fix typo in argument parser description

---
 gh_review_project/workload.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gh_review_project/workload.py b/gh_review_project/workload.py
index 5f5a3bc0..73a8dc8e 100644
--- a/gh_review_project/workload.py
+++ b/gh_review_project/workload.py
@@ -166,7 +166,7 @@ def parse_args():
     """
 
     parser = argparse.ArgumentParser(
-        "Create tables of review gh_review_project based on Simulation Systems Review Tracker"
+        "Create tables of review workload based on Simulation Systems Review Tracker"
     )
     parser.add_argument(
         "--total",

From e11cfa5b0b85c26ed1d0268d4868beae252cc3ab Mon Sep 17 00:00:00 2001
From: jennyhickson <61183013+jennyhickson@users.noreply.github.com>
Date: Thu, 29 Jan 2026 14:23:17 +0000
Subject: [PATCH 06/10] Apply black formatting

---
 gh_review_project/review_project.py | 7 ++++---
 gh_review_project/workload.py       | 9 ++++++---
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/gh_review_project/review_project.py b/gh_review_project/review_project.py
index b636fba6..67956aef 100644
--- a/gh_review_project/review_project.py
+++ b/gh_review_project/review_project.py
@@ -4,15 +4,16 @@
 # under which the code may be used.
 # -----------------------------------------------------------------------------
 
-'''
+"""
 Class and functions for interacting with the Simulation Systems Review Tracker
 Project.
-'''
+"""
 
 import json
 import subprocess
 from pathlib import Path
 
+
 class ProjectData:
     """
     A class to hold GitHub project data
@@ -136,6 +137,6 @@ def get_reviewers_for_repo(self, repo: str, test: bool = False) -> list:
         return reviewers
 
     def get_repositories(self) -> list:
-        """ Return a list of repositories found in the project data."""
+        """Return a list of repositories found in the project data."""
 
         return list(self.data.keys())
diff --git a/gh_review_project/workload.py b/gh_review_project/workload.py
index 73a8dc8e..16118ca8 100644
--- a/gh_review_project/workload.py
+++ b/gh_review_project/workload.py
@@ -4,11 +4,11 @@
 # under which the code may be used.
 # -----------------------------------------------------------------------------
 
-'''
+"""
 This script will read the details of pull requests from the Simulation Systems
 Review Tracker project and print tables of the number of reviews assigned to
 each reviewer.
-'''
+"""
 
 import argparse
 import json
@@ -82,6 +82,7 @@ def get_team_members(self) -> list:
         """
         return self.members
 
+
 def other_repo_list(data: ProjectData, to_exclude: list) -> list:
     """
     Create a list of all repositories with data in the project, not including
@@ -109,7 +110,9 @@ def count_items(item_list: list) -> dict:
     return count
 
 
-def build_table(data: ProjectData, reviewer_list: list, repos: list, test: bool) -> PrettyTable:
+def build_table(
+    data: ProjectData, reviewer_list: list, repos: list, test: bool
+) -> PrettyTable:
     """
     Build a pretty table from the data by extracting just the desired
     repositories and reviewers.

From 6c8214ad2557889f244b17a2f5f2f5ee19c1d3e6 Mon Sep 17 00:00:00 2001
From: jennyhickson <61183013+jennyhickson@users.noreply.github.com>
Date: Thu, 29 Jan 2026 16:02:22 +0000
Subject: [PATCH 07/10] refactor with classmethods

---
 gh_review_project/review_project.py | 89 +++++++++++++++++------------
 gh_review_project/workload.py       | 25 +++++---
 2 files changed, 70 insertions(+), 44 deletions(-)

diff --git a/gh_review_project/review_project.py b/gh_review_project/review_project.py
index 67956aef..d54d850b 100644
--- a/gh_review_project/review_project.py
+++ b/gh_review_project/review_project.py
@@ -18,48 +18,63 @@ class ProjectData:
     """
     A class to hold GitHub project data
 
-    raw_data: dict Raw data from the project
     data: dict Data filtered to contain most needed pull request details,
                sorted by repository.
+    test: bool Run using test data and extra logging.
     """
 
-    def __init__(self, test: bool = False, capture: bool = False):
-        self.raw_data = {}
-        self.data = {}
+    def __init__(self, data: dict, test: bool = False):
+        self.data = data
+        self.test = test
 
-        self.fetch_project_data(test, capture)
-        self.extract_data()
-
-    def fetch_project_data(self, test: bool, capture: bool):
+    @classmethod
+    def from_github(cls, capture: bool = False, file: Path = None) -> "ProjectData":
         """
-        Retrieve data from GitHub API or a from a test file.
+        Retrieve data from GitHub API and initialise the class.
         """
-        if test:
-            file = Path(__file__).parent / "test" / "test.json"
-            with open(file) as f:
-                self.raw_data = json.loads(f.read())
-
-        else:
-            command = "gh project item-list 376 -L 500 --owner MetOffice --format json"
-            output = subprocess.run(command.split(), capture_output=True, timeout=180)
-            if output.returncode:
-                raise RuntimeError(
-                    "Error fetching GitHub Project data:  \n " + output.stderr.decode()
-                )
+        command = "gh project item-list 376 -L 500 --owner MetOffice --format json"
+        output = subprocess.run(command.split(), capture_output=True, timeout=180)
+        if output.returncode:
+            raise RuntimeError(
+                "Error fetching GitHub Project data:  \n " + output.stderr.decode()
+            )
 
-            self.raw_data = json.loads(output.stdout)
+        raw_data = json.loads(output.stdout)
 
-            if capture:
-                file = Path(__file__).parent / "test" / "test.json"
+        if capture:
+            if file:
                 with open(file, "w") as f:
-                    json.dump(self.raw_data, f)
+                    json.dump(raw_data, f)
                 print(
-                    "Project data saved to test.json. Use --test to run with"
-                    " the captured data."
+                    f"Project data saved to {file}."
                 )
+            else:
+                print("Unable to capture data as filename not specified.")
+
+        data = cls._extract_data(raw_data)
+        return cls(data, test=False)
 
-    def extract_data(self):
-        for pr in self.raw_data["items"]:
+    @classmethod
+    def from_file(cls, file: Path) -> "ProjectData":
+        """
+        Retrieve data from test file and initialise the class.
+        """
+        with open(file) as f:
+            raw_data = json.loads(f.read())
+
+        data = cls._extract_data(raw_data)
+        return cls(data, test=True)
+
+    @classmethod
+    def _extract_data(cls, raw_data: dict) -> dict:
+        """
+        Extract useful information from the raw data and
+        store it in a dictionary keyed by repository.
+        """
+
+        data = {}
+
+        for pr in raw_data["items"]:
             pull_request = {}
             pull_request["id"] = pr["id"]
             pull_request["title"] = pr["content"]["title"]
@@ -91,12 +106,14 @@ def extract_data(self):
                 pull_request["scitech review"] = None
 
             repo = pr["content"]["repository"].replace("MetOffice/", "")
-            if repo in self.data:
-                self.data[repo].append(pull_request)
+            if repo in data:
+                data[repo].append(pull_request)
             else:
-                self.data[repo] = [pull_request]
+                data[repo] = [pull_request]
+
+        return data
 
-    def get_reviewers_for_repo(self, repo: str, test: bool = False) -> list:
+    def get_reviewers_for_repo(self, repo: str) -> list:
         """
         Return a list of reviewers for a given repository.
         """
@@ -107,8 +124,8 @@ def get_reviewers_for_repo(self, repo: str, test: bool = False) -> list:
 
         reviewers = []
 
-        if test:
-            print("\n=== Reviewers for repository " + repo)
+        if self.test:
+            print("\n=== Reviewers for " + repo)
 
         for pr in pull_requests:
             sr = pr["scitech review"]
@@ -119,7 +136,7 @@ def get_reviewers_for_repo(self, repo: str, test: bool = False) -> list:
             if cr:
                 reviewers.append(cr)
 
-            if test and (cr or sr):
+            if self.test and (cr or sr):
                 # Handle case where these are None
                 if not sr:
                     sr = ""
diff --git a/gh_review_project/workload.py b/gh_review_project/workload.py
index 16118ca8..a6685bbb 100644
--- a/gh_review_project/workload.py
+++ b/gh_review_project/workload.py
@@ -25,6 +25,7 @@
 
 adminID = "MGEX82"  # person in github teams as a central admin but not relevant here
 
+testfile = Path(__file__).parent / "test" / "test.json"
 
 class Team:
     """
@@ -111,8 +112,7 @@ def count_items(item_list: list) -> dict:
 
 
 def build_table(
-    data: ProjectData, reviewer_list: list, repos: list, test: bool
-) -> PrettyTable:
+    data: ProjectData, reviewer_list: list, repos: list) -> PrettyTable:
     """
     Build a pretty table from the data by extracting just the desired
     repositories and reviewers.
@@ -129,7 +129,7 @@ def build_table(
     totals = [0] * len(reviewer_list)
 
     for repo in repos:
-        review_count = count_items(data.get_reviewers_for_repo(repo, test))
+        review_count = count_items(data.get_reviewers_for_repo(repo))
 
         sorted_count = []
         for index, person in enumerate(reviewer_list):
@@ -186,14 +186,23 @@ def parse_args():
         action="store_true",
         help="Capture the current project status into the test file",
     )
+    parser.add_argument(
+        "--file",
+        default=testfile,
+        help="Filepath to test data for either capture the project status, "
+             "or use as input data.",
+    )
 
     return parser.parse_args()
 
 
-def main(total: bool, test: bool, capture_project: bool):
+def main(total: bool, test: bool, capture_project: bool, file: Path):
 
     # Extract data from github about the reviews and team members.
-    data = ProjectData(test, capture_project)
+    if test:
+        data = ProjectData.from_file(file)
+    else:
+        data = ProjectData.from_github(capture_project, file)
 
     teams = {
         "SSD": Team("ssdteam", test),
@@ -208,7 +217,7 @@ def main(total: bool, test: bool, capture_project: bool):
     ## Table for non-LFRic repositories
     repo_list = other_repo_list(data, lfric_repositories)
     reviewers = teams["SSD"].get_team_members()
-    tables["SSD"] = build_table(data, reviewers, repo_list, test)
+    tables["SSD"] = build_table(data, reviewers, repo_list)
 
     ## Table for LFRic repositories
     repo_list = lfric_repositories
@@ -220,7 +229,7 @@ def main(total: bool, test: bool, capture_project: bool):
         for person in members:
             if person not in reviewers:
                 reviewers.append(person)
-    tables["LFRic"] = build_table(data, reviewers, repo_list, test)
+    tables["LFRic"] = build_table(data, reviewers, repo_list)
 
     # Print tables
     for name, table in tables.items():
@@ -229,4 +238,4 @@ def main(total: bool, test: bool, capture_project: bool):
 
 if __name__ == "__main__":
     args = parse_args()
-    main(args.total, args.test, args.capture_project)
+    main(args.total, args.test, args.capture_project, args.file)

From 134dc398bc67273ae97a7f0e4887733853166393 Mon Sep 17 00:00:00 2001
From: jennyhickson <61183013+jennyhickson@users.noreply.github.com>
Date: Thu, 29 Jan 2026 16:03:39 +0000
Subject: [PATCH 08/10] Apply black formatting

---
 gh_review_project/review_project.py | 4 +---
 gh_review_project/workload.py       | 6 +++---
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/gh_review_project/review_project.py b/gh_review_project/review_project.py
index d54d850b..16f0d851 100644
--- a/gh_review_project/review_project.py
+++ b/gh_review_project/review_project.py
@@ -45,9 +45,7 @@ def from_github(cls, capture: bool = False, file: Path = None) -> "ProjectData":
             if file:
                 with open(file, "w") as f:
                     json.dump(raw_data, f)
-                print(
-                    f"Project data saved to {file}."
-                )
+                print(f"Project data saved to {file}.")
             else:
                 print("Unable to capture data as filename not specified.")
 
diff --git a/gh_review_project/workload.py b/gh_review_project/workload.py
index a6685bbb..7d4b51eb 100644
--- a/gh_review_project/workload.py
+++ b/gh_review_project/workload.py
@@ -27,6 +27,7 @@
 
 testfile = Path(__file__).parent / "test" / "test.json"
 
+
 class Team:
     """
     A class to hold GitHub team data.
@@ -111,8 +112,7 @@ def count_items(item_list: list) -> dict:
     return count
 
 
-def build_table(
-    data: ProjectData, reviewer_list: list, repos: list) -> PrettyTable:
+def build_table(data: ProjectData, reviewer_list: list, repos: list) -> PrettyTable:
     """
     Build a pretty table from the data by extracting just the desired
     repositories and reviewers.
@@ -190,7 +190,7 @@ def parse_args():
         "--file",
         default=testfile,
         help="Filepath to test data for either capture the project status, "
-             "or use as input data.",
+        "or use as input data.",
     )
 
     return parser.parse_args()

From 94a9e0f9007a088f9b418f36222ff8018807867e Mon Sep 17 00:00:00 2001
From: jennyhickson <61183013+jennyhickson@users.noreply.github.com>
Date: Thu, 29 Jan 2026 16:24:02 +0000
Subject: [PATCH 09/10] Expand and resolve the --file path in parse_args

---
 gh_review_project/workload.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/gh_review_project/workload.py b/gh_review_project/workload.py
index 7d4b51eb..f840d5ae 100644
--- a/gh_review_project/workload.py
+++ b/gh_review_project/workload.py
@@ -25,8 +25,6 @@
 
 adminID = "MGEX82"  # person in github teams as a central admin but not relevant here
 
-testfile = Path(__file__).parent / "test" / "test.json"
-
 
 class Team:
     """
@@ -168,6 +166,8 @@ def parse_args():
     Read command line args
     """
 
+    testfile = Path(__file__).parent / "test" / "test.json"
+
     parser = argparse.ArgumentParser(
         "Create tables of review workload based on Simulation Systems Review Tracker"
     )
@@ -193,7 +193,12 @@ def parse_args():
         "or use as input data.",
     )
 
-    return parser.parse_args()
+    args = parser.parse_args()
+
+    args.file = Path(args.file)
+    args.file = args.file.expanduser().resolve()
+
+    return args
 
 
 def main(total: bool, test: bool, capture_project: bool, file: Path):

From 0c2ab39c6a27388d6282401489f21f7d0492585c Mon Sep 17 00:00:00 2001
From: jennyhickson <61183013+jennyhickson@users.noreply.github.com>
Date: Mon, 2 Feb 2026 08:54:48 +0000
Subject: [PATCH 10/10] Apply code review suggestions

---
 gh_review_project/review_project.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/gh_review_project/review_project.py b/gh_review_project/review_project.py
index 16f0d851..f4ad7f36 100644
--- a/gh_review_project/review_project.py
+++ b/gh_review_project/review_project.py
@@ -8,10 +8,13 @@
 Class and functions for interacting with the Simulation Systems Review Tracker
 Project.
 """
+from __future__ import annotations
 
 import json
 import subprocess
 from pathlib import Path
+import shlex
+from collections import defaultdict
 
 
 class ProjectData:
@@ -28,12 +31,12 @@ def __init__(self, data: dict, test: bool = False):
         self.test = test
 
     @classmethod
-    def from_github(cls, capture: bool = False, file: Path = None) -> "ProjectData":
+    def from_github(cls, capture: bool = False, file: Path = None) -> ProjectData:
         """
         Retrieve data from GitHub API and initialise the class.
         """
         command = "gh project item-list 376 -L 500 --owner MetOffice --format json"
-        output = subprocess.run(command.split(), capture_output=True, timeout=180)
+        output = subprocess.run(shlex.split(command), capture_output=True, timeout=180)
         if output.returncode:
             raise RuntimeError(
                 "Error fetching GitHub Project data:  \n " + output.stderr.decode()
@@ -53,7 +56,7 @@ def from_github(cls, capture: bool = False, file: Path = None) -> "ProjectData":
         return cls(data, test=False)
 
     @classmethod
-    def from_file(cls, file: Path) -> "ProjectData":
+    def from_file(cls, file: Path) -> ProjectData:
         """
         Retrieve data from test file and initialise the class.
         """
@@ -70,7 +73,7 @@ def _extract_data(cls, raw_data: dict) -> dict:
         store it in a dictionary keyed by repository.
         """
 
-        data = {}
+        data = defaultdict(list)
 
         for pr in raw_data["items"]:
             pull_request = {}
@@ -104,10 +107,7 @@ def _extract_data(cls, raw_data: dict) -> dict:
                 pull_request["scitech review"] = None
 
             repo = pr["content"]["repository"].replace("MetOffice/", "")
-            if repo in data:
-                data[repo].append(pull_request)
-            else:
-                data[repo] = [pull_request]
+            data[repo].append(pull_request)
 
         return data