diff --git a/docs/changelog.md b/docs/changelog.md index cbc7327f75..e8dc13543c 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -4,6 +4,8 @@ This changelog documents user-relevant changes to the GitHub runner charm. ## 2026-03-20 +- The pressure reconciler is now always used, replacing the legacy reconcile mode. When no planner relation is configured, it uses `base-virtual-machines` as static pressure to maintain the configured minimum runner count. +- HTTP server endpoints (`/runner/check`, `/runner/flush`) now use `RunnerManager` directly instead of the legacy `RunnerScaler`. - Set GitHub API pagination page size to 100 (up from PyGithub default of 30), reducing API calls when listing runners. ## 2026-03-18 diff --git a/github-runner-manager/pyproject.toml b/github-runner-manager/pyproject.toml index 4346d6800d..7612bab1a2 100644 --- a/github-runner-manager/pyproject.toml +++ b/github-runner-manager/pyproject.toml @@ -3,7 +3,7 @@ [project] name = "github-runner-manager" -version = "0.15.2" +version = "0.16.0" authors = [ { name = "Canonical IS DevOps", email = "is-devops-team@canonical.com" }, ] diff --git a/github-runner-manager/src/github_runner_manager/cli.py b/github-runner-manager/src/github_runner_manager/cli.py index 65a1780227..c00bd069e4 100644 --- a/github-runner-manager/src/github_runner_manager/cli.py +++ b/github-runner-manager/src/github_runner_manager/cli.py @@ -20,8 +20,8 @@ from github_runner_manager.manager.pressure_reconciler import ( PressureReconciler, build_pressure_reconciler, + build_runner_manager, ) -from github_runner_manager.reconcile_service import start_reconcile_service from github_runner_manager.thread_manager import ThreadManager version = importlib.metadata.version("github-runner-manager") @@ -94,19 +94,12 @@ def handle_shutdown( default="INFO", help="The log level for the application.", ) -@click.option( - "--python-path", - type=str, - required=False, - help="The PYTHONPATH to access the github-runner-manager library.", -) # The entry point for the CLI will be tested with integration test. -def main( # pylint: disable=too-many-arguments, too-many-positional-arguments +def main( config_file: TextIO, host: str, port: int, debug: bool, - python_path: str | None, log_level: str, ) -> None: # pragma: no cover """Start the reconcile service. @@ -116,8 +109,10 @@ def main( # pylint: disable=too-many-arguments, too-many-positional-arguments host: The hostname to listen on for the HTTP server port: The port to listen on the HTTP server. debug: Whether to start the application in debug mode. - python_path: PYTHONPATH to access the github-runner-manager library. log_level: The log level. + + Raises: + ClickException: If no non-reactive combinations are configured. """ logging.basicConfig( level=log_level, @@ -128,30 +123,28 @@ def main( # pylint: disable=too-many-arguments, too-many-positional-arguments config = ApplicationConfiguration.from_yaml_file(StringIO(config_file.read())) lock = Lock() + combinations = config.non_reactive_configuration.combinations + if not combinations: + raise click.ClickException("No non-reactive combinations configured.") + runner_manager = build_runner_manager(config, combinations[0]) + pressure_reconciler = build_pressure_reconciler(config, runner_manager, lock) + thread_manager = ThreadManager() http_server_args = FlaskArgs(host=host, port=port, debug=debug) thread_manager.add_thread( - target=partial(start_http_server, config, lock, http_server_args), + target=partial(start_http_server, runner_manager, lock, http_server_args), daemon=True, ) - if config.planner_url and config.planner_token: - pressure_reconciler = build_pressure_reconciler(config, lock) - shutdown = partial( - handle_shutdown, - pressure_reconciler=pressure_reconciler, - thread_manager=thread_manager, - ) - signal.signal(signal.SIGTERM, shutdown) - signal.signal(signal.SIGINT, shutdown) - thread_manager.add_thread(target=pressure_reconciler.start_create_loop, daemon=True) - thread_manager.add_thread(target=pressure_reconciler.start_reconcile_loop, daemon=True) - # Legacy mode is still supported for deployments without planner config. - else: - thread_manager.add_thread( - target=partial(start_reconcile_service, config, python_path, lock), - daemon=True, - ) + shutdown = partial( + handle_shutdown, + pressure_reconciler=pressure_reconciler, + thread_manager=thread_manager, + ) + signal.signal(signal.SIGTERM, shutdown) + signal.signal(signal.SIGINT, shutdown) + thread_manager.add_thread(target=pressure_reconciler.start_create_loop, daemon=True) + thread_manager.add_thread(target=pressure_reconciler.start_reconcile_loop, daemon=True) thread_manager.start() thread_manager.raise_on_error() diff --git a/github-runner-manager/src/github_runner_manager/http_server.py b/github-runner-manager/src/github_runner_manager/http_server.py index 777d376699..fb6c9f9662 100644 --- a/github-runner-manager/src/github_runner_manager/http_server.py +++ b/github-runner-manager/src/github_runner_manager/http_server.py @@ -1,12 +1,8 @@ # Copyright 2026 Canonical Ltd. # See LICENSE file for licensing details. -"""The HTTP server for github-runner-manager. +"""The HTTP server for github-runner-manager.""" -The HTTP server for request to the github-runner-manager. -""" - -import dataclasses import json from dataclasses import dataclass from threading import Lock @@ -14,13 +10,10 @@ from flask import Flask, request from prometheus_client import generate_latest -from github_runner_manager.configuration import ApplicationConfiguration from github_runner_manager.errors import CloudError, LockError -from github_runner_manager.manager.runner_manager import FlushMode -from github_runner_manager.reconcile_service import get_runner_scaler +from github_runner_manager.manager.runner_manager import FlushMode, RunnerManager -APP_CONFIG_NAME = "app_config" -OPENSTACK_CONFIG_NAME = "openstack_config" +RUNNER_MANAGER_CONFIG_NAME = "runner_manager" app = Flask(__name__) @@ -45,15 +38,23 @@ def check_runner() -> tuple[str, int]: Returns: Information on the runners in JSON format. """ - app_config: ApplicationConfiguration = app.config[APP_CONFIG_NAME] + runner_manager: RunnerManager = app.config[RUNNER_MANAGER_CONFIG_NAME] app.logger.info("Checking runners...") - runner_scaler = get_runner_scaler(app_config) try: - runner_info = runner_scaler.get_runner_info() + runner_info = runner_manager.get_runner_info() except CloudError as err: app.logger.exception("Cloud error encountered while getting runner info") return (str(err), 500) - return (json.dumps(dataclasses.asdict(runner_info)), 200) + + response = { + "online": runner_info.online, + "busy": runner_info.busy, + "offline": runner_info.offline, + "unknown": runner_info.unknown, + "runners": list(runner_info.runners), + "busy_runners": list(runner_info.busy_runners), + } + return (json.dumps(response), 200) @app.route("/runner/flush", methods=["POST"]) @@ -66,7 +67,7 @@ def flush_runner() -> tuple[str, int]: Returns: A empty response. """ - app_config = app.config[APP_CONFIG_NAME] + runner_manager: RunnerManager = app.config[RUNNER_MANAGER_CONFIG_NAME] flush_busy_str = request.args.get("flush-busy") flush_busy = False @@ -76,15 +77,13 @@ def flush_runner() -> tuple[str, int]: lock = _get_lock() with lock: app.logger.info("Flushing runners...") - runner_scaler = get_runner_scaler(app_config) app.logger.info("Flushing busy: %s", flush_busy) flush_mode = FlushMode.FLUSH_BUSY if flush_busy else FlushMode.FLUSH_IDLE try: - num_flushed = runner_scaler.flush(flush_mode) + runner_manager.flush_runners(flush_mode) except CloudError as err: app.logger.exception("Cloud error encountered while flushing runners") return (str(err), 500) - app.logger.info("Flushed %s runners", num_flushed) return ("", 204) @@ -130,14 +129,14 @@ class FlaskArgs: def start_http_server( - app_config: ApplicationConfiguration, + runner_manager: RunnerManager, lock: Lock, flask_args: FlaskArgs, ) -> None: """Start the HTTP server for interacting with the github-runner-manager service. Args: - app_config: The application configuration. + runner_manager: The runner manager for managing runners. lock: The lock representing modification access to the managed set of runners. flask_args: The arguments for the flask HTTP server. """ @@ -145,7 +144,7 @@ def start_http_server( # The lock is passed from the caller, hence the need to update the global variable. global _lock # pylint: disable=global-statement _lock = lock - app.config[APP_CONFIG_NAME] = app_config + app.config[RUNNER_MANAGER_CONFIG_NAME] = runner_manager app.run( host=flask_args.host, port=flask_args.port, diff --git a/github-runner-manager/src/github_runner_manager/manager/pressure_reconciler.py b/github-runner-manager/src/github_runner_manager/manager/pressure_reconciler.py index 66dfd9d443..9cedc0a423 100644 --- a/github-runner-manager/src/github_runner_manager/manager/pressure_reconciler.py +++ b/github-runner-manager/src/github_runner_manager/manager/pressure_reconciler.py @@ -112,7 +112,7 @@ class PressureReconciler: # pylint: disable=too-few-public-methods,too-many-ins def __init__( self, manager: RunnerManager, - planner_client: PlannerClient, + planner_client: PlannerClient | None, config: PressureReconcilerConfig, lock: Lock, ) -> None: @@ -122,6 +122,7 @@ def __init__( manager: Runner manager interface for creating, cleaning up, and listing runners. planner_client: Client used to stream pressure updates. + None when no planner relation is configured. config: Reconciler configuration. lock: Shared lock to serialize operations with other reconcile loops. """ @@ -140,6 +141,15 @@ def start_create_loop(self) -> None: with self._lock: self._runner_count = len(self._manager.get_runners()) logger.info("Create loop: initial sync, _runner_count=%s", self._runner_count) + if self._planner is None: + self._last_pressure = self._config.min_pressure + logger.info( + "Create loop: no planner configured, using min_pressure=%s", + self._config.min_pressure, + ) + self._handle_create_runners(self._config.min_pressure) + self._stop.wait() + return while not self._stop.is_set(): try: for update in self._planner.stream_pressure(self._config.flavor_name): @@ -394,31 +404,38 @@ def _desired_total_from_pressure(self, pressure: int) -> int: return total -def build_pressure_reconciler(config: ApplicationConfiguration, lock: Lock) -> PressureReconciler: +def build_pressure_reconciler( + config: ApplicationConfiguration, manager: RunnerManager, lock: Lock +) -> PressureReconciler: """Construct a PressureReconciler from application configuration. Args: config: Application configuration. + manager: The runner manager to use for creating, cleaning up, and listing runners. lock: Shared lock to serialize operations with other reconcile loops. Raises: - ValueError: If no non-reactive combinations are configured. + ValueError: If planner configuration is partial (only one of URL/token set). Returns: A fully constructed PressureReconciler. """ - combinations = config.non_reactive_configuration.combinations - if not combinations: + first = config.non_reactive_configuration.combinations[0] + planner_client: PlannerClient | None = None + has_url = bool(config.planner_url) + has_token = bool(config.planner_token) + if has_url != has_token: raise ValueError( - "Cannot build PressureReconciler: no non-reactive combinations configured." + "Partial planner configuration: both planner_url and planner_token must be set" + " or both unset." + ) + if has_url and has_token: + planner_client = PlannerClient( + PlannerConfiguration(base_url=config.planner_url, token=config.planner_token) ) - first = combinations[0] - manager = _build_runner_manager(config, first) return PressureReconciler( manager=manager, - planner_client=PlannerClient( - PlannerConfiguration(base_url=config.planner_url, token=config.planner_token) - ), + planner_client=planner_client, config=PressureReconcilerConfig( flavor_name=config.name, reconcile_interval=config.reconcile_interval, @@ -429,7 +446,7 @@ def build_pressure_reconciler(config: ApplicationConfiguration, lock: Lock) -> P ) -def _build_runner_manager( +def build_runner_manager( config: ApplicationConfiguration, combination: NonReactiveCombination ) -> RunnerManager: """Build a RunnerManager from application config and a flavor/image combination. diff --git a/github-runner-manager/src/github_runner_manager/manager/runner_manager.py b/github-runner-manager/src/github_runner_manager/manager/runner_manager.py index cc6e50d06b..cc35f09ba9 100644 --- a/github-runner-manager/src/github_runner_manager/manager/runner_manager.py +++ b/github-runner-manager/src/github_runner_manager/manager/runner_manager.py @@ -1,6 +1,8 @@ # Copyright 2026 Canonical Ltd. # See LICENSE file for licensing details. +# RunnerInfo is duplicated in runner_scaler (legacy), will be removed in follow-up PR. +# pylint: disable=duplicate-code """Module for managing the GitHub self-hosted runners hosted on cloud instances.""" import copy @@ -45,6 +47,27 @@ IssuedMetricEventsStats = dict[Type[metric_events.Event], int] +@dataclass(frozen=True) +class RunnerInfo: + """Aggregated information on the runners. + + Attributes: + online: The number of runners in online state. + busy: The number of runners in busy state. + offline: The number of runners in offline state. + unknown: The number of runners in unknown state. + runners: The names of the online runners. + busy_runners: The names of the busy runners. + """ + + online: int + busy: int + offline: int + unknown: int + runners: tuple[str, ...] + busy_runners: tuple[str, ...] + + class FlushMode(Enum): """Strategy for flushing runners. @@ -263,6 +286,42 @@ def get_runners(self) -> tuple[RunnerInstance, ...]: for vm in vms ) + def get_runner_info(self) -> RunnerInfo: + """Get aggregated information on the runners. + + Returns: + Aggregated runner counts and names. + """ + runner_list = self.get_runners() + online = 0 + busy = 0 + offline = 0 + unknown = 0 + online_runners: list[str] = [] + busy_runners: list[str] = [] + for runner in runner_list: + match runner.platform_state: + case PlatformRunnerState.BUSY: + online += 1 + online_runners.append(runner.name) + busy += 1 + busy_runners.append(runner.name) + case PlatformRunnerState.IDLE: + online += 1 + online_runners.append(runner.name) + case PlatformRunnerState.OFFLINE: + offline += 1 + case _: + unknown += 1 + return RunnerInfo( + online=online, + busy=busy, + offline=offline, + unknown=unknown, + runners=tuple(online_runners), + busy_runners=tuple(busy_runners), + ) + def delete_runners(self, num: int) -> IssuedMetricEventsStats: """Delete up to `num` runners, preferring idle ones over busy. diff --git a/github-runner-manager/tests/unit/manager/test_pressure_reconciler.py b/github-runner-manager/tests/unit/manager/test_pressure_reconciler.py index 6770ecf124..e186d39e7c 100644 --- a/github-runner-manager/tests/unit/manager/test_pressure_reconciler.py +++ b/github-runner-manager/tests/unit/manager/test_pressure_reconciler.py @@ -489,3 +489,112 @@ def test_timer_reconcile_emits_reconciliation_metric(monkeypatch: pytest.MonkeyP assert event.idle_runners == 2 # IDLE + OFFLINE+HEALTHY assert event.active_runners == 1 assert event.crashed_runners == 0 + + +def test_create_loop_no_planner_sets_pressure_creates_runners_and_blocks( + monkeypatch: pytest.MonkeyPatch, +): + """ + arrange: A reconciler with no planner client, min_pressure=3, and 1 existing runner. + act: Call start_create_loop. + assert: _last_pressure is set, runners are created to reach min_pressure, then stop is awaited. + """ + mgr = _FakeManager(runners_count=1) + cfg = PressureReconcilerConfig(flavor_name="small", min_pressure=3) + reconciler = PressureReconciler(mgr, planner_client=None, config=cfg, lock=Lock()) + + wait_called = {"called": False} + + def _stop_immediately() -> None: + """Record that wait was called, then stop.""" + wait_called["called"] = True + return None + + monkeypatch.setattr(reconciler._stop, "wait", lambda: _stop_immediately()) + reconciler.start_create_loop() + + assert reconciler._last_pressure == 3 + assert 2 in mgr.created_args + assert wait_called["called"] + + +def test_reconcile_loop_no_planner_uses_min_pressure(monkeypatch: pytest.MonkeyPatch): + """ + arrange: A reconciler with no planner, min_pressure=4, and 2 existing runners. + act: Run the reconcile loop once. + assert: Timer reconcile uses min_pressure and creates runners to reach it. + """ + mgr = _FakeManager(runners_count=2) + cfg = PressureReconcilerConfig(flavor_name="small", min_pressure=4, reconcile_interval=60) + reconciler = PressureReconciler(mgr, planner_client=None, config=cfg, lock=Lock()) + reconciler._last_pressure = 4 + wait_calls = {"count": 0} + + def _wait(_interval: int) -> bool: + """Return False once to enter the loop, then True to exit.""" + wait_calls["count"] += 1 + return wait_calls["count"] > 1 + + monkeypatch.setattr(reconciler._stop, "wait", _wait) + reconciler.start_reconcile_loop() + + assert mgr.cleanup_called == 1 + assert mgr.created_args == [2] + + +def test_build_pressure_reconciler_no_planner_config(): + """ + arrange: An ApplicationConfiguration with planner_url=None and planner_token=None. + act: Call build_pressure_reconciler. + assert: A PressureReconciler is returned with _planner set to None. + """ + from unittest.mock import MagicMock + + from github_runner_manager.manager.pressure_reconciler import build_pressure_reconciler + + mock_config = MagicMock() + mock_config.planner_url = None + mock_config.planner_token = None + mock_config.name = "test" + mock_config.reconcile_interval = 5 + combination = MagicMock() + combination.base_virtual_machines = 2 + combination.max_total_virtual_machines = 10 + mock_config.non_reactive_configuration.combinations = [combination] + + reconciler = build_pressure_reconciler(mock_config, MagicMock(), Lock()) + + assert reconciler._planner is None + + +@pytest.mark.parametrize( + "planner_url, planner_token", + [ + pytest.param("https://planner.example.com", None, id="url_without_token"), + pytest.param(None, "secret-token", id="token_without_url"), + ], +) +def test_build_pressure_reconciler_partial_planner_config_raises( + planner_url: str | None, planner_token: str | None +): + """ + arrange: An ApplicationConfiguration with only one of planner_url/planner_token set. + act: Call build_pressure_reconciler. + assert: A ValueError is raised for partial planner configuration. + """ + from unittest.mock import MagicMock + + from github_runner_manager.manager.pressure_reconciler import build_pressure_reconciler + + mock_config = MagicMock() + mock_config.planner_url = planner_url + mock_config.planner_token = planner_token + mock_config.name = "test" + mock_config.reconcile_interval = 5 + combination = MagicMock() + combination.base_virtual_machines = 2 + combination.max_total_virtual_machines = 10 + mock_config.non_reactive_configuration.combinations = [combination] + + with pytest.raises(ValueError, match="[Pp]artial"): + build_pressure_reconciler(mock_config, MagicMock(), Lock()) diff --git a/github-runner-manager/tests/unit/manager/test_runner_manager.py b/github-runner-manager/tests/unit/manager/test_runner_manager.py index 432fd25d6f..e138a9959b 100644 --- a/github-runner-manager/tests/unit/manager/test_runner_manager.py +++ b/github-runner-manager/tests/unit/manager/test_runner_manager.py @@ -8,7 +8,12 @@ import pytest from github_runner_manager.manager.models import RunnerMetadata -from github_runner_manager.manager.runner_manager import FlushMode, RunnerInstance, RunnerManager +from github_runner_manager.manager.runner_manager import ( + FlushMode, + RunnerInfo, + RunnerInstance, + RunnerManager, +) from github_runner_manager.manager.vm_manager import VM, CloudRunnerManager from github_runner_manager.platform.platform_provider import PlatformProvider from github_runner_manager.types_.github import SelfHostedRunner @@ -277,6 +282,74 @@ def test_runner_manager_get_runners( assert all(runner.name in runner_names for runner in expected_runner_instances) +@pytest.mark.parametrize( + "initial_runners, initial_cloud_runners, expected_runner_info", + [ + pytest.param( + [], + [], + RunnerInfo(online=0, busy=0, offline=0, unknown=0, runners=(), busy_runners=()), + id="no runners", + ), + pytest.param( + [idle_runner := SelfHostedRunnerFactory(busy=False, status="online")], + [CloudRunnerInstanceFactory.from_self_hosted_runner(self_hosted_runner=idle_runner)], + RunnerInfo( + online=1, + busy=0, + offline=0, + unknown=0, + runners=(idle_runner.identity.instance_id.name,), + busy_runners=(), + ), + id="one idle runner", + ), + pytest.param( + [busy_runner := SelfHostedRunnerFactory(busy=True, status="online")], + [CloudRunnerInstanceFactory.from_self_hosted_runner(self_hosted_runner=busy_runner)], + RunnerInfo( + online=1, + busy=1, + offline=0, + unknown=0, + runners=(busy_runner.identity.instance_id.name,), + busy_runners=(busy_runner.identity.instance_id.name,), + ), + id="one busy runner", + ), + pytest.param( + [offline_runner := SelfHostedRunnerFactory(busy=False, status="offline")], + [ + CloudRunnerInstanceFactory.from_self_hosted_runner( + self_hosted_runner=offline_runner + ) + ], + RunnerInfo(online=0, busy=0, offline=1, unknown=0, runners=(), busy_runners=()), + id="one offline runner", + ), + ], +) +def test_get_runner_info( + initial_runners: list[SelfHostedRunner], + initial_cloud_runners: list[VM], + expected_runner_info: RunnerInfo, +): + """ + arrange: Given GitHub runners and Cloud runners. + act: Call get_runner_info on the RunnerManager. + assert: The returned RunnerInfo matches expected counts and names. + """ + mock_platform = FakeGitHubRunnerPlatform(initial_runners=initial_runners) + mock_cloud = FakeCloudRunnerManager(initial_cloud_runners=initial_cloud_runners) + manager = RunnerManager( + "test-manager", platform_provider=mock_platform, cloud_runner_manager=mock_cloud, labels=[] + ) + + result = manager.get_runner_info() + + assert result == expected_runner_info + + @pytest.mark.parametrize( "initial_runners, initial_cloud_runners, num_delete, expected_runners, expected_cloud_runners", [ diff --git a/github-runner-manager/tests/unit/test_http_server.py b/github-runner-manager/tests/unit/test_http_server.py index b74ec98406..396fe61749 100644 --- a/github-runner-manager/tests/unit/test_http_server.py +++ b/github-runner-manager/tests/unit/test_http_server.py @@ -12,9 +12,8 @@ import pytest from flask.testing import FlaskClient -from github_runner_manager.manager.runner_manager import FlushMode -from src.github_runner_manager.http_server import APP_CONFIG_NAME, OPENSTACK_CONFIG_NAME, app -from src.github_runner_manager.manager.runner_scaler import RunnerInfo, RunnerScaler +from github_runner_manager.manager.runner_manager import FlushMode, RunnerInfo +from src.github_runner_manager.http_server import RUNNER_MANAGER_CONFIG_NAME, app @pytest.fixture(name="lock", scope="function") @@ -22,77 +21,67 @@ def lock_fixture() -> Lock: return Lock() +@pytest.fixture(name="mock_runner_manager", scope="function") +def mock_runner_manager_fixture() -> MagicMock: + return MagicMock() + + @pytest.fixture(name="client", scope="function") -def client_fixture(lock: Lock, monkeypatch) -> Iterator[FlaskClient]: +def client_fixture( + lock: Lock, mock_runner_manager: MagicMock, monkeypatch +) -> Iterator[FlaskClient]: app.debug = True app.config["TESTING"] = True - app.config[APP_CONFIG_NAME] = MagicMock() - app.config[OPENSTACK_CONFIG_NAME] = MagicMock() + app.config[RUNNER_MANAGER_CONFIG_NAME] = mock_runner_manager monkeypatch.setattr("src.github_runner_manager.http_server._lock", lock) with app.test_client() as client: yield client -@pytest.fixture(name="mock_runner_scaler", scope="function") -def mock_runner_scaler_fixture(monkeypatch) -> MagicMock: - mock = MagicMock(spec=RunnerScaler) - monkeypatch.setattr( - "src.github_runner_manager.reconcile_service.RunnerScaler.build", lambda x, y, z: mock - ) - return mock - - def test_flush_runner_default_args( - client: FlaskClient, lock: Lock, mock_runner_scaler: MagicMock + client: FlaskClient, lock: Lock, mock_runner_manager: MagicMock ) -> None: """ - arrange: Start up a test flask server with client. + arrange: Start up a test flask server with a mock runner manager. act: Run flush runner with no args. assert: Should flush idle runners. """ - app.config["lock"] = lock - response = client.post("/runner/flush") assert response.status_code == 204 assert not lock.locked() - mock_runner_scaler.flush.assert_called_once_with(FlushMode.FLUSH_IDLE) + mock_runner_manager.flush_runners.assert_called_once_with(FlushMode.FLUSH_IDLE) def test_flush_runner_flush_busy( - client: FlaskClient, lock: Lock, mock_runner_scaler: MagicMock + client: FlaskClient, lock: Lock, mock_runner_manager: MagicMock ) -> None: """ - arrange: Start up a test flask server with client. + arrange: Start up a test flask server with a mock runner manager. act: Run flush runner with flush-busy = True. assert: Should flush both idle and busy runners. - """ - app.config["lock"] = lock - response = client.post("/runner/flush?flush-busy=true") assert response.status_code == 204 assert not lock.locked() - mock_runner_scaler.flush.assert_called_once_with(FlushMode.FLUSH_BUSY) + mock_runner_manager.flush_runners.assert_called_once_with(FlushMode.FLUSH_BUSY) def test_flush_runner_unlocked( - client: FlaskClient, lock: Lock, mock_runner_scaler: MagicMock + client: FlaskClient, lock: Lock, mock_runner_manager: MagicMock ) -> None: """ - arrange: Start up a test flask server with client. The lock is unlocked. + arrange: Start up a test flask server with a mock runner manager. The lock is unlocked. act: Run flush runner. assert: The flush runner should run. """ - app.config["lock"] = lock - response = client.post("/runner/flush?flush-busy=false") assert response.status_code == 204 assert not lock.locked() - mock_runner_scaler.flush.assert_called_once_with(FlushMode.FLUSH_IDLE) + mock_runner_manager.flush_runners.assert_called_once_with(FlushMode.FLUSH_IDLE) def test_flush_runner_locked(client: FlaskClient, lock: Lock) -> None: @@ -112,26 +101,31 @@ def test_flush_runner_locked(client: FlaskClient, lock: Lock) -> None: flush.terminate() -def test_check_runner(client: FlaskClient, lock: Lock, mock_runner_scaler: MagicMock) -> None: +def test_check_runner(client: FlaskClient, lock: Lock, mock_runner_manager: MagicMock) -> None: """ - arrange: Mock runner scaler to return mock data on get runner info. - act: HTTP Get to /runner/check - assert: Returns the correct status code and content. + arrange: Mock runner manager to return a RunnerInfo. + act: HTTP Get to /runner/check. + assert: Returns the correct status code and serialized RunnerInfo. """ - app.config["lock"] = lock - mock_runner_scaler.get_runner_info.return_value = RunnerInfo( - online=1, busy=0, offline=0, unknown=0, runners=("mock_runner",), busy_runners=tuple() + mock_runner_manager.get_runner_info.return_value = RunnerInfo( + online=2, + busy=1, + offline=1, + unknown=1, + runners=("idle-runner", "busy-runner"), + busy_runners=("busy-runner",), ) response = client.get("/runner/check") assert response.status_code == 200 assert not lock.locked() - assert json.loads(response.text) == { - "online": 1, - "busy": 0, - "offline": 0, - "unknown": 0, - "runners": ["mock_runner"], - "busy_runners": [], + data = json.loads(response.text) + assert data == { + "online": 2, + "busy": 1, + "offline": 1, + "unknown": 1, + "runners": ["idle-runner", "busy-runner"], + "busy_runners": ["busy-runner"], } diff --git a/src/manager_service.py b/src/manager_service.py index 5cbcbab679..5cce925da7 100644 --- a/src/manager_service.py +++ b/src/manager_service.py @@ -6,7 +6,6 @@ import fcntl import json import logging -import os import socket import textwrap from pathlib import Path @@ -344,7 +343,6 @@ def _setup_service_file(unit_name: str, config_file: Path, log_file: Path, log_l log_file: The file location to store the logs. log_level: The log level of the service. """ - python_path = Path(os.getcwd()) / "venv" instance = _normalized_unit(unit_name) # NOTE: Port allocation and persistence are performed under a process-wide # lock in `ensure_http_port_for_unit()`; this returns a stable per-unit port. @@ -360,7 +358,7 @@ def _setup_service_file(unit_name: str, config_file: Path, log_file: Path, log_l Group={constants.RUNNER_MANAGER_GROUP} ExecStart=github-runner-manager --config-file {str(config_file)} --host \ {GITHUB_RUNNER_MANAGER_ADDRESS} --port {http_port} \ ---python-path {str(python_path)} --log-level {log_level} +--log-level {log_level} Restart=on-failure RestartSec=30 RestartSteps=5