Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions docs/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

This changelog documents user-relevant changes to the GitHub runner charm.

## 2026-03-30

- Fixed stale `systemd` service files left behind when a unit is removed from a co-located machine. The service is now disabled, the service file removed, and the unit data cleaned up on stop.

## 2026-03-25

- Removed `mongodb` relation. MongoDB-based reactive runner spawning is no longer supported. **If you have an active MongoDB relation, remove it with `juju remove-relation` before upgrading.**
Expand Down
2 changes: 1 addition & 1 deletion github-runner-manager/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

[project]
name = "github-runner-manager"
version = "0.17.0"
version = "0.17.1"
authors = [
{ name = "Canonical IS DevOps", email = "is-devops-team@canonical.com" },
]
Expand Down
2 changes: 1 addition & 1 deletion src/charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -448,7 +448,7 @@ def _log_charm_status(self) -> None:
def _on_stop(self, _: StopEvent) -> None:
"""Handle the stopping of the charm."""
self._manager_client.flush_runner(busy=True)
manager_service.stop(self.unit.name)
manager_service.cleanup(self.unit.name)

def _install_deps(self) -> None:
"""Install dependences for the charm."""
Expand Down
51 changes: 51 additions & 0 deletions src/manager_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import fcntl
import json
import logging
import shutil
import socket
import textwrap
from pathlib import Path
Expand Down Expand Up @@ -39,6 +40,7 @@
_INSTALL_ERROR_MESSAGE = "Unable to install github-runner-manager package from source"
_SERVICE_SETUP_ERROR_MESSAGE = "Unable to enable or start the github-runner-manager application"
_SERVICE_STOP_ERROR_MESSAGE = "Unable to stop the github-runner-manager application"
_SERVICE_CLEANUP_ERROR_MESSAGE = "Unable to clean up the github-runner-manager service"

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -267,6 +269,34 @@ def stop(unit_name: str) -> None:
_stop(unit_name)


def cleanup(unit_name: str) -> None:
"""Stop, disable, and remove all artifacts for a unit's service.

Used during unit removal to ensure no stale systemd service or data remains
on the machine.

Args:
unit_name: The Juju unit name.

Raises:
RunnerManagerApplicationStopError: Failed to clean up the service.
"""
_stop(unit_name)
instance_service = _instance_service_name(unit_name)
service_file = SYSTEMD_SERVICE_PATH / f"{instance_service}.service"
try:
systemd.service_disable(instance_service)
except SystemdError:
logger.warning("Failed to disable %s, unit may already be absent", instance_service)
try:
service_file.unlink(missing_ok=True)
systemd.daemon_reload()
except (SystemdError, OSError) as err:
raise RunnerManagerApplicationStopError(_SERVICE_CLEANUP_ERROR_MESSAGE) from err

_remove_unit_data(unit_name)


def _stop(unit_name: str) -> None:
"""Stop the GitHub runner manager service.

Expand Down Expand Up @@ -375,3 +405,24 @@ def _setup_service_file(unit_name: str, config_file: Path, log_file: Path, log_l
)
service_path.parent.mkdir(parents=True, exist_ok=True)
service_path.write_text(service_file_content, "utf-8")


def _remove_unit_data(unit_name: str) -> None:
"""Remove the per-unit data directory and log file.

Raises:
RunnerManagerApplicationStopError: Failed to remove unit data.
"""
normalized = _normalized_unit(unit_name)
unit_dir = GITHUB_RUNNER_MANAGER_SERVICE_DIR / normalized
try:
shutil.rmtree(unit_dir)
except FileNotFoundError:
pass
except OSError as exc:
raise RunnerManagerApplicationStopError(_SERVICE_CLEANUP_ERROR_MESSAGE) from exc
log_file = GITHUB_RUNNER_MANAGER_SERVICE_LOG_DIR / f"{normalized}.log"
try:
log_file.unlink(missing_ok=True)
except OSError as exc:
raise RunnerManagerApplicationStopError(_SERVICE_CLEANUP_ERROR_MESSAGE) from exc
6 changes: 3 additions & 3 deletions tests/unit/test_charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,11 +307,11 @@ def test__on_config_changed_no_flush(monkeypatch: pytest.MonkeyPatch, config_opt
harness.charm._manager_client.flush_runner.assert_not_called()


def test_on_stop_busy_flush_and_stop_service(harness: Harness, monkeypatch: pytest.MonkeyPatch):
def test_on_stop_busy_flush_and_cleanup_service(harness: Harness, monkeypatch: pytest.MonkeyPatch):
"""
arrange: Set up charm with Openstack mode and runner scaler mock.
act: Trigger stop event.
assert: Runner scaler mock flushes the runners using busy mode.
assert: Runners are flushed in busy mode and service cleanup is called.
"""
state_mock = MagicMock()
harness.charm._setup_state = MagicMock(return_value=state_mock)
Expand All @@ -324,7 +324,7 @@ def test_on_stop_busy_flush_and_stop_service(harness: Harness, monkeypatch: pyte
harness.charm._on_stop(mock_event)

manager_client_mock.flush_runner.assert_called_once_with(busy=True)
mock_manager_service.stop.assert_called_once()
mock_manager_service.cleanup.assert_called_once()


@pytest.mark.parametrize(
Expand Down
79 changes: 79 additions & 0 deletions tests/unit/test_manager_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,85 @@ def test_stop_with_stopped_service(mock_systemd: MagicMock):
mock_systemd.service_stop.assert_not_called()


def test_cleanup_removes_service_and_data(mock_systemd: MagicMock, patched_paths: PatchedPaths):
"""
arrange: A running service with service file, data dir, and log file on disk.
act: Run cleanup.
assert: Service is stopped, disabled, service file removed, daemon reloaded,
data dir and log file removed.
"""
unit_name = "test-unit/0"
normalized = "test-unit-0"
instance_service = f"github-runner-manager@{normalized}"
mock_systemd.service_running.return_value = True

service_file = patched_paths.systemd_service_path / f"{instance_service}.service"
service_file.parent.mkdir(parents=True, exist_ok=True)
service_file.write_text("mock service", encoding="utf-8")

unit_dir = patched_paths.service_dir / normalized
unit_dir.mkdir(parents=True, exist_ok=True)
(unit_dir / "config.yaml").write_text("mock config", encoding="utf-8")
(unit_dir / "http_port").write_text("55555", encoding="utf-8")

log_file = patched_paths.service_log_dir / f"{normalized}.log"
log_file.parent.mkdir(parents=True, exist_ok=True)
log_file.write_text("mock log", encoding="utf-8")

manager_service.cleanup(unit_name)

mock_systemd.service_stop.assert_called_once_with(instance_service)
mock_systemd.service_disable.assert_called_once_with(instance_service)
mock_systemd.daemon_reload.assert_called_once()
assert not service_file.exists()
assert not unit_dir.exists()
assert not log_file.exists()


def test_cleanup_idempotent_missing_files(mock_systemd: MagicMock, patched_paths: PatchedPaths):
"""
arrange: Service is not running and no files exist on disk.
act: Run cleanup.
assert: No errors raised; disable is attempted, daemon_reload still called.
"""
mock_systemd.service_running.return_value = False
mock_systemd.service_disable.side_effect = SystemdError("unit not found")

manager_service.cleanup(unit_name="test-unit/0")

mock_systemd.service_stop.assert_not_called()
mock_systemd.service_disable.assert_called_once()
mock_systemd.daemon_reload.assert_called_once()


def test_cleanup_daemon_reload_error(mock_systemd: MagicMock):
"""
arrange: systemd.daemon_reload raises SystemdError.
act: Run cleanup.
assert: RunnerManagerApplicationStopError is raised.
"""
mock_systemd.service_running.return_value = False
mock_systemd.daemon_reload.side_effect = SystemdError("Mock error")

with pytest.raises(manager_service.RunnerManagerApplicationStopError):
manager_service.cleanup(unit_name="test-unit/0")


def test_cleanup_rmtree_permission_error(
mock_systemd: MagicMock, patched_paths: PatchedPaths, monkeypatch
):
"""
arrange: shutil.rmtree raises PermissionError.
act: Run cleanup.
assert: RunnerManagerApplicationStopError is raised.
"""
mock_systemd.service_running.return_value = False
monkeypatch.setattr("shutil.rmtree", MagicMock(side_effect=PermissionError("denied")))

with pytest.raises(manager_service.RunnerManagerApplicationStopError):
manager_service.cleanup(unit_name="test-unit/0")


# 2026-01-19 Skip the mocks fixture to test the actual ensure_http_port_for_unit implementation.
# The mocks fixture (see conftest.py) normally patches this function to return 55555.
@pytest.mark.nomocks
Expand Down
Loading