Skip to content

Commit aa7d274

Browse files
committed
feat: support no-planner mode in PressureReconciler and rewire http_server
PressureReconciler now accepts planner_client=None. When no planner is configured, the create loop sets _last_pressure to min_pressure and blocks, while the reconcile loop maintains runners via timer-based reconciliation using the static min_pressure value. http_server endpoints now use RunnerManager directly instead of the legacy RunnerScaler, building it once at startup rather than per-request. The /runner/check response maintains backward compatibility with the existing RunnerInfo JSON shape consumed by manager_client.py. build_runner_manager is now public for use by both the http_server and other callers that need a RunnerManager from ApplicationConfiguration.
1 parent 5f6d904 commit aa7d274

7 files changed

Lines changed: 199 additions & 72 deletions

File tree

docs/changelog.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,11 @@
22

33
This changelog documents user-relevant changes to the GitHub runner charm.
44

5+
## 2026-03-18
6+
7+
- PressureReconciler now supports no-planner mode: when no planner relation is configured, it uses `base-virtual-machines` as static pressure to maintain the configured minimum runner count.
8+
- HTTP server endpoints (`/runner/check`, `/runner/flush`) now use `RunnerManager` directly instead of the legacy `RunnerScaler`.
9+
510
## 2026-03-17
611

712
- Pause pressure reconciler create loop after zero-create attempts until the next reconcile run re-syncs state and re-enables creation, reducing repeated retries during OpenStack quota and similar create failures.

github-runner-manager/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
[project]
55
name = "github-runner-manager"
6-
version = "0.15.1"
6+
version = "0.16.0"
77
authors = [
88
{ name = "Canonical IS DevOps", email = "is-devops-team@canonical.com" },
99
]

github-runner-manager/src/github_runner_manager/cli.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
from github_runner_manager.manager.pressure_reconciler import (
2121
PressureReconciler,
2222
build_pressure_reconciler,
23+
build_runner_manager,
2324
)
2425
from github_runner_manager.reconcile_service import start_reconcile_service
2526
from github_runner_manager.thread_manager import ThreadManager
@@ -128,10 +129,13 @@ def main( # pylint: disable=too-many-arguments, too-many-positional-arguments
128129
config = ApplicationConfiguration.from_yaml_file(StringIO(config_file.read()))
129130
lock = Lock()
130131

132+
combinations = config.non_reactive_configuration.combinations
133+
runner_manager = build_runner_manager(config, combinations[0])
134+
131135
thread_manager = ThreadManager()
132136
http_server_args = FlaskArgs(host=host, port=port, debug=debug)
133137
thread_manager.add_thread(
134-
target=partial(start_http_server, config, lock, http_server_args),
138+
target=partial(start_http_server, runner_manager, lock, http_server_args),
135139
daemon=True,
136140
)
137141

github-runner-manager/src/github_runner_manager/http_server.py

Lines changed: 41 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,21 @@
11
# Copyright 2026 Canonical Ltd.
22
# See LICENSE file for licensing details.
33

4-
"""The HTTP server for github-runner-manager.
4+
"""The HTTP server for github-runner-manager."""
55

6-
The HTTP server for request to the github-runner-manager.
7-
"""
8-
9-
import dataclasses
106
import json
117
from dataclasses import dataclass
128
from threading import Lock
139

1410
from flask import Flask, request
1511
from prometheus_client import generate_latest
1612

17-
from github_runner_manager.configuration import ApplicationConfiguration
1813
from github_runner_manager.errors import CloudError, LockError
19-
from github_runner_manager.manager.runner_manager import FlushMode
20-
from github_runner_manager.reconcile_service import get_runner_scaler
14+
from github_runner_manager.manager.runner_manager import FlushMode, RunnerManager
15+
from github_runner_manager.platform.platform_provider import PlatformRunnerState
2116

2217
APP_CONFIG_NAME = "app_config"
23-
OPENSTACK_CONFIG_NAME = "openstack_config"
18+
RUNNER_MANAGER_CONFIG_NAME = "runner_manager"
2419

2520
app = Flask(__name__)
2621

@@ -45,15 +40,43 @@ def check_runner() -> tuple[str, int]:
4540
Returns:
4641
Information on the runners in JSON format.
4742
"""
48-
app_config: ApplicationConfiguration = app.config[APP_CONFIG_NAME]
43+
runner_manager: RunnerManager = app.config[RUNNER_MANAGER_CONFIG_NAME]
4944
app.logger.info("Checking runners...")
50-
runner_scaler = get_runner_scaler(app_config)
5145
try:
52-
runner_info = runner_scaler.get_runner_info()
46+
runners = runner_manager.get_runners()
5347
except CloudError as err:
5448
app.logger.exception("Cloud error encountered while getting runner info")
5549
return (str(err), 500)
56-
return (json.dumps(dataclasses.asdict(runner_info)), 200)
50+
51+
online = 0
52+
busy = 0
53+
offline = 0
54+
unknown = 0
55+
runner_names: list[str] = []
56+
busy_runner_names: list[str] = []
57+
for r in runners:
58+
if r.platform_state == PlatformRunnerState.BUSY:
59+
busy += 1
60+
online += 1
61+
runner_names.append(r.name)
62+
busy_runner_names.append(r.name)
63+
elif r.platform_state == PlatformRunnerState.IDLE:
64+
online += 1
65+
runner_names.append(r.name)
66+
elif r.platform_state == PlatformRunnerState.OFFLINE:
67+
offline += 1
68+
else:
69+
unknown += 1
70+
71+
runner_info = {
72+
"online": online,
73+
"busy": busy,
74+
"offline": offline,
75+
"unknown": unknown,
76+
"runners": runner_names,
77+
"busy_runners": busy_runner_names,
78+
}
79+
return (json.dumps(runner_info), 200)
5780

5881

5982
@app.route("/runner/flush", methods=["POST"])
@@ -66,7 +89,7 @@ def flush_runner() -> tuple[str, int]:
6689
Returns:
6790
An empty response.
6891
"""
69-
app_config = app.config[APP_CONFIG_NAME]
92+
runner_manager: RunnerManager = app.config[RUNNER_MANAGER_CONFIG_NAME]
7093

7194
flush_busy_str = request.args.get("flush-busy")
7295
flush_busy = False
@@ -76,15 +99,13 @@ def flush_runner() -> tuple[str, int]:
7699
lock = _get_lock()
77100
with lock:
78101
app.logger.info("Flushing runners...")
79-
runner_scaler = get_runner_scaler(app_config)
80102
app.logger.info("Flushing busy: %s", flush_busy)
81103
flush_mode = FlushMode.FLUSH_BUSY if flush_busy else FlushMode.FLUSH_IDLE
82104
try:
83-
num_flushed = runner_scaler.flush(flush_mode)
105+
runner_manager.flush_runners(flush_mode)
84106
except CloudError as err:
85107
app.logger.exception("Cloud error encountered while flushing runners")
86108
return (str(err), 500)
87-
app.logger.info("Flushed %s runners", num_flushed)
88109
return ("", 204)
89110

90111

@@ -130,22 +151,22 @@ class FlaskArgs:
130151

131152

132153
def start_http_server(
133-
app_config: ApplicationConfiguration,
154+
runner_manager: RunnerManager,
134155
lock: Lock,
135156
flask_args: FlaskArgs,
136157
) -> None:
137158
"""Start the HTTP server for interacting with the github-runner-manager service.
138159
139160
Args:
140-
app_config: The application configuration.
161+
runner_manager: The runner manager for managing runners.
141162
lock: The lock representing modification access to the managed set of runners.
142163
flask_args: The arguments for the flask HTTP server.
143164
"""
144165
app.logger.info("Starting the server...")
145166
# The lock is passed from the caller, hence the need to update the global variable.
146167
global _lock # pylint: disable=global-statement
147168
_lock = lock
148-
app.config[APP_CONFIG_NAME] = app_config
169+
app.config[RUNNER_MANAGER_CONFIG_NAME] = runner_manager
149170
app.run(
150171
host=flask_args.host,
151172
port=flask_args.port,

github-runner-manager/src/github_runner_manager/manager/pressure_reconciler.py

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ class PressureReconciler: # pylint: disable=too-few-public-methods,too-many-ins
112112
def __init__(
113113
self,
114114
manager: RunnerManager,
115-
planner_client: PlannerClient,
115+
planner_client: PlannerClient | None,
116116
config: PressureReconcilerConfig,
117117
lock: Lock,
118118
) -> None:
@@ -122,6 +122,7 @@ def __init__(
122122
manager: Runner manager interface for creating, cleaning up,
123123
and listing runners.
124124
planner_client: Client used to stream pressure updates.
125+
None when no planner relation is configured.
125126
config: Reconciler configuration.
126127
lock: Shared lock to serialize operations with other reconcile loops.
127128
"""
@@ -140,6 +141,14 @@ def start_create_loop(self) -> None:
140141
with self._lock:
141142
self._runner_count = len(self._manager.get_runners())
142143
logger.info("Create loop: initial sync, _runner_count=%s", self._runner_count)
144+
if self._planner is None:
145+
self._last_pressure = self._config.min_pressure
146+
logger.info(
147+
"Create loop: no planner configured, using min_pressure=%s",
148+
self._config.min_pressure,
149+
)
150+
self._stop.wait()
151+
return
143152
while not self._stop.is_set():
144153
try:
145154
for update in self._planner.stream_pressure(self._config.flavor_name):
@@ -413,12 +422,15 @@ def build_pressure_reconciler(config: ApplicationConfiguration, lock: Lock) -> P
413422
"Cannot build PressureReconciler: no non-reactive combinations configured."
414423
)
415424
first = combinations[0]
416-
manager = _build_runner_manager(config, first)
425+
manager = build_runner_manager(config, first)
426+
planner_client: PlannerClient | None = None
427+
if config.planner_url and config.planner_token:
428+
planner_client = PlannerClient(
429+
PlannerConfiguration(base_url=config.planner_url, token=config.planner_token)
430+
)
417431
return PressureReconciler(
418432
manager=manager,
419-
planner_client=PlannerClient(
420-
PlannerConfiguration(base_url=config.planner_url, token=config.planner_token)
421-
),
433+
planner_client=planner_client,
422434
config=PressureReconcilerConfig(
423435
flavor_name=config.name,
424436
reconcile_interval=config.reconcile_interval,
@@ -429,7 +441,7 @@ def build_pressure_reconciler(config: ApplicationConfiguration, lock: Lock) -> P
429441
)
430442

431443

432-
def _build_runner_manager(
444+
def build_runner_manager(
433445
config: ApplicationConfiguration, combination: NonReactiveCombination
434446
) -> RunnerManager:
435447
"""Build a RunnerManager from application config and a flavor/image combination.

github-runner-manager/tests/unit/manager/test_pressure_reconciler.py

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -489,3 +489,81 @@ def test_timer_reconcile_emits_reconciliation_metric(monkeypatch: pytest.MonkeyP
489489
assert event.idle_runners == 2 # IDLE + OFFLINE+HEALTHY
490490
assert event.active_runners == 1
491491
assert event.crashed_runners == 0
492+
493+
494+
def test_create_loop_no_planner_sets_min_pressure_and_blocks(monkeypatch: pytest.MonkeyPatch):
495+
"""
496+
arrange: A reconciler with no planner client and min_pressure=3.
497+
act: Call start_create_loop.
498+
assert: _last_pressure is set to min_pressure, _runner_count is synced, and stop is awaited.
499+
"""
500+
mgr = _FakeManager(runners_count=1)
501+
cfg = PressureReconcilerConfig(flavor_name="small", min_pressure=3)
502+
reconciler = PressureReconciler(mgr, planner_client=None, config=cfg, lock=Lock())
503+
504+
wait_called = {"called": False}
505+
506+
def _stop_immediately() -> None:
507+
"""Record that wait was called, then return immediately instead of blocking."""
508+
wait_called["called"] = True
509+
return None
510+
511+
monkeypatch.setattr(reconciler._stop, "wait", lambda: _stop_immediately())
512+
reconciler.start_create_loop()
513+
514+
assert reconciler._runner_count == 1
515+
assert reconciler._last_pressure == 3
516+
assert wait_called["called"]
517+
518+
519+
def test_reconcile_loop_no_planner_uses_min_pressure(monkeypatch: pytest.MonkeyPatch):
520+
"""
521+
arrange: A reconciler with no planner, min_pressure=4, and 2 existing runners.
522+
act: Run the reconcile loop once.
523+
assert: Timer reconcile uses min_pressure and creates runners to reach it.
524+
"""
525+
mgr = _FakeManager(runners_count=2)
526+
cfg = PressureReconcilerConfig(flavor_name="small", min_pressure=4, reconcile_interval=60)
527+
reconciler = PressureReconciler(mgr, planner_client=None, config=cfg, lock=Lock())
528+
reconciler._last_pressure = 4
529+
wait_calls = {"count": 0}
530+
531+
def _wait(_interval: int) -> bool:
532+
"""Return False once to enter the loop, then True to exit."""
533+
wait_calls["count"] += 1
534+
return wait_calls["count"] > 1
535+
536+
monkeypatch.setattr(reconciler._stop, "wait", _wait)
537+
reconciler.start_reconcile_loop()
538+
539+
assert mgr.cleanup_called == 1
540+
assert mgr.created_args == [2]
541+
542+
543+
def test_build_pressure_reconciler_no_planner_config(monkeypatch: pytest.MonkeyPatch):
544+
"""
545+
arrange: An ApplicationConfiguration with planner_url=None and planner_token=None.
546+
act: Call build_pressure_reconciler.
547+
assert: A PressureReconciler is returned with _planner set to None.
548+
"""
549+
from unittest.mock import MagicMock, patch
550+
551+
from github_runner_manager.manager.pressure_reconciler import build_pressure_reconciler
552+
553+
mock_config = MagicMock()
554+
mock_config.planner_url = None
555+
mock_config.planner_token = None
556+
mock_config.name = "test"
557+
mock_config.reconcile_interval = 5
558+
combination = MagicMock()
559+
combination.base_virtual_machines = 2
560+
combination.max_total_virtual_machines = 10
561+
mock_config.non_reactive_configuration.combinations = [combination]
562+
563+
with patch(
564+
"github_runner_manager.manager.pressure_reconciler.build_runner_manager"
565+
) as mock_build:
566+
mock_build.return_value = MagicMock()
567+
reconciler = build_pressure_reconciler(mock_config, Lock())
568+
569+
assert reconciler._planner is None

0 commit comments

Comments
 (0)