Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import logging

from celery import shared_task
from django.db import InterfaceError, OperationalError

from bk_plugin_framework.envs import settings
from bk_plugin_framework.hub import VersionHub
Expand All @@ -22,6 +23,9 @@

logger = logging.getLogger("bk_plugin")

# Transient database connection errors: a DB blip can heal itself through retries,
# so it should not immediately cause the schedule to be judged as failed.
TRANSIENT_DB_EXC = (OperationalError, InterfaceError)


def _set_schedule_state(trace_id: str, state: State):
try:
Expand All @@ -30,12 +34,27 @@ def _set_schedule_state(trace_id: str, state: State):
logger.exception("[execute] set schedule state error")


@shared_task(ignore_result=True)
def schedule(trace_id: str):
@shared_task(bind=True, ignore_result=True, max_retries=6, default_retry_delay=5)
def schedule(self, trace_id: str):
local.set_trace_id(trace_id)

try:
schedule = Schedule.objects.get(trace_id=trace_id)
except TRANSIENT_DB_EXC as exc:
# DB 瞬时不可达:重试本次轮询而非判失败,避免误杀运行中的插件;重试用尽才兜底置失败
if self.request.retries >= self.max_retries:
logger.error(
"[schedule_task] db unreachable, give up fetching schedule obj %s after %s retries"
% (trace_id, self.request.retries)
)
_set_schedule_state(trace_id=trace_id, state=State.FAIL)
return
countdown = min(2 ** self.request.retries * 5, 60)
logger.warning(
"[schedule_task] transient db error when fetching schedule obj %s (retry=%s), retry in %ss: %s"
% (trace_id, self.request.retries, countdown, exc)
)
raise self.retry(exc=exc, countdown=countdown)
except Exception:
logger.exception("[schedule_task] fetch schedule obj %s failed" % trace_id)
_set_schedule_state(trace_id=trace_id, state=State.FAIL)
Expand Down
40 changes: 40 additions & 0 deletions bk-plugin-framework/tests/runtime/schedule/celery/test_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from unittest.mock import MagicMock, patch

import pytest
from django.db import OperationalError

from bk_plugin_framework.kit import State
from bk_plugin_framework.runtime.schedule.celery import tasks
Expand Down Expand Up @@ -108,3 +109,42 @@ def test_schedule__execute_success(self, trace_id, schedule_id):
plugin_cls=VersionHub.all_plugins().get(schedule_obj.plugin_version), schedule=schedule_obj
)
Schedule.objects.filter.assert_not_called()

def test_schedule__transient_db_error_will_retry(self, trace_id, schedule_id):
    """A transient DB error while fetching the schedule object triggers a task retry.

    ``Schedule.objects.get`` is mocked to raise ``OperationalError`` and the
    task's ``retry`` is patched to raise a local sentinel exception, so the
    test can observe the retry call without involving a real Celery worker.
    """

    class _Retry(Exception):
        """Sentinel raised by the patched ``retry`` in place of Celery's own exception."""

    db_err = OperationalError("(2003, \"Can't connect to MySQL server (110)\")")
    schedule_model = MagicMock()
    schedule_model.objects.get.side_effect = db_err

    with patch(
        "bk_plugin_framework.runtime.schedule.celery.tasks.Schedule", schedule_model
    ), patch.object(tasks.schedule, "retry", side_effect=_Retry) as mock_retry, pytest.raises(_Retry):
        tasks.schedule(trace_id)

    assert local.get_trace_id() == trace_id

    schedule_model.objects.get.assert_called_once_with(trace_id=trace_id)
    # A transient DB connection error should trigger a retry instead of
    # marking the schedule as failed right away.
    mock_retry.assert_called_once()
    _, retry_kwargs = mock_retry.call_args
    assert retry_kwargs["exc"] is db_err
    assert retry_kwargs["countdown"] == 5
    schedule_model.objects.filter.assert_not_called()

def test_schedule__transient_db_error_set_fail_when_retries_exhausted(self, trace_id, schedule_id):
    """Once retries are used up, a transient DB error marks the schedule as failed.

    ``max_retries`` is patched to 0 so the very first attempt already counts
    as exhausted; the task must then skip ``retry`` and fall back to writing
    the FAIL state.
    """
    schedule_model = MagicMock()
    schedule_model.objects.get.side_effect = OperationalError()

    with patch(
        "bk_plugin_framework.runtime.schedule.celery.tasks.Schedule", schedule_model
    ), patch.object(tasks.schedule, "retry") as mock_retry, patch.object(tasks.schedule, "max_retries", 0):
        tasks.schedule(trace_id)

    assert local.get_trace_id() == trace_id

    schedule_model.objects.get.assert_called_once_with(trace_id=trace_id)
    # Retries are exhausted: no further retry, fall back to marking the schedule failed.
    mock_retry.assert_not_called()
    filter_mock = schedule_model.objects.filter
    filter_mock.assert_called_once_with(trace_id=trace_id)
    filter_mock.return_value.update.assert_called_once_with(state=State.FAIL.value)
Loading