From e780154012f8493486e97a747451282da03e34eb Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Sun, 19 Apr 2026 08:05:09 +0000 Subject: [PATCH] Handle GitHub metadata timeout without failing webhook Co-authored-by: Armen Zambrano G. --- src/github_sdk.py | 102 ++++++++++++++++++++++++++++----------- tests/test_github_sdk.py | 37 ++++++++++++++ 2 files changed, 112 insertions(+), 27 deletions(-) diff --git a/src/github_sdk.py b/src/github_sdk.py index cf28d01..bcd1504 100644 --- a/src/github_sdk.py +++ b/src/github_sdk.py @@ -16,6 +16,15 @@ class GithubSentryError(Exception): pass +def _repo_from_run_url(run_url): + try: + parts = run_url.split("/") + repos_index = parts.index("repos") + return f"{parts[repos_index + 1]}/{parts[repos_index + 2]}" + except (AttributeError, ValueError, IndexError): + return None + + def get_uuid(): return uuid.uuid4().hex @@ -46,41 +55,80 @@ def _fetch_github(self, url): req.raise_for_status() return req + def _default_metadata(self, job): + tags = { + # e.g. success, failure, skipped + "job_status": job["conclusion"], + "commit": job.get("head_sha"), + "run_attempt": job["run_attempt"], # Rerunning a job + } + repo = _repo_from_run_url(job.get("run_url")) + if repo: + tags["repo"] = repo + return { + "author": {}, + "data": {"job": job["html_url"]}, + "tags": tags, + } + def _get_extra_metadata(self, job): # XXX: This is the slowest call - runs = self._fetch_github(job["run_url"]).json() - workflow = self._fetch_github(runs["workflow_url"]).json() - repo = runs["repository"]["full_name"] - meta = { - # "workflow_name": workflow["name"], - "author": runs["head_commit"]["author"], - # https://getsentry.atlassian.net/browse/TET-22 - # Tags are not linkified externally, plain text data can be selected in browsers and opened - "data": { - "job": job["html_url"], + meta = self._default_metadata(job) + try: + runs = self._fetch_github(job["run_url"]).json() + except requests.RequestException as error: + logging.warning( + "Failed to fetch GitHub run metadata for %s. Sending minimal trace metadata.", + job.get("run_url"), + exc_info=error, + ) + return meta + + meta["author"] = runs.get("head_commit", {}).get("author", {}) + meta["tags"].update( + { + "branch": runs.get("head_branch"), + "commit": runs.get("head_sha", meta["tags"].get("commit")), + "run_attempt": runs.get("run_attempt", meta["tags"]["run_attempt"]), + "event": runs.get("event"), }, - "tags": { - # e.g. success, failure, skipped - "job_status": job["conclusion"], - "branch": runs["head_branch"], - "commit": runs["head_sha"], - "repo": repo, - "run_attempt": runs["run_attempt"], # Rerunning a job - "event": runs["event"], + ) + repo = runs.get("repository", {}).get("full_name") + if repo: + meta["tags"]["repo"] = repo + + workflow_url = runs.get("workflow_url") + if workflow_url: + try: + workflow = self._fetch_github(workflow_url).json() + except requests.RequestException as error: + logging.warning( + "Failed to fetch GitHub workflow metadata for %s. Continuing without workflow tag.", + workflow_url, + exc_info=error, + ) + else: + workflow_path = workflow.get("path") # It allows querying jobs within the same workflow (e.g. foo.yml) - "workflow": workflow["path"].rsplit("/")[-1], - }, - } - if runs.get("pull_requests"): - pr_number = runs["pull_requests"][0]["number"] - meta["data"]["pr"] = f"https://github.com/{repo}/pull/{pr_number}" - meta["tags"]["pull_request"] = pr_number + if workflow_path: + meta["tags"]["workflow"] = workflow_path.rsplit("/")[-1] + + pull_requests = runs.get("pull_requests") or [] + if pull_requests: + pr_number = pull_requests[0].get("number") + if pr_number: + repo = meta["tags"].get("repo") + if repo: + meta["data"]["pr"] = f"https://github.com/{repo}/pull/{pr_number}" + meta["tags"]["pull_request"] = pr_number if job["conclusion"] == "failure": failing_steps = [ - step for step in job["steps"] if step["conclusion"] == "failure" + step for step in job.get("steps", []) if step.get("conclusion") == "failure" ] if len(failing_steps) > 0: - meta["tags"]["failing_step"] = failing_steps[0]["name"] + meta["tags"]["failing_step"] = failing_steps[0].get("name") + + meta["tags"] = {key: value for key, value in meta["tags"].items() if value is not None} return meta diff --git a/tests/test_github_sdk.py b/tests/test_github_sdk.py index 7f7e401..697e269 100644 --- a/tests/test_github_sdk.py +++ b/tests/test_github_sdk.py @@ -2,6 +2,7 @@ import sys from datetime import datetime +from unittest.mock import Mock from unittest.mock import patch import pytest @@ -158,3 +159,39 @@ def test_send_trace( # resp.request.body # == b"\x1f\x8b\x08\x00\xf1\x16}b\x02\xff\xb5TM\x8f\xd30\x10\xbd\xef\xaf\x88|\x02\xa9m\x1c\xc7\x89\x93H\x08\xd0\x8a;\x12\x9c@\xa8\x9a\xd8\xe3&\xbb\xf9\"vX\xaa\xaa\xff\x1d{\xdb\xee\x97\x96n\xcb\x8aS\xd3\x99\xf1\xf8\xbd7o\xbc\xd9^l\x88]\x0fH\nbG\xe8\x0cH[\xf7\x1d\x99\x11\xd9w\x16;\xbb\xdc'a\x18\x9aZ\x82O\x86W\xe6\xb6\xa2\xc1ne+RD\x19\xa7\xbe\r\xfe\xf2\xf5\xb5r\xd5\t \x139H\x95g\x8c\xcbRC\x96P\xceh\x14K]\xea\xac\x14\xee\xf4\xb3\x97>\xfcW\x10=\xdebP\x81EcM\xf0\x86\xbe=\xe0\xfam\r)6\xbe\\\xa2\xff0\x03t\xbb\x9b\x81\xd3He\xb1\x14()\xcf\x13\xbdk*q\x97\xcd\xa2\x92\x96\x08)\xa0\xd2<\x8b\x04\x08\xaeb\xa6\x04Ms\x9a)\xcd\x1e\xe1r\xadgD\x81\x05\x7f\xc3U_\xbahe\xed`\x8a0\\\xd5\xb6\x9a\xca\x85\xec\xdbp\x85\xd68\xde\xe3:\xdc\xff\x8cSg\xc2$KD\xc2\x04O\xe8{Y\xa1\xbc^\x9a\xa9\xb6\xb8\xd4\xbd\x9c\xcc;;N\xbe\xf50\x9e\xd8q\x98\x9a&\x8c\xe3\x98\x0b\xb2\x9d\x91~\xf8\x9b4\n\x8d\x1c\xeb\xe1\x98z\xc6\x82\x9d\x9cv\xa4\xbf&[\xd7l28zz\x1d\xb4\x9e\xf5\xc7\xaaE\x15|\xb2\xa8\xd7\xae\x18[\xa8\x1b\xaf\xa9\x8f.\xd0G#\xf6a\xe5\xa3\x1e\xa8\x07\xe3\xfa\x8d\xce#u\xeb\xee\x80\xd6#c\x94\xb19\xe5s\x9a~\x8d\xf2\"aE$\xbeyY\x9f/a\xb4\xa0I\x11\xefJ`e\xf6R/\xefp\x9aIJ4\xc6\xa5K\xe7\rY\x1d\xe0\x84\xa0\xd4\xdc\xa1\xae\xbb\xd5\xbc\x81\xb5c\xe1\xad\xd1\xb6\xb5\xf5\xd4U.R\x16e\x90s\x95\x01\x95\x8aQ\xe7\x88(B\x10\xaa\xc44\x93\x89\x88Qs\xe5\xce\x8c8\xf4\xee\xc4S\xcd}f\xea\x96`-\xb6\x83k\x19\xcd\xc8M?^\xeb\xa6\xbf\xf1\x08\x1c\xa6\xc1:8\xb8X\xb7\x8d\x1f\xa5\x9b\xd0r\xc4\x9f\x93\xe3H\x8a\xdbQyq\x9c+\x1d\x87\xef\x9b\xdd\xcc\xbe\xa0\r\xa6!\xf0N\x9a\x1d\x04\x7f\x14\x1b`\xf4\x1bt\xd4\xcc\xf7I*X\xaaK\x0e,\xe6\x11\x17B\x92\xd3\xa6\x91.(\xa5G&\xb2+c\xf4\xae\xec\x8c\xed\xd9\xce\xf6T?;=\x82U%\xc7E\xdd?\xf0\xf3n\xb3\xe7\x95m\x9b\xb9\xed\xe7u\x0b+,\x1a\xf0\x06\xbd\x97\xe4\x9f\xce\x9e'\x1d\x08!RT\x11S\x00\x9c\xe7\xe5i\xd2=\xd0\xe4XY\x92\xbdN\xba\xde\xd8\xe0\xd2\xbf\x19\xfdd\x83;7\x1e\xc4y>{\x1e\xfd\x14\x9da\xb4\x8e\x05\xd39\xcf3x\x89\xfeaI_\xa0\xbf/K^E\xff\xb2o\x87\xc6=5\x8f\xd7\xe4I\xf4<\xba\x8e\xa2\xdbWT\x98\xe8\x88\xbb\xa7\xe1D\xba\xc9\xff\xa4\xfbc{\xf1\x07Hk>,{\x07\x00\x00" # ) + + +def test_trace_generation_falls_back_on_run_timeout(jobA_job): + client = GithubClient(dsn=DSN, token=TOKEN) + with patch.object(client, "_fetch_github", side_effect=requests.ConnectTimeout): + trace = client._generate_trace(jobA_job) + + assert trace["user"] == {} + assert trace["contexts"]["trace"]["data"] == {"job": jobA_job["html_url"]} + assert trace["tags"]["job_status"] == jobA_job["conclusion"] + assert trace["tags"]["commit"] == jobA_job["head_sha"] + assert trace["tags"]["run_attempt"] == jobA_job["run_attempt"] + assert trace["tags"]["repo"] == "getsentry/sentry" + assert "workflow" not in trace["tags"] + assert "event" not in trace["tags"] + assert "branch" not in trace["tags"] + + +def test_trace_generation_continues_when_workflow_fetch_times_out(jobA_job, jobA_runs): + client = GithubClient(dsn=DSN, token=TOKEN) + runs_response = Mock() + runs_response.json.return_value = jobA_runs + with patch.object( + client, + "_fetch_github", + side_effect=[runs_response, requests.ConnectTimeout], + ): + trace = client._generate_trace(jobA_job) + + assert trace["user"] == jobA_runs["head_commit"]["author"] + assert trace["tags"]["job_status"] == jobA_job["conclusion"] + assert trace["tags"]["branch"] == jobA_runs["head_branch"] + assert trace["tags"]["event"] == jobA_runs["event"] + assert trace["tags"]["repo"] == jobA_runs["repository"]["full_name"] + assert trace["tags"]["run_attempt"] == jobA_runs["run_attempt"] + assert "workflow" not in trace["tags"]