Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions docs/technical_documentation/overview.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ This component is an imported library which is shared across multiple GitHub too

### Historic Usage Data

This section gathers data from AWS S3. The Copilot usage endpoints have a limitation where they only return the last 100 days worth of information. To get around this, the project has an AWS Lambda function which runs weekly and stores data within an S3 bucket.
This section gathers data from AWS S3. The Copilot usage endpoints have a limitation where they only return the last 28 days worth of information. To get around this, the project has an AWS Lambda function which runs weekly and stores data within an S3 bucket.

### Copilot Teams Data
### Copilot Teams Data (Deprecated - functionality removed but may be restored via alternative methods)

This section gathers a list of teams within the organisation with Copilot data and updates the S3 bucket accordingly. This allows all relevant teams to be displayed within the dashboard.
2 changes: 2 additions & 0 deletions docs/technical_documentation/team_usage.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# Copilot Team Usage

Note: This functionality was removed on 19th March 2026 because the endpoint used to fetch team usage metrics is being sunset. It may be restored via alternative methods in the future.

## Overview

This section of the project leverages GitHub OAuth2 for user authentication, granting access to essential data.
Expand Down
2 changes: 1 addition & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,13 @@ six = "^1.17.0"
urllib3 = "^2.6.3"

[tool.poetry.group.dev.dependencies]
black = "^26.3.1"
ruff = "^0.6.5"
pylint = "^3.2.7"
mypy = "^1.11.2"
pytest = "^8.4.1"
pytest-cov = "^6.2.1"
pytest-xdist = "^3.8.0"
black = "^26.3.1"

[tool.poetry.group.docs.dependencies]
mkdocs = "^1.6.0"
Expand Down Expand Up @@ -111,4 +111,4 @@ warn_redundant_casts = "True"
disallow_untyped_defs = "True"
disallow_untyped_calls = "True"
disallow_incomplete_defs = "True"
strict_equality = "True"
strict_equality = "True"
259 changes: 27 additions & 232 deletions src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,12 @@
import json
import logging
import os
from typing import Any, Optional
from typing import Any

import boto3
import github_api_toolkit
import requests
from botocore.exceptions import ClientError
from requests import Response

# GitHub Organisation
org = os.getenv("GITHUB_ORG")
Expand All @@ -29,7 +29,7 @@

# AWS Bucket Path
BUCKET_NAME = f"{account}-copilot-usage-dashboard"
OBJECT_NAME = "historic_usage_data.json"
OBJECT_NAME = "org_history.json"

logger = logging.getLogger()

Expand Down Expand Up @@ -57,64 +57,6 @@
# }


def get_copilot_team_date(gh: github_api_toolkit.github_interface, page: int) -> list:
    """Gets a list of GitHub Teams with Copilot Data for a given API page.

    NOTE: the function name carries a historic typo ("date" for "data"); it is
    kept unchanged so existing callers keep working.

    Args:
        gh (github_api_toolkit.github_interface): An instance of the github_interface class.
        page (int): The page number of the API request.

    Returns:
        list: A list of GitHub Teams with Copilot Data. Each entry is a dict
            with ``name``, ``slug``, ``description`` and ``url`` keys.
    """
    copilot_teams = []

    response = gh.get(f"/orgs/{org}/teams", params={"per_page": 100, "page": page})
    teams = response.json()
    for team in teams:
        team_name = team.get("name", "")
        team_slug = team.get("slug", "")

        if not team_slug:
            # Without a slug we cannot build a valid metrics URL; skip the team.
            continue

        # Fix: the GitHub REST API addresses this endpoint by team *slug*, not
        # display name (GET /orgs/{org}/team/{team_slug}/copilot/metrics).
        # Interpolating the display name fails for any team whose name differs
        # from its slug (e.g. names containing spaces or uppercase letters).
        usage_data = gh.get(f"/orgs/{org}/team/{team_slug}/copilot/metrics")

        if not isinstance(usage_data, Response):

            # If the response is not a Response object, no copilot data is available for this team
            # We can then skip this team

            # We don't log this as an error, as it is expected and it'd be too noisy within logs

            continue

        # If the response has data, append the team to the list
        # If there is no data, .json() will return an empty list
        if usage_data.json():

            team_description = team.get("description", "")
            team_html_url = team.get("html_url", "")

            logger.info(
                "Team %s has Copilot data",
                team_name,
                extra={
                    "team_name": team_name,
                    "team_slug": team_slug,
                    "team_description": team_description,
                    "team_html_url": team_html_url,
                },
            )

            copilot_teams.append(
                {
                    "name": team_name,
                    "slug": team_slug,
                    "description": team_description,
                    "url": team_html_url,
                }
            )

    return copilot_teams


def get_and_update_historic_usage(
s3: boto3.client, gh: github_api_toolkit.github_interface, write_data_locally: bool
) -> tuple:
Expand All @@ -129,8 +71,8 @@ def get_and_update_historic_usage(
tuple: A tuple containing the updated historic usage data and a list of dates added.
"""
# Get the usage data
usage_data = gh.get(f"/orgs/{org}/copilot/metrics")
usage_data = usage_data.json()
api_response = gh.get(f"/orgs/{org}/copilot/metrics/reports/organization-28-day/latest").json()
usage_data = requests.get(api_response["download_links"][0], timeout=30).json()["day_totals"]

logger.info("Usage data retrieved")

Expand All @@ -139,133 +81,36 @@ def get_and_update_historic_usage(
historic_usage = json.loads(response["Body"].read().decode("utf-8"))
except ClientError as e:
logger.error("Error getting %s: %s. Using empty list.", OBJECT_NAME, e)

historic_usage = []

dates_added = []

# Append the new usage data to the historic_usage_data.json
for date in usage_data:
if not any(d["date"] == date["date"] for d in historic_usage):
historic_usage.append(date)

dates_added.append(date["date"])
for day in usage_data:
if not any(d["day"] == day["day"] for d in historic_usage):
historic_usage.append(day)
dates_added.append(day["day"])
logger.info("Added data for day %s", day["day"])

logger.info(
"New usage data added to %s",
OBJECT_NAME,
extra={"no_days_added": len(dates_added), "dates_added": dates_added},
)
sorted_historic_usage = sorted(historic_usage, key=lambda x: x["day"])

if not write_data_locally:
# Write the updated historic_usage to historic_usage_data.json
update_s3_object(s3, BUCKET_NAME, OBJECT_NAME, historic_usage)
update_s3_object(s3, BUCKET_NAME, OBJECT_NAME, sorted_historic_usage)
else:
local_path = f"output/{OBJECT_NAME}"
os.makedirs("output", exist_ok=True)
with open(local_path, "w", encoding="utf-8") as f:
json.dump(historic_usage, f, indent=4)
json.dump(sorted_historic_usage, f, indent=4)
logger.info("Historic usage data written locally to %s (S3 skipped)", local_path)

return historic_usage, dates_added


def get_and_update_copilot_teams(
s3: boto3.client, gh: github_api_toolkit.github_interface, write_data_locally: bool
) -> list:
"""Get and update GitHub Teams with Copilot Data.

Args:
s3 (boto3.client): An S3 client.
gh (github_api_toolkit.github_interface): An instance of the github_interface class.
write_data_locally (bool): Whether to write data locally instead of to an S3 bucket.

Returns:
list: A list of GitHub Teams with Copilot Data.
"""
logger.info("Getting GitHub Teams with Copilot Data")

copilot_teams = []

response = gh.get(f"/orgs/{org}/teams", params={"per_page": 100})

# Get the last page of teams
try:
last_page = int(response.links["last"]["url"].split("=")[-1])
except KeyError:
last_page = 1

for page in range(1, last_page + 1):
page_teams = get_copilot_team_date(gh, page)

copilot_teams = copilot_teams + page_teams

logger.info(
"Fetched GitHub Teams with Copilot Data",
extra={"no_teams": len(copilot_teams)},
"Usage data written to %s: %d days added (%s)",
OBJECT_NAME,
len(dates_added),
dates_added,
)

if not write_data_locally:
update_s3_object(s3, BUCKET_NAME, "copilot_teams.json", copilot_teams)
else:
local_path = "output/copilot_teams.json"
os.makedirs("output", exist_ok=True)
with open(local_path, "w", encoding="utf-8") as f:
json.dump(copilot_teams, f, indent=4)
logger.info("Copilot teams data written locally to %s (S3 skipped)", local_path)

return copilot_teams


def create_dictionary(
    gh: github_api_toolkit.github_interface, copilot_teams: list, existing_team_history: list
) -> list:
    """Create a dictionary for quick lookup of existing team data using the `name` field.

    Merges freshly-fetched per-team Copilot metrics into the history previously
    stored in S3, keyed by team name, and returns the merged records as a list.

    Args:
        gh (github_api_toolkit.github_interface): An instance of the github_interface class.
        copilot_teams (list): List of teams with Copilot data (dicts with a "name" key).
        existing_team_history (list): List of existing team history records, each shaped
            like {"team": {...}, "data": [{"date": ..., ...}, ...]}.

    Returns:
        list: A list of dictionaries containing team data and their history.
    """
    # Index the stored history by team name for O(1) lookup while merging.
    existing_team_data_map = {
        single_team["team"]["name"]: single_team for single_team in existing_team_history
    }

    # Iterate through identified teams
    for team in copilot_teams:
        team_name = team.get("name", "")
        if not team_name:
            logger.warning("Skipping team with no name")
            continue

        # Determine the last known date for the team so only newer metrics are requested.
        last_known_date = None
        if team_name in existing_team_data_map:
            existing_dates = [entry["date"] for entry in existing_team_data_map[team_name]["data"]]
            if existing_dates:
                # ISO-format date strings sort lexicographically, so max() picks the
                # most recent date.
                last_known_date = max(existing_dates)  # Get the most recent date

        # Assign the last known date to the `since` query parameter.
        # NOTE(review): if the API treats `since` as inclusive, the entry for
        # last_known_date may be returned again and appended as a duplicate
        # below — confirm against the endpoint's semantics.
        query_params = {}
        if last_known_date:
            query_params["since"] = last_known_date

        # NOTE(review): get_team_history builds the URL from this value; the
        # endpoint expects the team *slug*, but the display name is passed here
        # — verify these match for all teams.
        single_team_history = get_team_history(gh, team_name, query_params)
        if not single_team_history:
            logger.info("No new history found for team %s", team_name)
            continue

        # Append new data to the existing team history, or create a new record
        # for a team not seen before.
        new_team_data = single_team_history
        if team_name in existing_team_data_map:
            existing_team_data_map[team_name]["data"].extend(new_team_data)
        else:
            existing_team_data_map[team_name] = {"team": team, "data": new_team_data}

    return list(existing_team_data_map.values())
return sorted_historic_usage, dates_added


def update_s3_object(
Expand Down Expand Up @@ -298,31 +143,6 @@ def update_s3_object(
return False


def get_team_history(
    gh: github_api_toolkit.github_interface, team: str, query_params: Optional[dict] = None
) -> list[dict]:
    """Gets the team metrics Copilot data through the API.

    Note - This endpoint will only return results for a given day if the team had
    five or more members with active Copilot licenses on that day,
    as evaluated at the end of that day.

    NOTE(review): the return annotation says list[dict], but the failure path
    below returns None — callers must handle a None result (and do, via the
    `if not single_team_history` check). Consider Optional[list[dict]].

    Args:
        gh (github_api_toolkit.github_interface): An instance of the github_interface class.
        team (str): Team identifier interpolated into the URL.
            NOTE(review): the endpoint path is /team/{team_slug}/..., so this
            should be the team slug; confirm callers do not pass display names.
        query_params (dict): Additional query parameters for the API request
            (e.g. {"since": "<ISO date>"}).

    Returns:
        list[dict]: A team's GitHub Copilot metrics or None if an error occurs.
    """
    response = gh.get(f"/orgs/{org}/team/{team}/copilot/metrics", params=query_params)

    if not isinstance(response, Response):
        # If the response is not a Response object, no copilot data is available for this team
        # We can return None which is then handled by the calling function
        return None
    return response.json()


def get_dict_value(dictionary: dict, key: str) -> Any:
"""Gets a value from a dictionary and raises an exception if it is not found.

Expand Down Expand Up @@ -406,6 +226,13 @@ def handler(event: dict, context) -> str: # pylint: disable=unused-argument, to
logging.basicConfig(
filename="debug.log",
filemode="w",
format="%(asctime)s %(levelname)s %(message)s",
)
else:
# Ensure INFO logs show in the terminal when not logging to a file
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s %(levelname)s %(message)s",
)

# Create an S3 client
Expand Down Expand Up @@ -437,37 +264,6 @@ def handler(event: dict, context) -> str: # pylint: disable=unused-argument, to
# Copilot Usage Data (Historic)
historic_usage, dates_added = get_and_update_historic_usage(s3, gh, write_data_locally)

# GitHub Teams with Copilot Data
copilot_teams = get_and_update_copilot_teams(s3, gh, write_data_locally)

logger.info("Getting history of each team identified previously")

# Retrieve existing team history from S3
try:
response = s3.get_object(Bucket=BUCKET_NAME, Key="teams_history.json")
existing_team_history = json.loads(response["Body"].read().decode("utf-8"))
except ClientError as e:
logger.warning("Error retrieving existing team history: %s", e)
existing_team_history = []

logger.info("Existing team history has %d entries", len(existing_team_history))

if not write_data_locally:
# Convert to dictionary for quick lookup
updated_team_history = create_dictionary(gh, copilot_teams, existing_team_history)

# Write updated team history to S3
# This line isn't covered by tests as it's painful to get to.
# The function itself is tested though.
update_s3_object(s3, BUCKET_NAME, "teams_history.json", updated_team_history)
else:
local_path = "output/teams_history.json"
os.makedirs("output", exist_ok=True)
updated_team_history = create_dictionary(gh, copilot_teams, existing_team_history)
with open(local_path, "w", encoding="utf-8") as f:
json.dump(updated_team_history, f, indent=4)
logger.info("Team history written locally to %s (S3 skipped)", local_path)

logger.info(
"Process complete",
extra={
Expand All @@ -476,14 +272,13 @@ def handler(event: dict, context) -> str: # pylint: disable=unused-argument, to
"dates_added": dates_added,
"no_dates_before": len(historic_usage) - len(dates_added),
"no_dates_after": len(historic_usage),
"no_copilot_teams": len(copilot_teams),
},
)

return "Github Data logging is now complete."


# # Dev Only
# # Uncomment the following line to run the script locally
# Dev Only
# Uncomment the following line to run the script locally
# if __name__ == "__main__":
# handler(None, None)
Loading
Loading