Skip to content

Commit 64d841f

Browse files
authored
Keh 735 add team info (#54)
* Update to remove nan rows in the dataset * Add functionality to get team history from the Github API * add documentation of the team historic metrics * add support for query params - since * update docs
1 parent 38f8b85 commit 64d841f

File tree

4 files changed

+147
-6
lines changed

4 files changed

+147
-6
lines changed

docs/team_usage.md

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,3 +23,46 @@ A user within ONSDigital. Upon authentication, the app identifies the teams they
2323

2424
#### Admin User
2525
An enhanced regular user with the ability to search for any team. This user belongs to a specific whitelisted team, enabling them to view metrics for any team that meets the CoPilot usage data requirements.
26+
27+
## Metrics
28+
29+
### Team History Metrics
30+
The team history metrics function retrieves historical usage data for each team identified with CoPilot usage. This data includes detailed metrics about the team's activity over time. On each run, new data for a team is fetched only from the most recent date already captured for that team in the stored history file.
31+
32+
#### Functionality
33+
- **Input**: In addition to the GitHub client, the function takes a team name, an organisation and an optional `since` query parameter.
34+
- **Process**:
35+
- Fetches historical data for the specified team using the GitHub API.
36+
- If the `since` query parameter is provided, fetches data only after the specified date.
37+
- Filters and organizes the data into a structured format.
38+
- **Output**: A JSON object containing the team's historical metrics, including:
39+
- Team name
40+
- Activity data
41+
- CoPilot usage statistics
42+
43+
#### Usage
44+
The historical metrics are stored in an S3 bucket as a JSON file (`teams_history.json`).
45+
46+
#### Example
47+
For a team named `kehdev`, the historical metrics might include:
48+
```json
49+
{
50+
"team": {
51+
"name": "kehdev",
52+
"slug": "kehdev",
53+
"description": "Team responsible for CI/CD pipelines",
54+
"url": "https://github.com/orgs/<organisation>/teams/kehdev"
55+
},
56+
"data": [
57+
{
58+
"date": "2025-07-01",
59+
"active_members": 10,
60+
"copilot_usage_hours": 50
61+
},
62+
{
63+
"date": "2025-07-02",
64+
"active_members": 12,
65+
"copilot_usage_hours": 60
66+
}
67+
]
68+
}
```

lambda_data_logger/main.py

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -194,3 +194,91 @@ def handler(event, context):
194194
"no_copilot_teams": len(copilot_teams),
195195
},
196196
)
197+
198+
# Get teams history
199+
team_history = []
200+
201+
logger.info("Getting history of each team identified previously")
202+
203+
# Retrieve existing team history from S3
204+
try:
205+
response = s3.get_object(Bucket=bucket_name, Key="teams_history.json")
206+
existing_team_history = json.loads(response["Body"].read().decode("utf-8"))
207+
except ClientError as e:
208+
logger.warning(f"Error retrieving existing team history: {e}")
209+
existing_team_history = []
210+
211+
logger.info(f"Existing team history has {len(existing_team_history)} entries")
212+
213+
# Create a dictionary for quick lookup of existing team data using the `name` field
214+
existing_team_data_map = {single_team["team"]["name"]: single_team for single_team in existing_team_history}
215+
216+
# Iterate through identified teams
217+
for team in copilot_teams:
218+
team_name = team.get("name", "")
219+
if not team_name:
220+
logger.warning("Skipping team with no name")
221+
continue
222+
223+
# Determine the last known date for the team
224+
last_known_date = None
225+
if team_name in existing_team_data_map:
226+
existing_dates = [entry["date"] for entry in existing_team_data_map[team_name]["data"]]
227+
if existing_dates:
228+
last_known_date = max(existing_dates) # Get the most recent date
229+
230+
# Assign the last known date to the `since` query parameter
231+
query_params = {}
232+
if last_known_date:
233+
query_params["since"] = last_known_date
234+
235+
single_team_history = get_team_history(gh, org, team_name, query_params)
236+
if not single_team_history:
237+
logger.info(f"No new history found for team {team_name}")
238+
continue
239+
240+
# Append new data to the existing team history
241+
new_team_data = single_team_history
242+
if team_name in existing_team_data_map:
243+
existing_team_data_map[team_name]["data"].extend(new_team_data)
244+
else:
245+
existing_team_data_map[team_name] = {"team": team, "data": new_team_data}
246+
247+
# Convert the updated team data map back to a list
248+
updated_team_history = list(existing_team_data_map.values())
249+
250+
# Write updated team history to S3
251+
s3.put_object(
252+
Bucket=bucket_name,
253+
Key="teams_history.json",
254+
Body=json.dumps(updated_team_history, indent=4).encode("utf-8"),
255+
)
256+
257+
logger.info("Uploaded updated teams_history.json to S3")
258+
259+
return "Github Data logging is now complete."
260+
261+
262+
def get_team_history(gh: github_api_toolkit.github_interface, org: str, team: str, query_params: dict = None):
    """
    Fetch the Copilot metrics history for a single team from the GitHub API.

    Note - The endpoint only reports a given day when the team had five or
    more members with active Copilot licenses on that day, as evaluated at
    the end of that day.

    Args:
        gh (github_api_toolkit.github_interface): Client used to issue the GET request.
        org (str): Organisation name, interpolated into the request path.
        team (str): Team name, interpolated into the request path.
        query_params (dict): Optional query parameters forwarded unchanged with
            the request (for example ``since``). Defaults to None.

    Returns:
        The decoded JSON body of the response, or None if any exception is
        raised while making the request or decoding it (the error is logged).
    """
    try:
        # The path is built inside the try so every failure is handled the same way.
        endpoint = f"/orgs/{org}/team/{team}/copilot/metrics"
        return gh.get(endpoint, params=query_params).json()
    except Exception as err:
        logger.error(f"Error getting history for team {team} due to {err} with Github API")

    # Only reached when the request or the JSON decoding failed.
    return None
284+

lambda_data_logger/requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ certifi==2024.7.4
44
cffi==1.16.0
55
charset-normalizer==3.3.2
66
cryptography==42.0.8
7-
github-api-toolkit @ git+https://github.com/ONS-Innovation/github-api-package.git@v1.0.0
7+
github-api-toolkit @ git+https://github.com/ONS-Innovation/github-api-package@v2.0.3
88
idna==3.7
99
jmespath==1.0.1
1010
jwt==1.3.1

src/pages/interim_page.py

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ def generate_datasets(date_range: tuple):
9797
# Create a subset of data based on slider selection
9898
df_usage_data_subset = df_usage_data.loc[
9999
(df_usage_data["date"] >= date_range[0]) & (df_usage_data["date"] <= date_range[1])
100-
].reset_index(drop=True)
100+
].reset_index(drop=True).dropna()
101101

102102
copilot_chat = pd.DataFrame()
103103
ide_completions = pd.DataFrame()
@@ -106,7 +106,6 @@ def generate_datasets(date_range: tuple):
106106

107107
# Get copilot chat data
108108
editors_data = df_usage_data_subset.iloc[i]["copilot_ide_chat.editors"]
109-
110109
for editor in editors_data:
111110
editor_name = editor.get("name", "")
112111
models_data = editor.get("models", [])
@@ -122,7 +121,6 @@ def generate_datasets(date_range: tuple):
122121
}])
123122
copilot_chat = pd.concat([copilot_chat, data], ignore_index=True)
124123

125-
126124
# Get IDE completions data
127125
editors_data = df_usage_data_subset.iloc[i]["copilot_ide_code_completions.editors"]
128126

@@ -364,12 +362,24 @@ def generate_datasets(date_range: tuple):
364362
# Format into a year format (i.e 2022)
365363
df_historic_data["date"] = df_historic_data["date"].dt.strftime("%Y")
366364

365+
# Remove any rows where the data is NAN
366+
initial_historical_data = historic_data
367+
indexes = []
368+
for day in initial_historical_data:
369+
try:
370+
editor = day["copilot_ide_chat"]["editors"]
371+
except KeyError:
372+
indexes.append(initial_historical_data.index(day))
373+
374+
for index in indexes:
375+
initial_historical_data.pop(index)
376+
367377
# Extract IDE chat data
368378
df_chat = pd.json_normalize(
369-
historic_data,
379+
initial_historical_data,
370380
record_path=["copilot_ide_chat", "editors", "models"],
371381
meta=["date"],
372-
errors="ignore"
382+
errors="ignore"
373383
)
374384
df_chat["date"] = pd.to_datetime(df_chat["date"]).dt.strftime(
375385
"%Y-%m-%d" if date_grouping == "Day" else

0 commit comments

Comments
 (0)