Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,19 @@
from src.commands.database.command import CommandDatabase
from src.commands.process.command import CommandProcess
from src.commands.report.command import CommandReport
from src.commands.secvars.command import CommandSecretsAndVariables
from src.libs.utils import Utils
from src.libs.exceptions import InvalidCommandLine
from src.commands.download.command import CommandDownload


# Release version; interpolated into the argument parser description below.
# (The superseded '0.10.2 Beta' assignment was a dead store, immediately overwritten.)
__VERSION__ = '0.12.0'

# Sub-command name -> class implementing that command.
commands = {
    'download': CommandDownload,
    'database': CommandDatabase,
    'process': CommandProcess,
    'report': CommandReport,
    'secvars': CommandSecretsAndVariables,
}

parser = argparse.ArgumentParser(prog="butler", description=f"Butler - GitHub Actions Oversight v{__VERSION__}")
Expand Down
38 changes: 0 additions & 38 deletions src/commands/download/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,44 +160,6 @@ def _collect_repos(self, repos: list[RepoComponent]) -> None:
self.log.trace(f"Writing to database...")
self.database.commit()

def _save_repo(self, repo: RepoComponent) -> None:
    """Store ``repo`` in the database, resolving it via ``_fetch_repo`` when it has no ref.

    Database access happens while holding ``self.lock``; the ``_fetch_repo`` call is made
    between the two locked sections so the lock is not held while it runs.

    :param repo: repository component to persist; its ``org.id``, ``poll_status``,
        ``redirect_id`` and ``status`` attributes may be modified in place.
    """
    with self.lock:
        self.log.info(f"Saving repository {repo}")
        if repo.org.id == 0:
            repo.org.id = self._create_org(repo.org).id

        if len(repo.ref) > 0:
            # A repo with an explicit ref is stored as-is and queued for polling.
            repo.poll_status = PollStatus.PENDING
            self.database.repos().create(repo)
            return

        # At this point, there is no `ref` in the object.
        # Search if the repo is already in the database.
        repo_db = self.database.repos().find(repo.org.id, repo.name, None)
        if self._repo_already_stored(repo_db):
            return

    # Either there's no database record, or the stored one also has an empty ref.
    fresh_repo = self._fetch_repo(repo)
    with self.lock:
        same_org = fresh_repo.org.name.lower() == repo.org.name.lower()
        same_name = fresh_repo.name.lower() == repo.name.lower()
        if same_org and same_name:
            fresh_repo.org.id = repo.org.id
            fresh_repo.poll_status = PollStatus.SCANNED if repo.status == RepoStatus.MISSING else PollStatus.PENDING
            if repo_db:
                # Reuse the existing row rather than creating a duplicate.
                fresh_repo.id = repo_db.id
                self.database.repos().update(fresh_repo)
            else:
                self.database.repos().create(fresh_repo)
            return

        # Here, the fetched repo is different to the one passed to the function, this happens when a repo is redirected.
        fresh_repo.org.id = self._create_org(fresh_repo.org).id
        fresh_repo_db = self.database.repos().create(fresh_repo)

        # Keep the originally requested repo as a record pointing at its redirect target.
        repo.redirect_id = fresh_repo_db.id
        repo.status = RepoStatus.REDIRECT
        self.database.repos().create(repo)

def _resolve_commits(self) -> None:
while True:
batch = self.database.next_commit_to_resolve(self.threads)
Expand Down
39 changes: 39 additions & 0 deletions src/commands/download/download_helper.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from src.libs.constants import PollStatus
from src.database.models import OrganisationModel, RepositoryModel
from src.github.exceptions import HttpNotFound
from src.libs.components.org import OrgComponent
Expand Down Expand Up @@ -167,3 +168,41 @@ def _create_child_workflow_from_workflow(self, uses: str, workflow_instance: Wor
else:
org, repo, workflow = self._create_child_workflow_from_action(uses)
return org, repo, workflow

def _save_repo(self, repo: RepoComponent) -> None:
    """Store ``repo`` in the database, resolving it via ``_fetch_repo`` when it has no ref.

    Database access happens while holding ``self.lock``; the ``_fetch_repo`` call is made
    between the two locked sections so the lock is not held while it runs.

    :param repo: repository component to persist; its ``org.id``, ``poll_status``,
        ``redirect_id`` and ``status`` attributes may be modified in place.
    """
    with self.lock:
        self.log.info(f"Saving repository {repo}")
        if repo.org.id == 0:
            repo.org.id = self._create_org(repo.org).id

        if len(repo.ref) > 0:
            # A repo with an explicit ref is stored as-is and queued for polling.
            repo.poll_status = PollStatus.PENDING
            self.database.repos().create(repo)
            return

        # At this point, there is no `ref` in the object.
        # Search if the repo is already in the database.
        repo_db = self.database.repos().find(repo.org.id, repo.name, None)
        if self._repo_already_stored(repo_db):
            return

    # Either there's no database record, or the stored one also has an empty ref.
    fresh_repo = self._fetch_repo(repo)
    with self.lock:
        same_org = fresh_repo.org.name.lower() == repo.org.name.lower()
        same_name = fresh_repo.name.lower() == repo.name.lower()
        if same_org and same_name:
            fresh_repo.org.id = repo.org.id
            fresh_repo.poll_status = PollStatus.SCANNED if repo.status == RepoStatus.MISSING else PollStatus.PENDING
            if repo_db:
                # Reuse the existing row rather than creating a duplicate.
                fresh_repo.id = repo_db.id
                self.database.repos().update(fresh_repo)
            else:
                self.database.repos().create(fresh_repo)
            return

        # Here, the fetched repo is different to the one passed to the function, this happens when a repo is redirected.
        fresh_repo.org.id = self._create_org(fresh_repo.org).id
        fresh_repo_db = self.database.repos().create(fresh_repo)

        # Keep the originally requested repo as a record pointing at its redirect target.
        repo.redirect_id = fresh_repo_db.id
        repo.status = RepoStatus.REDIRECT
        self.database.repos().create(repo)
51 changes: 51 additions & 0 deletions src/commands/secvars/command.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import os
import argparse
from src.commands.secvars.secvars import ServiceSecretsAndVariables
from src.commands.command import Command
from src.database.database import Database
from src.libs.exceptions import InvalidCommandLine
from src.github.client import GitHubClient


class CommandSecretsAndVariables(Command):
    """The ``secvars`` sub-command: download an organisation's secrets and variables."""

    @staticmethod
    def load_command_line(subparsers: any) -> None:
        """Register the ``secvars`` sub-parser and its options on ``subparsers``."""
        subparser = subparsers.add_parser("secvars", help="Download Secrets and Variables from GitHub")

        subparser.add_argument("--org", type=str, help="Organisation to download secrets and variables for")
        subparser.add_argument("--database", default="database.db", type=str, help="Path to SQLite database to create or connect to")
        # BooleanOptionalAction keeps `--resume-next` (and the True default) working while
        # adding `--no-resume-next`; with `store_true` + default=True it could never be disabled.
        subparser.add_argument("--resume-next", default=True, action=argparse.BooleanOptionalAction, help="Resume downloads on server errors")
        subparser.add_argument("--threads", default=1, type=int, help="Enable multithreading")

        Command.define_shared_arguments(subparser)

    def load_arguments(self, arguments: argparse.Namespace) -> dict:
        """Normalise the parsed command line into a plain dict.

        Missing or blank ``--org`` / ``--database`` values become ``''``; a non-blank
        database path is resolved with ``os.path.realpath``.
        """
        org = (arguments.org or '').strip()
        database = (arguments.database or '').strip()
        return {
            'org': org,
            'database': os.path.realpath(database) if database else '',
            'resume_next': bool(arguments.resume_next),
            'threads': int(arguments.threads),
        }

    def validate_command_arguments(self, arguments: dict) -> None:
        """Validate the normalised arguments, clamping ``threads`` to at least 1.

        :raises InvalidCommandLine: if the database path is empty or lacks a
            ``.sqlite3`` / ``.db`` extension, or if no organisation was given.
        """
        # Validate database.
        if len(arguments['database']) == 0:
            raise InvalidCommandLine("--database cannot be empty")
        elif not arguments['database'].lower().endswith(('.sqlite3', '.db')):
            raise InvalidCommandLine(f"--database {arguments['database']} is not a SQLite database (must end with .sqlite3 or .db)")

        if len(arguments['org']) == 0:
            raise InvalidCommandLine("--org cannot be empty")

        if arguments['threads'] <= 0:
            arguments['threads'] = 1

    def execute(self, arguments: dict) -> bool:
        """Build the service from ``arguments`` and run it, returning its result."""
        # NOTE(review): 'db_debug' / 'db_debug_auto_commit' are not produced by load_arguments();
        # presumably they come from the shared arguments handled by Command - confirm.
        database = Database(arguments['database'], arguments['db_debug'], arguments['db_debug_auto_commit'])

        service = ServiceSecretsAndVariables(self.log, database)
        service.github_client = GitHubClient(self.tokens, self.log)
        service.org = arguments['org']
        service.resume_next = arguments['resume_next']
        service.threads = arguments['threads']
        return service.run()
188 changes: 188 additions & 0 deletions src/commands/secvars/secvars.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
import threading
import concurrent.futures
from contextlib import nullcontext
from src.database.models import OrganisationModel, RepositoryModel
from src.libs.constants import SecretVariableCategory, SecretVariableType, SecretVariableVisibility
from src.libs.components.secvar import SecretVariableComponent
from src.commands.download.download_helper import DownloadHelper
from src.commands.service import Service
from src.github.client import GitHubClient
from src.github.exceptions import TooManyRequests, ApiRateLimitExceeded, OrgNotFound
from src.libs.components.org import OrgComponent
from src.libs.constants import PollStatus, OrgStatus
from src.libs.exceptions import InvalidCommandLine
from src.libs.utils import Utils


class ServiceSecretsAndVariables(Service, DownloadHelper):
org: str = None
lock: threading.Lock = None
resume_next: bool = None
github_client: GitHubClient = None
threads: int = None

_combinations: list = [
{'label': 'actions / secrets', 'category': SecretVariableCategory.ACTIONS, 'type': SecretVariableType.SECRET},
{'label': 'actions / variables', 'category': SecretVariableCategory.ACTIONS, 'type': SecretVariableType.VARIABLE},
{'label': 'agents / secrets', 'category': SecretVariableCategory.AGENTS, 'type': SecretVariableType.SECRET},
{'label': 'agents / variables', 'category': SecretVariableCategory.AGENTS, 'type': SecretVariableType.VARIABLE},
{'label': 'codespaces / secrets', 'category': SecretVariableCategory.CODESPACES, 'type': SecretVariableType.SECRET},
{'label': 'dependabot / secrets', 'category': SecretVariableCategory.DEPENDABOT, 'type': SecretVariableType.SECRET},
]

def run(self) -> bool:
# Thanks SQLite3 :>
self.lock = threading.Lock() if self.threads > 1 else nullcontext()

while True:
try:
self.log.info(f"Collecting repositories for {self.org}...")
self._collect_targets()

org = self.database.orgs().find(self.org)
if not org:
raise InvalidCommandLine(f"Organisation {self.org} not found")

self.log.info(f"Collecting secrets and variables for {org.name}")
self._collect_secrets_and_variables(org)

break
except (TooManyRequests, ApiRateLimitExceeded) as e:
if self.resume_next:
self.github_client.halt_and_continue(5)
continue
raise
except Exception as e:
if self.resume_next and 'Server Error' in str(e):
self.github_client.halt_and_continue(2)
continue
raise

if self.database.debug:
self.log.info(f"Total SQL Queries: {self.database.total_queries}")

self.log.info(f"Total API Calls: {self.github_client._api.total_requests}")
return True

def _collect_targets(self) -> None:
orgs, repos = Utils.filter_orgs_and_repos([self.org])
self.log.debug(f"Input has {len(orgs)} organisations and {len(repos)} repositories")
self._collect_orgs(orgs)

def _collect_orgs(self, orgs: list[OrgComponent]) -> None:
count = 0
for org in orgs:
count += 1
self.log.info(f"Processing {org} ({count}/{len(orgs)})")

org_db = self._create_org(org)
org.id = org_db.id

if org_db.poll_status == PollStatus.SCANNED:
self.log.debug(f"Organisation {org_db.name} already scanned - skipping")
continue
elif org_db.poll_status == PollStatus.NONE:
self.log.info(f"Organisation {org_db.name} is new or was not marked as pending before")
self.database.orgs().set_poll_status(org_db.id, PollStatus.PENDING)

try:
for batch in self.github_client.get_org_repos(org.name, True, True):
for repo in batch:
repo.org.id = org_db.id
self._save_repo(repo)
except OrgNotFound as e:
self.log.error(f"Organisation {org.name} not found")
self.database.orgs().set_status(org_db.id, OrgStatus.MISSING)

self.database.orgs().set_poll_status(org_db.id, PollStatus.SCANNED)

self.log.trace(f"Writing to database...")
self.database.commit()

self.log.trace(f"Writing to database...")
self.database.commit()

def _collect_secrets_and_variables(self, org: OrganisationModel) -> None:
components = []
for item in self._combinations:
self.log.info(f"Getting organisation {item['label']}")
try:
results = self.github_client.get_secrets(org.name, item['category'], item['type'], None, None)
components.extend(self._create_components(org.id, results, item['category'], item['type']))
except Exception as e:
self.log.warning(f"Could not get {item['label']}")

self.log.info(f"Writing to database")
for component in components:
self.database.secvars().create(0, component)

self.database.commit()

self.log.info(f"Getting organisation repos")
repos = self.database.repos().all(org.id)
self.log.info(f"Got {len(repos)} repos")

with concurrent.futures.ThreadPoolExecutor(max_workers=self.threads) as executor:
# Submit all repositories to the executor
future_to_repo = {executor.submit(self._fetch_secvar_repo_single, org, repo): repo for repo in repos}

for future in concurrent.futures.as_completed(future_to_repo):
repo = future_to_repo[future]
try:
# Retrieve the data returned by fetch_repo_data
returned_repo, components = future.result()

# 3. Write to the database safely in the main thread
if components:
self.log.info(f"Writing {len(components)} components to database for {returned_repo.name}")
with self.lock:
for component in components:
self.database.secvars().create(repo.id, component)

self.database.commit()

except Exception as e:
self.log.error(f"Unhandled exception processing repo {repo.name}: {e}")

def _fetch_secvar_repo_single(self, org: OrganisationModel, repo: RepositoryModel) -> tuple[object, list]:
components = []
self.log.info(f"Getting secrets and variables for {repo.name}")
for item in self._combinations:
self.log.info(f"Getting data for repo {repo.name} and {item['label']}")
try:
results = self.github_client.get_secrets(org.name, item['category'], item['type'], None, repo.name)
components.extend(self._create_components(org.id, results, item['category'], item['type']))
except Exception as e:
self.log.warning(f"Could not get {item['label']} for {repo.name}")

return repo, components

def _create_components(self, org_id: int, items: list, category: SecretVariableCategory, type: SecretVariableType) -> list[SecretVariableComponent]:
components = []
for item in items:
component = SecretVariableComponent()
component.category = category
component.type = type
component.name = item['name']
if component.type == SecretVariableType.VARIABLE:
component.value = item['value']
component.created_at = item['created_at']
component.updated_at = item['updated_at']

if 'visibility' in item:
match item['visibility']:
case 'all':
component.visibility = SecretVariableVisibility.ALL
case 'private':
component.visibility = SecretVariableVisibility.PRIVATE
case 'selected':
component.visibility = SecretVariableVisibility.SELECTED

repos = []
for repo in item['repos']:
repo = self.database.repos().find(org_id, repo, None)
repos.append(repo)
component.repos = repos

components.append(component)
return components
10 changes: 8 additions & 2 deletions src/database/database.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os
from src.database.helpers.db_secvars import DBSecretsAndVariables
from src.database.database_helper import DatabaseHelper
from src.database.helpers.db_config import DBConfig
from src.database.helpers.db_jobs import DBJob
Expand All @@ -14,7 +15,7 @@


class Database(DatabaseHelper):
__VERSION__: str = '1.0.1'
__VERSION__: str = '1.1.0'
_engine: Engine = None
_sessionmaker: sessionmaker = None
_session = None
Expand All @@ -26,6 +27,7 @@ class Database(DatabaseHelper):
_steps: DBStep | None = None
_vars: DBVars | None = None
_config: DBConfig | None = None
_secvars: DBSecretsAndVariables | None = None

_total_queries: int = 0
_debug: bool = False
Expand Down Expand Up @@ -98,7 +100,6 @@ def _update_views(self) -> None:
"""
self.execute(sql)


def orgs(self) -> DBOrg:
if not self._orgs:
self._orgs = DBOrg(self.session, self.auto_commit)
Expand Down Expand Up @@ -134,6 +135,11 @@ def config(self) -> DBConfig:
self._config = DBConfig(self.session, self.auto_commit)
return self._config

def secvars(self) -> DBSecretsAndVariables:
    """Return the secrets-and-variables helper, creating it on first use."""
    helper = self._secvars
    if not helper:
        # Built lazily from the current session and auto-commit setting, then cached.
        helper = DBSecretsAndVariables(self.session, self.auto_commit)
        self._secvars = helper
    return helper

def commit(self) -> None:
    """Commit the current session."""
    active_session = self.session
    active_session.commit()

Expand Down
Loading