# Local clone cache: the "repositories" directory two levels above this file.
REPOSITORIES_DIR = Path(__file__).resolve().parent.parent / "repositories"


def repo_name_from_url(url: str) -> str:
    """Return the repository name taken from the last path segment of *url*.

    Trailing slashes and a trailing ``.git`` suffix are ignored, so
    ``https://host/org/repo.git/`` yields ``"repo"``.

    Args:
        url: Repository URL.

    Returns:
        The last path segment without a ``.git`` suffix.

    Raises:
        ValueError: If the derived name is empty, ``"."`` or ``".."``. Such a
            name would make ``REPOSITORIES_DIR / name`` resolve to the cache
            root (or its parent), which ``fresh_clone_repository`` would then
            delete.
    """
    parsed_path = urlparse(url).path.rstrip("/")
    repo_name = parsed_path.split("/")[-1].removesuffix(".git")
    if repo_name in ("", ".", ".."):
        raise ValueError(f"Cannot derive a repository name from URL: {url!r}")
    return repo_name


def fresh_clone_repository(url: str, path: Path) -> Path:
    """Replace *path* with a fresh shallow (``--depth 1``) clone of *url*.

    The clone is written to a hidden staging directory next to *path* and is
    moved into place only after ``git clone`` succeeds. A failed clone
    therefore never leaves a partial checkout at *path*, and whatever was at
    *path* before is kept until the new clone is ready.

    Args:
        url: Repository URL passed to ``git clone``.
        path: Destination directory; anything already there is replaced.

    Returns:
        *path*, for convenience.

    Raises:
        subprocess.CalledProcessError: If ``git clone`` exits non-zero. The
            captured stderr is logged before the exception is re-raised.
    """
    path.parent.mkdir(parents=True, exist_ok=True)

    staging = path.with_name(f".{path.name}.cloning")
    # Clear leftovers from a previous run that was killed mid-clone.
    shutil.rmtree(staging, ignore_errors=True)

    try:
        try:
            subprocess.run(
                # "--" stops option parsing so the URL is never read as a flag.
                ["git", "clone", "--depth", "1", "--", url, str(staging)],
                check=True,
                capture_output=True,
                text=True,
            )
        except subprocess.CalledProcessError as e:
            logger.error(
                "git clone failed for %s (return code %s). Stderr:\n%s",
                url,
                e.returncode,
                e.stderr or "",
            )
            raise

        # Remove the old entry only now that a complete clone is available.
        if path.is_symlink() or path.is_file():
            path.unlink()
        elif path.exists():
            shutil.rmtree(path)
        staging.rename(path)
    finally:
        # No-op after a successful rename; removes the staging dir on failure.
        shutil.rmtree(staging, ignore_errors=True)

    return path


def load_project(url: str) -> "Project":
    """Return a ``Project`` for *url* backed by a clone under ``REPOSITORIES_DIR``.

    A directory ``REPOSITORIES_DIR/<repo name>`` that already contains ``.git``
    is reused as-is; otherwise the repository is cloned there first.

    Args:
        url: Repository URL; its last path segment names the cache directory.

    Returns:
        The result of ``Project.from_local_repository`` for the cached clone.

    Raises:
        ValueError: If no repository name can be derived from *url*.
        subprocess.CalledProcessError: If a required clone fails.
    """
    repo_path = REPOSITORIES_DIR / repo_name_from_url(url)

    if (repo_path / ".git").exists():
        # NOTE(review): the cached clone is never fetched or compared with
        # upstream. The Playwright workflow restores this directory with a
        # prefix restore-key, so it may be older than the SHAs in the cache
        # key - confirm that a stale checkout is acceptable for seeding.
        logger.info("Using cached repository clone at %s", repo_path)
    else:
        logger.info("Cloning repository into cache at %s", repo_path)
        fresh_clone_repository(url, repo_path)

    return Project.from_local_repository(repo_path)
"""Unit tests for the clone-cache logic of ``e2e.seed_test_data.load_project``."""

from pathlib import Path

import e2e.seed_test_data as seed_test_data


def _recording_project(seen):
    """Build a ``Project`` stand-in that records each path and echoes it back."""

    class RecordingProject:
        @staticmethod
        def from_local_repository(path):
            seen.append(path)
            return path

    return RecordingProject


def test_load_project_uses_existing_cached_clone(monkeypatch, tmp_path):
    """A cache directory that already holds ``.git`` is loaded directly."""
    cached_repo = tmp_path / "GraphRAG-SDK"
    (cached_repo / ".git").mkdir(parents=True)
    loaded_paths = []

    monkeypatch.setattr(seed_test_data, "REPOSITORIES_DIR", tmp_path)
    monkeypatch.setattr(seed_test_data, "Project", _recording_project(loaded_paths))

    result = seed_test_data.load_project("https://github.com/FalkorDB/GraphRAG-SDK")

    assert result == cached_repo
    assert loaded_paths == [cached_repo]


def test_load_project_clones_into_cache(monkeypatch, tmp_path):
    """Without a cached clone, the repository is cloned and then loaded."""
    expected_path = tmp_path / "flask"
    cloned = []
    loaded_paths = []

    def fake_clone(url: str, path: Path) -> Path:
        # Mimic a successful clone by creating the ``.git`` marker directory.
        cloned.append((url, path))
        (path / ".git").mkdir(parents=True)
        return path

    monkeypatch.setattr(seed_test_data, "REPOSITORIES_DIR", tmp_path)
    monkeypatch.setattr(seed_test_data, "Project", _recording_project(loaded_paths))
    monkeypatch.setattr(seed_test_data, "fresh_clone_repository", fake_clone)

    result = seed_test_data.load_project("https://github.com/pallets/flask")

    assert result == expected_path
    assert cloned == [("https://github.com/pallets/flask", expected_path)]
    assert loaded_paths == [expected_path]