From 585421525411dcadb8c3b8bfd5006ebc3d8c847a Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 10 Mar 2026 07:57:47 +0000 Subject: [PATCH 1/7] Initial plan From 113d4b85e5b0101d2b1af569d609385ecba9617c Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 10 Mar 2026 08:01:58 +0000 Subject: [PATCH 2/7] feat: cache Playwright seed repository clones Co-authored-by: gkorland <753206+gkorland@users.noreply.github.com> --- .github/workflows/playwright.yml | 12 ++++++++ e2e/seed_test_data.py | 48 ++++++++++++++++++++++++++---- tests/test_seed_test_data.py | 51 ++++++++++++++++++++++++++++++++ 3 files changed, 105 insertions(+), 6 deletions(-) create mode 100644 tests/test_seed_test_data.py diff --git a/.github/workflows/playwright.yml b/.github/workflows/playwright.yml index adc5b36..743f07e 100644 --- a/.github/workflows/playwright.yml +++ b/.github/workflows/playwright.yml @@ -28,6 +28,18 @@ jobs: with: python-version: '3.12' + - name: Resolve seed repository SHAs + id: seed-repo-shas + run: | + echo "graphrag_sdk=$(git ls-remote https://github.com/FalkorDB/GraphRAG-SDK HEAD | cut -f1)" >> "$GITHUB_OUTPUT" + echo "flask=$(git ls-remote https://github.com/pallets/flask HEAD | cut -f1)" >> "$GITHUB_OUTPUT" + + - name: Cache seeded repository clones + uses: actions/cache@v4 + with: + path: repositories + key: ${{ runner.os }}-seed-repos-${{ hashFiles('e2e/seed_test_data.py') }}-${{ steps.seed-repo-shas.outputs.graphrag_sdk }}-${{ steps.seed-repo-shas.outputs.flask }} + - name: Install backend dependencies run: pip install -e . diff --git a/e2e/seed_test_data.py b/e2e/seed_test_data.py index 360622a..c07c793 100644 --- a/e2e/seed_test_data.py +++ b/e2e/seed_test_data.py @@ -1,9 +1,15 @@ #!/usr/bin/env python3 """Seed FalkorDB with test data for Playwright e2e tests.""" -import os -import sys import logging +import os +import shutil +import subprocess +from pathlib import Path +from urllib.parse import urlparse + +from api.project import Project +from falkordb import FalkorDB logging.basicConfig( level=logging.INFO, @@ -11,9 +17,6 @@ ) logger = logging.getLogger(__name__) -from falkordb import FalkorDB -from api.project import Project - REPOS = [ "https://github.com/FalkorDB/GraphRAG-SDK", "https://github.com/pallets/flask", @@ -25,6 +28,39 @@ ("import_data", "add_node"), ] +REPOSITORIES_DIR = Path.cwd() / "repositories" + + +def repo_name_from_url(url: str) -> str: + return urlparse(url).path.rstrip("/").split("/")[-1].removesuffix(".git") + + +def clone_repository(url: str, path: Path) -> Path: + if path.exists(): + shutil.rmtree(path) + + path.parent.mkdir(parents=True, exist_ok=True) + subprocess.run( + ["git", "clone", "--depth", "1", url, str(path)], + check=True, + capture_output=True, + text=True, + ) + + return path + + +def load_project(url: str) -> Project: + repo_path = REPOSITORIES_DIR / repo_name_from_url(url) + + if (repo_path / ".git").exists(): + logger.info("Using cached repository clone at %s", repo_path) + else: + logger.info("Cloning repository into cache at %s", repo_path) + clone_repository(url, repo_path) + + return Project.from_local_repository(repo_path) + def ensure_calls_edges(graph_name: str) -> None: """Ensure required CALLS edges exist for E2E tests. @@ -63,7 +99,7 @@ def ensure_calls_edges(graph_name: str) -> None: def main(): for url in REPOS: logger.info("Seeding %s ...", url) - proj = Project.from_git_repository(url) + proj = load_project(url) proj.analyze_sources() logger.info("Done seeding %s", url) diff --git a/tests/test_seed_test_data.py b/tests/test_seed_test_data.py new file mode 100644 index 0000000..6a06430 --- /dev/null +++ b/tests/test_seed_test_data.py @@ -0,0 +1,51 @@ +from pathlib import Path + +import e2e.seed_test_data as seed_test_data + + +def test_load_project_uses_cached_clone(monkeypatch, tmp_path): + repo_path = tmp_path / "GraphRAG-SDK" + (repo_path / ".git").mkdir(parents=True) + + calls = [] + + class FakeProject: + @staticmethod + def from_local_repository(path): + calls.append(path) + return path + + monkeypatch.setattr(seed_test_data, "REPOSITORIES_DIR", tmp_path) + monkeypatch.setattr(seed_test_data, "Project", FakeProject) + + project = seed_test_data.load_project("https://github.com/FalkorDB/GraphRAG-SDK") + + assert project == repo_path + assert calls == [repo_path] + + +def test_load_project_clones_into_cache(monkeypatch, tmp_path): + repo_path = tmp_path / "flask" + clone_calls = [] + project_calls = [] + + class FakeProject: + @staticmethod + def from_local_repository(path): + project_calls.append(path) + return path + + def fake_clone(url: str, path: Path) -> Path: + clone_calls.append((url, path)) + (path / ".git").mkdir(parents=True) + return path + + monkeypatch.setattr(seed_test_data, "REPOSITORIES_DIR", tmp_path) + monkeypatch.setattr(seed_test_data, "Project", FakeProject) + monkeypatch.setattr(seed_test_data, "clone_repository", fake_clone) + + project = seed_test_data.load_project("https://github.com/pallets/flask") + + assert project == repo_path + assert clone_calls == [("https://github.com/pallets/flask", repo_path)] + assert project_calls == [repo_path] From f779df895cd0492227a2219760801c2a97cf1d12 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 10 Mar 2026 08:03:27 +0000 Subject: [PATCH 3/7] chore: refine seed cache workflow details Co-authored-by: gkorland <753206+gkorland@users.noreply.github.com> --- .github/workflows/playwright.yml | 2 +- e2e/seed_test_data.py | 9 ++++++--- tests/test_seed_test_data.py | 4 ++-- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/.github/workflows/playwright.yml b/.github/workflows/playwright.yml index 743f07e..9e7fe63 100644 --- a/.github/workflows/playwright.yml +++ b/.github/workflows/playwright.yml @@ -37,7 +37,7 @@ jobs: - name: Cache seeded repository clones uses: actions/cache@v4 with: - path: repositories + path: ${{ github.workspace }}/repositories key: ${{ runner.os }}-seed-repos-${{ hashFiles('e2e/seed_test_data.py') }}-${{ steps.seed-repo-shas.outputs.graphrag_sdk }}-${{ steps.seed-repo-shas.outputs.flask }} - name: Install backend dependencies diff --git a/e2e/seed_test_data.py b/e2e/seed_test_data.py index c07c793..05c6808 100644 --- a/e2e/seed_test_data.py +++ b/e2e/seed_test_data.py @@ -32,11 +32,14 @@ def repo_name_from_url(url: str) -> str: - return urlparse(url).path.rstrip("/").split("/")[-1].removesuffix(".git") + parsed_path = urlparse(url).path.rstrip("/") + repo_name = parsed_path.split("/")[-1] + return repo_name.removesuffix(".git") -def clone_repository(url: str, path: Path) -> Path: +def fresh_clone_repository(url: str, path: Path) -> Path: if path.exists(): + # Replace any existing directory before creating a fresh shallow clone. shutil.rmtree(path) path.parent.mkdir(parents=True, exist_ok=True) @@ -57,7 +60,7 @@ def load_project(url: str) -> Project: logger.info("Using cached repository clone at %s", repo_path) else: logger.info("Cloning repository into cache at %s", repo_path) - clone_repository(url, repo_path) + fresh_clone_repository(url, repo_path) return Project.from_local_repository(repo_path) diff --git a/tests/test_seed_test_data.py b/tests/test_seed_test_data.py index 6a06430..e22c124 100644 --- a/tests/test_seed_test_data.py +++ b/tests/test_seed_test_data.py @@ -3,7 +3,7 @@ import e2e.seed_test_data as seed_test_data -def test_load_project_uses_cached_clone(monkeypatch, tmp_path): +def test_load_project_uses_existing_cached_clone(monkeypatch, tmp_path): repo_path = tmp_path / "GraphRAG-SDK" (repo_path / ".git").mkdir(parents=True) @@ -42,7 +42,7 @@ def fake_clone(url: str, path: Path) -> Path: monkeypatch.setattr(seed_test_data, "REPOSITORIES_DIR", tmp_path) monkeypatch.setattr(seed_test_data, "Project", FakeProject) - monkeypatch.setattr(seed_test_data, "clone_repository", fake_clone) + monkeypatch.setattr(seed_test_data, "fresh_clone_repository", fake_clone) project = seed_test_data.load_project("https://github.com/pallets/flask") From 3c3a890f89b49c24f58e459d99fb2bcb208afa8a Mon Sep 17 00:00:00 2001 From: Guy Korland Date: Tue, 10 Mar 2026 10:53:57 +0200 Subject: [PATCH 4/7] Update e2e/seed_test_data.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- e2e/seed_test_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/e2e/seed_test_data.py b/e2e/seed_test_data.py index 05c6808..7df3199 100644 --- a/e2e/seed_test_data.py +++ b/e2e/seed_test_data.py @@ -28,7 +28,7 @@ ("import_data", "add_node"), ] -REPOSITORIES_DIR = Path.cwd() / "repositories" +REPOSITORIES_DIR = Path(__file__).resolve().parent.parent / "repositories" def repo_name_from_url(url: str) -> str: From 8bc25e4a5176cbb64cd74e27e76fe819f106be5f Mon Sep 17 00:00:00 2001 From: Guy Korland Date: Tue, 10 Mar 2026 10:54:42 +0200 Subject: [PATCH 5/7] Update e2e/seed_test_data.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- e2e/seed_test_data.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/e2e/seed_test_data.py b/e2e/seed_test_data.py index 7df3199..0b06491 100644 --- a/e2e/seed_test_data.py +++ b/e2e/seed_test_data.py @@ -43,12 +43,21 @@ def fresh_clone_repository(url: str, path: Path) -> Path: shutil.rmtree(path) path.parent.mkdir(parents=True, exist_ok=True) - subprocess.run( - ["git", "clone", "--depth", "1", url, str(path)], - check=True, - capture_output=True, - text=True, - ) + try: + subprocess.run( + ["git", "clone", "--depth", "1", url, str(path)], + check=True, + capture_output=True, + text=True, + ) + except subprocess.CalledProcessError as e: + logger.error( + "git clone failed for %s (return code %s). Stderr:\n%s", + url, + e.returncode, + e.stderr or "", + ) + raise return path From 5efaa1abc765d9c9bad27fc00cbf9f43bbb1e413 Mon Sep 17 00:00:00 2001 From: Guy Korland Date: Tue, 10 Mar 2026 10:55:44 +0200 Subject: [PATCH 6/7] Update .github/workflows/playwright.yml Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- .github/workflows/playwright.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/playwright.yml b/.github/workflows/playwright.yml index 9e7fe63..020f851 100644 --- a/.github/workflows/playwright.yml +++ b/.github/workflows/playwright.yml @@ -39,6 +39,8 @@ jobs: with: path: ${{ github.workspace }}/repositories key: ${{ runner.os }}-seed-repos-${{ hashFiles('e2e/seed_test_data.py') }}-${{ steps.seed-repo-shas.outputs.graphrag_sdk }}-${{ steps.seed-repo-shas.outputs.flask }} + restore-keys: | + ${{ runner.os }}-seed-repos-${{ hashFiles('e2e/seed_test_data.py') }}- - name: Install backend dependencies run: pip install -e . From 8bd70fe5ab4bb5a64c5417910dd6ea763b355fcb Mon Sep 17 00:00:00 2001 From: Guy Korland Date: Tue, 10 Mar 2026 11:44:36 +0200 Subject: [PATCH 7/7] fix: use actions/cache@v5 for seed repo cache Match the version used by the existing Playwright browser cache step. Co-Authored-By: Claude Opus 4.6 --- .github/workflows/playwright.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/playwright.yml b/.github/workflows/playwright.yml index 342dc49..cb68052 100644 --- a/.github/workflows/playwright.yml +++ b/.github/workflows/playwright.yml @@ -40,7 +40,7 @@ jobs: echo "flask=$(git ls-remote https://github.com/pallets/flask HEAD | cut -f1)" >> "$GITHUB_OUTPUT" - name: Cache seeded repository clones - uses: actions/cache@v4 + uses: actions/cache@v5 with: path: ${{ github.workspace }}/repositories key: ${{ runner.os }}-seed-repos-${{ hashFiles('e2e/seed_test_data.py') }}-${{ steps.seed-repo-shas.outputs.graphrag_sdk }}-${{ steps.seed-repo-shas.outputs.flask }}