Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions .github/workflows/playwright.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,20 @@ jobs:
with:
python-version: '3.12'

- name: Resolve seed repository SHAs
id: seed-repo-shas
run: |
echo "graphrag_sdk=$(git ls-remote https://github.com/FalkorDB/GraphRAG-SDK HEAD | cut -f1)" >> "$GITHUB_OUTPUT"
echo "flask=$(git ls-remote https://github.com/pallets/flask HEAD | cut -f1)" >> "$GITHUB_OUTPUT"

- name: Cache seeded repository clones
uses: actions/cache@v5
with:
path: ${{ github.workspace }}/repositories
key: ${{ runner.os }}-seed-repos-${{ hashFiles('e2e/seed_test_data.py') }}-${{ steps.seed-repo-shas.outputs.graphrag_sdk }}-${{ steps.seed-repo-shas.outputs.flask }}
restore-keys: |
${{ runner.os }}-seed-repos-${{ hashFiles('e2e/seed_test_data.py') }}-

- name: Install backend dependencies
run: pip install -e .

Expand Down
60 changes: 54 additions & 6 deletions e2e/seed_test_data.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,22 @@
#!/usr/bin/env python3
"""Seed FalkorDB with test data for Playwright e2e tests."""

import os
import sys
import logging
import os
import shutil
import subprocess
from pathlib import Path
from urllib.parse import urlparse

from api.project import Project
from falkordb import FalkorDB

logging.basicConfig(
level=logging.INFO,
format="%(asctime)s - %(levelname)s - %(message)s",
)
logger = logging.getLogger(__name__)

from falkordb import FalkorDB
from api.project import Project

REPOS = [
"https://github.com/FalkorDB/GraphRAG-SDK",
"https://github.com/pallets/flask",
Expand All @@ -25,6 +28,51 @@
("import_data", "add_node"),
]

REPOSITORIES_DIR = Path(__file__).resolve().parent.parent / "repositories"


def repo_name_from_url(url: str) -> str:
    """Return the repository directory name encoded in a clone URL.

    The name is the last segment of the URL path, ignoring trailing
    slashes and a trailing ``.git`` suffix.

    Args:
        url: Clone URL such as ``https://github.com/pallets/flask``.

    Returns:
        The final path segment, e.g. ``"flask"``.

    Raises:
        ValueError: If the URL yields no usable name (empty, ``"."`` or
            ``".."``). Such a name, joined onto a cache directory, would
            point at the cache root or its parent instead of a dedicated
            per-repository directory.
    """
    parsed_path = urlparse(url).path.rstrip("/")
    repo_name = parsed_path.rsplit("/", 1)[-1].removesuffix(".git")
    if repo_name in {"", ".", ".."}:
        raise ValueError(f"Cannot derive a repository name from URL: {url!r}")
    return repo_name


def fresh_clone_repository(url: str, path: Path) -> Path:
    """Create a fresh shallow (depth 1) git clone of ``url`` at ``path``.

    Anything already present at ``path`` is removed first, so the result
    is always a brand-new clone.

    Args:
        url: Repository URL handed to ``git clone``.
        path: Destination directory for the clone.

    Returns:
        ``path``, once the clone has succeeded.

    Raises:
        subprocess.CalledProcessError: If ``git clone`` exits non-zero.
            The captured stderr is logged before the error is re-raised.
    """
    if path.is_dir() and not path.is_symlink():
        # Replace any existing directory before creating a fresh shallow clone.
        shutil.rmtree(path)
    elif path.exists() or path.is_symlink():
        # rmtree refuses regular files and symlinks (including dangling
        # ones), so remove those entries directly.
        path.unlink()

    path.parent.mkdir(parents=True, exist_ok=True)
    try:
        subprocess.run(
            # "--" ends option parsing so a URL or path that starts with
            # "-" cannot be interpreted as a git option.
            ["git", "clone", "--depth", "1", "--", url, str(path)],
            check=True,
            capture_output=True,
            text=True,
        )
    except subprocess.CalledProcessError as e:
        logger.error(
            "git clone failed for %s (return code %s). Stderr:\n%s",
            url,
            e.returncode,
            e.stderr or "<no stderr captured>",
        )
        raise

    return path


def load_project(url: str) -> Project:
    """Build a ``Project`` for ``url`` from the local clone cache.

    The clone lives under ``REPOSITORIES_DIR`` in a directory named after
    the repository. If that directory already has a ``.git`` entry it is
    reused as-is; otherwise a fresh clone is created there first.
    """
    repo_path = REPOSITORIES_DIR / repo_name_from_url(url)
    already_cloned = (repo_path / ".git").exists()

    if not already_cloned:
        logger.info("Cloning repository into cache at %s", repo_path)
        fresh_clone_repository(url, repo_path)
    else:
        logger.info("Using cached repository clone at %s", repo_path)

    return Project.from_local_repository(repo_path)


def ensure_calls_edges(graph_name: str) -> None:
"""Ensure required CALLS edges exist for E2E tests.
Expand Down Expand Up @@ -63,7 +111,7 @@ def ensure_calls_edges(graph_name: str) -> None:
def main():
for url in REPOS:
logger.info("Seeding %s ...", url)
proj = Project.from_git_repository(url)
proj = load_project(url)
proj.analyze_sources()
logger.info("Done seeding %s", url)

Expand Down
51 changes: 51 additions & 0 deletions tests/test_seed_test_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
from pathlib import Path

import e2e.seed_test_data as seed_test_data


def test_load_project_uses_existing_cached_clone(monkeypatch, tmp_path):
    """A cache directory that already has ``.git`` is passed straight to Project."""
    cached_repo = tmp_path / "GraphRAG-SDK"
    (cached_repo / ".git").mkdir(parents=True)

    seen_paths = []

    class StubProject:
        # Stand-in for Project: records the path and returns it unchanged.
        @staticmethod
        def from_local_repository(path):
            seen_paths.append(path)
            return path

    monkeypatch.setattr(seed_test_data, "Project", StubProject)
    monkeypatch.setattr(seed_test_data, "REPOSITORIES_DIR", tmp_path)

    result = seed_test_data.load_project("https://github.com/FalkorDB/GraphRAG-SDK")

    assert seen_paths == [cached_repo]
    assert result == cached_repo


def test_load_project_clones_into_cache(monkeypatch, tmp_path):
    """With no cached clone, the repository is cloned into the cache and then loaded."""
    url = "https://github.com/pallets/flask"
    expected_path = tmp_path / "flask"
    cloned = []
    loaded = []

    def stub_clone(url: str, path: Path) -> Path:
        # Simulate a successful clone by creating the .git directory.
        cloned.append((url, path))
        (path / ".git").mkdir(parents=True)
        return path

    class StubProject:
        # Stand-in for Project: records the path and returns it unchanged.
        @staticmethod
        def from_local_repository(path):
            loaded.append(path)
            return path

    monkeypatch.setattr(seed_test_data, "fresh_clone_repository", stub_clone)
    monkeypatch.setattr(seed_test_data, "Project", StubProject)
    monkeypatch.setattr(seed_test_data, "REPOSITORIES_DIR", tmp_path)

    result = seed_test_data.load_project(url)

    assert result == expected_path
    assert cloned == [("https://github.com/pallets/flask", expected_path)]
    assert loaded == [expected_path]
Loading