Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion docs/guides/model_selection.md
Original file line number Diff line number Diff line change
Expand Up @@ -242,8 +242,9 @@ Models:
#### Select with git changes

The git-based selector allows you to select models whose files have changed compared to a target branch (default: main). This includes:

- Untracked files (new files not in git)
- Uncommitted changes in working directory
- Uncommitted changes in working directory (both staged and unstaged)
- Committed changes different from the target branch

For example:
Expand Down
4 changes: 3 additions & 1 deletion sqlmesh/utils/git.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@ def list_untracked_files(self) -> t.List[Path]:
)

def list_uncommitted_changed_files(self) -> t.List[Path]:
    """List tracked files whose working-tree content differs from ``HEAD``.

    The diff is taken against ``HEAD`` rather than against the index, so both
    staged and unstaged modifications are reported. ``--diff-filter=d``
    excludes deleted paths. Untracked files are not part of ``git diff``
    output; they are covered by ``list_untracked_files``.

    Returns:
        The result of ``_execute_list_output`` for the diff command, run
        against ``self._git_root``.
    """
    # NOTE(review): `git diff HEAD` fails in a repository that has no commits
    # yet -- confirm how `_execute_list_output` surfaces that error.
    return self._execute_list_output(
        ["diff", "--name-only", "--diff-filter=d", "HEAD"], self._git_root
    )

def list_committed_changed_files(self, target_branch: str = "main") -> t.List[Path]:
return self._execute_list_output(
Expand Down
88 changes: 88 additions & 0 deletions tests/core/test_selector_native.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

import pytest
from pytest_mock.plugin import MockerFixture
import subprocess

from sqlmesh.core import dialect as d
from sqlmesh.core.audit import StandaloneAudit
Expand All @@ -16,6 +17,7 @@
from sqlmesh.core.snapshot import SnapshotChangeCategory
from sqlmesh.utils import UniqueKeyDict
from sqlmesh.utils.date import now_timestamp
from sqlmesh.utils.git import GitClient


@pytest.mark.parametrize(
Expand Down Expand Up @@ -634,6 +636,92 @@ def test_expand_git_selection(
git_client_mock.list_untracked_files.assert_called_once()


def test_expand_git_selection_integration(tmp_path: Path, mocker: MockerFixture):
    """Exercise the ``git:main`` selector against a real temporary repository.

    Two models are committed on ``main``. The test then checks that the
    selector picks up, in turn: nothing when the tree is clean, an unstaged
    edit, a staged edit, a mix of staged and unstaged edits, and finally a
    change committed on a ``dev`` branch alongside an unstaged edit.
    """
    repo_path = tmp_path / "test_repo"
    repo_path.mkdir()

    def git(*args: str) -> None:
        # Run one git command inside the temporary repository, failing loudly.
        subprocess.run(["git", *args], cwd=repo_path, check=True, capture_output=True)

    def commit(message: str) -> None:
        # The author identity is supplied per invocation via `-c` flags.
        git("-c", "user.name=Max", "-c", "user.email=max@rb.com", "commit", "-m", message)

    git("init", "-b", "main")

    models: UniqueKeyDict[str, Model] = UniqueKeyDict("models")
    model_a_path = repo_path / "model_a.sql"
    model_a_path.write_text("SELECT 1 AS a")
    model_a = SqlModel(name="test_model_a", query=d.parse_one("SELECT 1 AS a"))
    model_a._path = model_a_path
    models[model_a.fqn] = model_a

    model_b_path = repo_path / "model_b.sql"
    model_b_path.write_text("SELECT 2 AS b")
    model_b = SqlModel(name="test_model_b", query=d.parse_one("SELECT 2 AS b"))
    model_b._path = model_b_path
    models[model_b.fqn] = model_b

    git("add", ".")
    commit("Initial commit")

    git_client = GitClient(repo_path)
    selector = NativeSelector(mocker.Mock(), models)
    selector._git_client = git_client

    def selected():
        # A fresh selection list is passed on every call.
        return selector.expand_model_selections(["git:main"])

    # No changes: nothing should be selected.
    assert selected() == set()

    # Modify A without staging it: only A should be selected.
    model_a_path.write_text("SELECT 10 AS a")
    assert selected() == {'"test_model_a"'}

    # Stage A: it should still be selected.
    git("add", "model_a.sql")
    assert selected() == {'"test_model_a"'}

    # Add an unstaged change to B: both models should be selected.
    model_b_path.write_text("SELECT 20 AS b")
    assert selected() == {'"test_model_a"', '"test_model_b"'}

    git("checkout", "-b", "dev")
    commit("Update model_a")

    # A is now committed on the dev branch and B is still unstaged;
    # both should be selected relative to main.
    assert selected() == {'"test_model_a"', '"test_model_b"'}


def test_select_models_with_external_parent(mocker: MockerFixture):
default_catalog = "test_catalog"
added_model = SqlModel(
Expand Down
173 changes: 173 additions & 0 deletions tests/utils/test_git_client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
import subprocess
from pathlib import Path
import pytest
from sqlmesh.utils.git import GitClient


@pytest.fixture
def git_repo(tmp_path: Path) -> Path:
    """Create an empty git repository on branch ``main`` and return its path."""
    repo_dir = tmp_path / "test_repo"
    repo_dir.mkdir()
    init_cmd = ["git", "init", "-b", "main"]
    subprocess.run(init_cmd, cwd=repo_dir, check=True, capture_output=True)
    return repo_dir


def test_git_uncommitted_changes(git_repo: Path):
    """A modified tracked file is reported both before and after staging."""
    git_client = GitClient(git_repo)

    def git(*args: str) -> None:
        # Run one git command inside the fixture repository.
        subprocess.run(["git", *args], cwd=git_repo, check=True, capture_output=True)

    model_file = git_repo / "model.sql"
    model_file.write_text("SELECT 1 AS a")
    git("add", "model.sql")
    git("-c", "user.name=Max", "-c", "user.email=max@rb.com", "commit", "-m", "Initial commit")

    # Clean tree right after the commit: nothing to report.
    assert git_client.list_uncommitted_changed_files() == []

    # An unstaged edit must be listed.
    model_file.write_text("SELECT 2 AS a")
    changed = git_client.list_uncommitted_changed_files()
    assert len(changed) == 1
    assert changed[0].name == "model.sql"

    # Once staged, the same edit must still be listed.
    git("add", "model.sql")
    changed = git_client.list_uncommitted_changed_files()
    assert len(changed) == 1
    assert changed[0].name == "model.sql"


def test_git_both_staged_and_unstaged_changes(git_repo: Path):
    """One staged and one unstaged modification are both reported together."""
    git_client = GitClient(git_repo)

    def git(*args: str) -> None:
        # Run one git command inside the fixture repository.
        subprocess.run(["git", *args], cwd=git_repo, check=True, capture_output=True)

    staged_file = git_repo / "model1.sql"
    unstaged_file = git_repo / "model2.sql"
    staged_file.write_text("SELECT 1")
    unstaged_file.write_text("SELECT 2")
    git("add", ".")
    git("-c", "user.name=Max", "-c", "user.email=max@rb.com", "commit", "-m", "Initial commit")

    # First file: modified and staged.
    staged_file.write_text("SELECT 10")
    git("add", "model1.sql")

    # Second file: modified but deliberately left unstaged.
    unstaged_file.write_text("SELECT 20")

    # Both modifications have to show up.
    changed = git_client.list_uncommitted_changed_files()
    assert len(changed) == 2
    assert {path.name for path in changed} == {"model1.sql", "model2.sql"}


def test_git_untracked_files(git_repo: Path):
    """A brand-new file is reported as untracked, not as an uncommitted change."""
    git_client = GitClient(git_repo)

    def git(*args: str) -> None:
        # Run one git command inside the fixture repository.
        subprocess.run(["git", *args], cwd=git_repo, check=True, capture_output=True)

    tracked_file = git_repo / "initial.sql"
    tracked_file.write_text("SELECT 0")
    git("add", "initial.sql")
    git("-c", "user.name=Max", "-c", "user.email=max@rb.com", "commit", "-m", "Initial commit")

    fresh_file = git_repo / "new_model.sql"
    fresh_file.write_text("SELECT 1")

    # The untracked file must not appear among uncommitted changes...
    assert git_client.list_uncommitted_changed_files() == []

    # ...but it must be the single entry in the untracked listing.
    untracked_paths = git_client.list_untracked_files()
    assert len(untracked_paths) == 1
    assert untracked_paths[0].name == "new_model.sql"


def test_git_committed_changes(git_repo: Path):
    """A commit on a feature branch is reported as a committed change vs ``main``."""
    git_client = GitClient(git_repo)

    def git(*args: str) -> None:
        # Run one git command inside the fixture repository.
        subprocess.run(["git", *args], cwd=git_repo, check=True, capture_output=True)

    def commit(message: str) -> None:
        # The author identity is supplied per invocation via `-c` flags.
        git("-c", "user.name=Max", "-c", "user.email=max@rb.com", "commit", "-m", message)

    model_file = git_repo / "model.sql"
    model_file.write_text("SELECT 1")
    git("add", "model.sql")
    commit("Initial commit")

    git("checkout", "-b", "feature")

    model_file.write_text("SELECT 2")
    git("add", "model.sql")
    commit("Update on feature branch")

    # The file differs from main through a commit on the feature branch.
    committed_paths = git_client.list_committed_changed_files(target_branch="main")
    assert len(committed_paths) == 1
    assert committed_paths[0].name == "model.sql"

    # Everything is committed, so nothing is left uncommitted.
    assert git_client.list_uncommitted_changed_files() == []
Loading