Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions providers/git/src/airflow/providers/git/bundles/git.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,8 +288,8 @@ def _has_version(repo: Repo, version: str) -> bool:
def _fetch_bare_repo(self):
    """
    Fetch every branch and tag from ``origin`` into the bare repository.

    When a hook is configured and its ``env`` mapping is non-empty, all of its
    entries are applied to the git command environment (through
    ``custom_environment``) for the duration of the fetch. Otherwise the fetch
    runs with the unmodified environment.
    """
    # Force-update ("+") local heads and tags so they mirror the remote.
    refspecs = ["+refs/heads/*:refs/heads/*", "+refs/tags/*:refs/tags/*"]

    # One guarded branch is enough: passing the whole env mapping already
    # includes GIT_SSH_COMMAND when it is set, and the truthiness check on
    # ``self.hook.env`` avoids calling ``.get`` on a missing/empty env.
    cm = nullcontext()
    if self.hook and self.hook.env:
        cm = self.bare_repo.git.custom_environment(**self.hook.env)

    with cm:
        self.bare_repo.remotes.origin.fetch(refspecs)
        self.bare_repo.close()
Expand Down
25 changes: 25 additions & 0 deletions providers/git/src/airflow/providers/git/hooks/git.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

from __future__ import annotations

import base64
import contextlib
import json
import logging
Expand Down Expand Up @@ -146,19 +147,43 @@ def _process_git_auth_url(self):
if not isinstance(self.repo_url, str):
return
if self.auth_token and self.repo_url.startswith("https://"):
original_url = self.repo_url
encoded_user = urlquote(self.user_name, safe="")
encoded_token = urlquote(self.auth_token, safe="")
self.repo_url = self.repo_url.replace("https://", f"https://{encoded_user}:{encoded_token}@", 1)
self._set_http_auth_env(original_url)
elif self.auth_token and self.repo_url.startswith("http://"):
original_url = self.repo_url
encoded_user = urlquote(self.user_name, safe="")
encoded_token = urlquote(self.auth_token, safe="")
self.repo_url = self.repo_url.replace("http://", f"http://{encoded_user}:{encoded_token}@", 1)
self._set_http_auth_env(original_url)
elif self.repo_url.startswith("http://"):
# if no auth token, use the repo url as is
pass
elif not self.repo_url.startswith("git@") and not self.repo_url.startswith("https://"):
self.repo_url = os.path.expanduser(self.repo_url)

def _set_http_auth_env(self, repo_url: str) -> None:
    """
    Set git config env vars to force HTTP authentication via extraHeader.

    Git does not send credentials for public repositories since the server
    does not respond with a 401 challenge. This forces the Authorization
    header to be sent on every request, allowing authenticated rate limits.

    Uses GIT_CONFIG_* environment variables (git >= 2.31) to inject an
    ``http.<URL>.extraHeader`` entry keyed on ``repo_url``, so the header is a
    URL-scoped setting rather than a global ``http.extraHeader``.
    NOTE(review): whether this also keeps the header off cross-host redirects
    depends on git/libcurl behaviour not visible here -- confirm before
    relying on it.

    Existing ``GIT_CONFIG_*`` entries in ``self.env`` are preserved: the
    header is appended at the next free index and ``GIT_CONFIG_COUNT`` is
    bumped. Calling this again for the same URL replaces the previously
    written Authorization entry instead of adding a duplicate header.

    :param repo_url: Repository URL used as the config scope; callers pass
        the URL as it was before credentials were embedded into it.
    """
    credentials = f"{self.user_name}:{self.auth_token}"
    encoded = base64.b64encode(credentials.encode()).decode()
    config_key = f"http.{repo_url}.extraHeader"
    header_prefix = "Authorization: Basic "

    # A missing or malformed counter is treated as "no entries yet".
    try:
        count = max(int(self.env.get("GIT_CONFIG_COUNT", "0")), 0)
    except (TypeError, ValueError):
        count = 0

    # Reuse the slot of an earlier Authorization entry for this URL, if any;
    # otherwise take the first index past the existing entries.
    index = count
    for i in range(count):
        same_key = self.env.get(f"GIT_CONFIG_KEY_{i}") == config_key
        existing_value = str(self.env.get(f"GIT_CONFIG_VALUE_{i}", ""))
        if same_key and existing_value.startswith(header_prefix):
            index = i
            break

    self.env[f"GIT_CONFIG_KEY_{index}"] = config_key
    self.env[f"GIT_CONFIG_VALUE_{index}"] = f"{header_prefix}{encoded}"
    self.env["GIT_CONFIG_COUNT"] = str(max(count, index + 1))
Comment on lines +175 to +185
Copy link

Copilot AI Apr 7, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

http.extraHeader is configured globally via GIT_CONFIG_KEY_0=http.extraHeader, so the Authorization header may be sent to any HTTP(S) endpoint Git contacts during this operation (redirects, alternates, or other HTTP remotes), which is a credential-leak risk. Consider scoping the config to the target host by using Git’s per-URL config form (e.g. http.<url>.extraHeader derived from the repo URL without embedded credentials), so the header is only attached for requests to that host/prefix.

Copilot uses AI. Check for mistakes.

def set_git_env(self, key: str | None = None) -> None:
    """
    Store the SSH command for git in this hook's environment mapping.

    The command string is produced by ``_build_ssh_command(key)`` and saved
    under the ``GIT_SSH_COMMAND`` entry of ``self.env``.

    :param key: Value forwarded unchanged to ``_build_ssh_command``; may be
        ``None``.
    """
    ssh_command = self._build_ssh_command(key)
    self.env["GIT_SSH_COMMAND"] = ssh_command

Expand Down
33 changes: 33 additions & 0 deletions providers/git/tests/unit/git/hooks/test_git.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

from __future__ import annotations

import base64
import os

import pytest
Expand Down Expand Up @@ -352,3 +353,35 @@ def test_passphrase_askpass_cleaned_up(self, create_connection_without_db):
assert os.path.exists(askpass_path)
# Both the askpass script and the temp key file should be cleaned up
assert not os.path.exists(askpass_path)

def test_https_auth_sets_extra_header(self):
    """An HTTPS connection with an auth token yields URL-scoped extraHeader env vars.

    The header makes git present credentials on its very first request, which
    matters for public repositories that never answer with a 401 challenge
    (issue #54829). Keying the setting on the repository URL is what is
    verified here: the config key must be ``http.<URL>.extraHeader`` rather
    than the global ``http.extraHeader``.
    """
    git_hook = GitHook(git_conn_id=CONN_HTTPS)
    basic_credentials = base64.b64encode(f"user:{ACCESS_TOKEN}".encode()).decode()

    # Checked in insertion order: count, then key, then value.
    expected_env = {
        "GIT_CONFIG_COUNT": "1",
        "GIT_CONFIG_KEY_0": f"http.{AIRFLOW_HTTPS_URL}.extraHeader",
        "GIT_CONFIG_VALUE_0": f"Authorization: Basic {basic_credentials}",
    }
    for name, wanted in expected_env.items():
        assert git_hook.env[name] == wanted

def test_http_auth_sets_extra_header(self):
    """A plain-HTTP connection with an auth token also gets ``http.<URL>.extraHeader``."""
    git_hook = GitHook(git_conn_id=CONN_HTTP)
    basic_credentials = base64.b64encode(f"user:{ACCESS_TOKEN}".encode()).decode()

    # Checked in insertion order: count, then key, then value.
    expected_env = {
        "GIT_CONFIG_COUNT": "1",
        "GIT_CONFIG_KEY_0": f"http.{AIRFLOW_HTTP_URL}.extraHeader",
        "GIT_CONFIG_VALUE_0": f"Authorization: Basic {basic_credentials}",
    }
    for name, wanted in expected_env.items():
        assert git_hook.env[name] == wanted

def test_no_auth_does_not_set_extra_header(self):
    """Without an auth token the hook env carries no ``GIT_CONFIG_COUNT`` entry."""
    hook_env = GitHook(git_conn_id=CONN_HTTP_NO_AUTH).env
    assert "GIT_CONFIG_COUNT" not in hook_env

def test_ssh_does_not_set_extra_header(self):
    """For the default (SSH) connection the hook env carries no ``GIT_CONFIG_COUNT`` entry."""
    hook_env = GitHook(git_conn_id=CONN_DEFAULT).env
    assert "GIT_CONFIG_COUNT" not in hook_env
Loading