Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion scripts/services/git-integration.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,14 @@ x-env-args: &env-args
NODE_ENV: docker
SERVICE: git-integration
SHELL: /bin/sh
REPO_STORAGE_ROOT: /var/lib/crowdgit

services:
git-integration:
build:
context: ../../
dockerfile: ./scripts/services/docker/Dockerfile.git_integration
working_dir: /usr/crowd/app/services/apps/git_integration
working_dir: /usr/crowd/app
env_file:
- ../../backend/.env.dist.local
- ../../backend/.env.dist.composed
Expand All @@ -23,6 +24,8 @@ services:
- crowd-bridge
ports:
- '8085:8085'
volumes:
- git-integration-repos:/var/lib/crowdgit

git-integration-dev:
build:
Expand All @@ -44,7 +47,11 @@ services:
- crowd-bridge
volumes:
- ../../services/apps/git_integration/src:/usr/crowd/app/services/apps/git_integration/src
- git-integration-repos:/var/lib/crowdgit

networks:
crowd-bridge:
external: true

volumes:
git-integration-repos:
48 changes: 40 additions & 8 deletions services/apps/git_integration/src/crowdgit/database/crud.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,11 @@
from crowdgit.models.repository import Repository
from crowdgit.models.service_execution import ServiceExecution
from crowdgit.settings import (
FAILED_RETRY_INTERVAL_HOURS,
MAX_CONCURRENT_ONBOARDINGS,
MAX_INTEGRATION_RESULTS,
REPOSITORY_UPDATE_INTERVAL_HOURS,
STUCK_REPO_TIMEOUT_HOURS,
)

from .connection import get_db_connection
Expand Down Expand Up @@ -74,7 +76,7 @@ async def acquire_onboarding_repo() -> Repository | None:
JOIN git."repositoryProcessing" rp ON rp."repositoryId" = r.id
CROSS JOIN current_onboarding_count c
WHERE rp.state = $2
AND rp."lockedAt" IS NULL
AND (rp."lockedAt" IS NULL OR rp."lockedAt" < NOW() - INTERVAL '1 hour' * $4::numeric)
AND r."deletedAt" IS NULL
AND c.count < $3
ORDER BY rp.priority ASC, rp."createdAt" ASC
Expand All @@ -93,7 +95,12 @@ async def acquire_onboarding_repo() -> Repository | None:
"""
return await acquire_repository(
onboarding_repo_sql_query,
(RepositoryState.PROCESSING, RepositoryState.PENDING, MAX_CONCURRENT_ONBOARDINGS),
(
RepositoryState.PROCESSING,
RepositoryState.PENDING,
MAX_CONCURRENT_ONBOARDINGS,
STUCK_REPO_TIMEOUT_HOURS,
),
)


Expand Down Expand Up @@ -141,9 +148,11 @@ async def acquire_recurrent_repo() -> Repository | None:
FROM public.repositories r
JOIN git."repositoryProcessing" rp ON rp."repositoryId" = r.id
WHERE NOT (rp.state = ANY($2))
AND rp."lockedAt" IS NULL
AND (rp."lockedAt" IS NULL OR rp."lockedAt" < NOW() - INTERVAL '1 hour' * $4::numeric)
AND r."deletedAt" IS NULL
AND rp."lastProcessedAt" < NOW() - INTERVAL '1 hour' * $3
AND rp."lastProcessedAt" < NOW() - INTERVAL '1 hour' * (
CASE WHEN rp.state = 'failed' THEN $5::numeric ELSE $3::numeric END
)
AND NOT (
r.url LIKE '%gerrit.automotivelinux.org%'
AND EXISTS (SELECT 1 FROM automotivelinux_processing)
Expand All @@ -170,7 +179,13 @@ async def acquire_recurrent_repo() -> Repository | None:
)
return await acquire_repository(
recurrent_repo_sql_query,
(RepositoryState.PROCESSING, states_to_exclude, REPOSITORY_UPDATE_INTERVAL_HOURS),
(
RepositoryState.PROCESSING,
states_to_exclude,
REPOSITORY_UPDATE_INTERVAL_HOURS,
STUCK_REPO_TIMEOUT_HOURS,
FAILED_RETRY_INTERVAL_HOURS,
),
)
Comment thread
themarolt marked this conversation as resolved.


Expand All @@ -190,14 +205,20 @@ async def can_onboard_more():


async def acquire_pending_reonboard_repo() -> Repository | None:
"""Acquire a pending_reonboard repo for re-onboarding (only called on weekends)."""
"""Acquire a pending_reonboard repo for re-onboarding (only called on weekends).

PENDING_REONBOARD is no longer produced automatically (ReOnboardingRequiredError was removed
in CM-1185). This function stays to drain any legacy rows that pre-date the change, and to
allow the CM-1186 backfill script to set state='pending_reonboard' for repos that need a
full re-ingest on the weekend.
"""
pending_reonboard_sql_query = f"""
WITH selected_repo AS (
SELECT r.id
FROM public.repositories r
JOIN git."repositoryProcessing" rp ON rp."repositoryId" = r.id
WHERE rp.state = $1
AND rp."lockedAt" IS NULL
AND (rp."lockedAt" IS NULL OR rp."lockedAt" < NOW() - INTERVAL '1 hour' * $3::numeric)
AND r."deletedAt" IS NULL
ORDER BY rp.priority ASC, rp."lastProcessedAt" ASC
LIMIT 1
Expand All @@ -218,7 +239,7 @@ async def acquire_pending_reonboard_repo() -> Repository | None:
"""
return await acquire_repository(
pending_reonboard_sql_query,
(RepositoryState.PENDING_REONBOARD, RepositoryState.PROCESSING),
(RepositoryState.PENDING_REONBOARD, RepositoryState.PROCESSING, STUCK_REPO_TIMEOUT_HOURS),
Comment thread
themarolt marked this conversation as resolved.
)
Comment thread
themarolt marked this conversation as resolved.
Comment thread
themarolt marked this conversation as resolved.


Expand Down Expand Up @@ -268,6 +289,17 @@ async def release_repo(repo_id: str):
return str(result)


async def update_lock_heartbeat(repo_id: str):
    """Refresh the lock timestamp of a repository that is still being processed.

    The acquire queries in this module consider a row available when its
    ``"lockedAt"`` is NULL or older than the stuck-repo timeout, so a worker has
    to keep bumping ``"lockedAt"`` while it works to avoid being reclaimed.

    Only rows that currently hold a lock are touched. Without that guard, a
    heartbeat arriving after the lock was cleared would set ``"lockedAt"`` again
    on an unlocked row and hide it from acquisition until the timeout elapsed.

    Args:
        repo_id: Value matched against ``"repositoryId"`` in
            ``git."repositoryProcessing"``.
    """
    sql_query = """
    UPDATE git."repositoryProcessing"
    SET "lockedAt" = NOW(),
        "updatedAt" = NOW()
    WHERE "repositoryId" = $1
      AND "lockedAt" IS NOT NULL
    """
    await execute(sql_query, (repo_id,))
Comment thread
themarolt marked this conversation as resolved.
Comment thread
themarolt marked this conversation as resolved.


async def update_last_processed_commit(repo_id: str, commit_hash: str, branch: str | None = None):
"""
Update last processed commit and optionally the branch after processing
Expand Down
2 changes: 1 addition & 1 deletion services/apps/git_integration/src/crowdgit/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ class NetworkError(CrowdGitError):


@dataclass
class PermissionError(CrowdGitError):
class RepoPermissionError(CrowdGitError):
error_message: str = "Permission denied"
error_code: ErrorCode = ErrorCode.PERMISSION_ERROR
Comment thread
themarolt marked this conversation as resolved.

Expand Down
13 changes: 3 additions & 10 deletions services/apps/git_integration/src/crowdgit/models/clone_batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,9 @@ class CloneBatchInfo(BaseModel):
latest_commit_in_repo: str | None = Field(
None, description="Hash of the latest commit in repo"
)
edge_commit: str | None = Field(
default=None,
description="The oldest commit in the current batch, used to track progress during incremental processing.",
)
prev_batch_edge_commit: str | None = Field(
default=None,
description="The edge commit from the previous batch, used to track progress during incremental processing.",
)
clone_with_batches: bool = Field(
default=True, description="Whether repo is cloned with batches"
branch_changed: bool = Field(
default=False,
description="True when default branch changed and clone was wiped — last_processed_commit is stale and must be ignored",
)

class Config:
Expand Down
Loading
Loading