Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
110 changes: 110 additions & 0 deletions .github/workflows/test-htcondor.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
# Integration test: boots a single-node HTCondor pool in a Docker container,
# submits sandbox/dummy_job.sub as an unprivileged user, waits for the job to
# leave the queue, and asserts on the stdout/stderr files it wrote back.
name: HTCondor Integration Test

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

# The job only needs to read the repository, so keep GITHUB_TOKEN read-only.
permissions:
  contents: read

jobs:
  htcondor-test:
    runs-on: ubuntu-latest
    # The polling loops below are bounded, but the image pull and container
    # start are not; cap the whole job instead of relying on the 6-hour default.
    timeout-minutes: 15

    steps:
      - name: Checkout repository code
        uses: actions/checkout@v4

      - name: Start HTCondor Mini-Cluster Container
        run: |
          # Spin up the container, mapping the repository's sandbox/ directory to /job.
          # -w /job sets the container's working directory, so the later
          # `docker exec` calls resolve dummy_job.sub relative to /job.
          docker run -d \
            --name condor-local \
            -v "${{ github.workspace }}/sandbox":/job \
            -w /job \
            htcondor/mini:el9

      - name: Setup Test User and Directory Permissions
        run: |
          # Create the unprivileged user inside the container
          docker exec condor-local useradd -m test_user

          # Fix ownership so test_user can write stdout/logs back to the host filesystem
          docker exec condor-local chown -R test_user:test_user /job

      - name: Verify Workspace Inside Container
        run: |
          # Diagnostic step to ensure dummy_job.sub and payload.sh are present
          docker exec condor-local ls -la /job

      - name: Wait for HTCondor Daemons to be Ready
        run: |
          echo "Waiting for the HTCondor schedd daemon to boot up..."
          # Try up to 30 times with a 1-second interval.
          # Require a non-empty schedd listing rather than just a zero exit status:
          # an empty listing means the collector answered but no schedd has
          # advertised yet, so submitting would be premature.
          for i in {1..30}; do
            if [ -n "$(docker exec -u test_user condor-local condor_status -schedd -af Name 2>/dev/null)" ]; then
              echo "HTCondor is ready!"
              exit 0
            fi
            echo "Still waiting... ($i/30)"
            sleep 1
          done
          echo "Error: HTCondor daemons failed to start within 30 seconds."
          exit 1

      - name: Submit Dummy Job
        run: |
          # Cluster number will always be 1, as we use a fresh container
          docker exec -u test_user condor-local condor_submit dummy_job.sub

      - name: Wait for Job Completion
        run: |
          # Using "condor_watch_q -summary -exit all,done -clusters 1" would have been better,
          # but it tries to use TTY for rendering information, which fails in GH workflows
          # So, use a more complex condor_q method...
          echo "Monitoring Cluster ID: 1..."
          for i in {1..60}; do
            # condor_q prints the Cluster ID only if the job is active/idle/running.
            # If the output is empty, the job has left the queue and is complete.
            # Leaving the queue is not proof of success on its own; the
            # "Verify Job Output Logs" step is what asserts on the result.
            STATUS=$(docker exec -u test_user condor-local condor_q 1 -format "%d" ClusterId)

            if [ -z "$STATUS" ]; then
              echo "Job left the queue (Completed)!"
              exit 0
            fi

            echo "Job is still pending/processing... ($i/60)"
            sleep 1
          done

          echo "Error: Job did not finish within 60 seconds."
          exit 1

      - name: Verify Job Output Logs
        run: |
          # Check the output file written back to the GitHub workspace runner
          echo "=== Content of job.out ==="
          cat sandbox/job.out

          # Check the standard error to ensure no underlying bash glitches occurred
          echo "=== Content of job.err ==="
          cat sandbox/job.err

          # 1. Assert that our expected string exists in the output file
          if ! grep -q "Hello from native HTCondor!" sandbox/job.out; then
            echo "Failure: Expected job output string not found."
            exit 1
          fi

          # 2. Assert that the error log is completely empty
          if [ -s sandbox/job.err ]; then
            echo "Failure: job.err is not empty! Something went wrong during execution."
            exit 2
          fi

          echo "Success: Job completed perfectly with no errors!"

      - name: Dump HTCondor Diagnostics
        if: failure()  # Only runs when an earlier step failed
        run: |
          # Best-effort: each command may itself fail (e.g. the container never
          # started), so none of them is allowed to fail this step.
          echo "=== Content of job.log ==="
          cat sandbox/job.log || true

          echo "=== Queue state ==="
          docker exec -u test_user condor-local condor_q -nobatch || true

          echo "=== Container logs ==="
          docker logs condor-local || true

      - name: Cleanup Container
        if: always()  # Ensures the container is torn down even if the tests fail
        run: |
          docker rm -f condor-local
12 changes: 12 additions & 0 deletions sandbox/dummy_job.sub
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# dummy_job.sub
# Submit description for one vanilla-universe run of payload.sh.
universe   = vanilla
executable = payload.sh

# Use HTCondor file transfer; output is transferred when the job exits.
should_transfer_files   = YES
when_to_transfer_output = ON_EXIT

# Job stdout, job stderr, and the HTCondor job event log.
output = job.out
error  = job.err
log    = job.log

queue 1
4 changes: 4 additions & 0 deletions sandbox/payload.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/bin/bash
# Test payload: print a fixed greeting and the kernel release, then idle for
# five seconds before exiting.

main() {
  printf '%s\n' "Hello from native HTCondor!"
  uname -r
  sleep 5
}

main "$@"
Loading