From 352823de3a478b49d2a0913c4f8df9f8d5a3c641 Mon Sep 17 00:00:00 2001 From: Alan Malta Rodrigues Date: Thu, 2 Jul 2026 13:11:35 +0200 Subject: [PATCH 1/8] feat: provide GH workflow for htcondor tests --- .github/workflows/test-htcondor.yml | 78 +++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 .github/workflows/test-htcondor.yml diff --git a/.github/workflows/test-htcondor.yml b/.github/workflows/test-htcondor.yml new file mode 100644 index 0000000..9424e6d --- /dev/null +++ b/.github/workflows/test-htcondor.yml @@ -0,0 +1,78 @@ +name: HTCondor Integration Test + +on: + push: + branches: + - main + pull_request: + branches: + - main + +jobs: + htcondor-test: + runs-on: ubuntu-latest + + steps: + - name: Checkout repository code + uses: actions/checkout@v4 + + - name: Start HTCondor Mini-Cluster Container + run: | + # Spin up the container, mounting the repository's sandbox/ directory at /job + docker run -d \ + --name condor-local \ + -v "${{ github.workspace }}/sandbox":/job \ + -w /job \ + htcondor/mini:el9 + + - name: Setup Test User and Directory Permissions + run: | + # Create the unprivileged user inside the container + docker exec condor-local useradd -m test_user + + # Fix ownership so test_user can write stdout/logs back to the host filesystem + docker exec condor-local chown -R test_user:test_user /job + + - name: Verify Workspace Inside Container + run: | + # Diagnostic step to ensure dummy_job.sub and payload.sh are present + docker exec condor-local ls -la /job + + - name: Submit Dummy Job + run: | + # Cluster number will always be 1, as we use a fresh container + docker exec -u test_user condor-local condor_submit dummy_job.sub + + - name: Wait for Job Completion + run: | + echo "Monitoring Cluster ID: 1" + docker exec -u test_user condor-local condor_watch_q -exit all,done -clusters 1 + + - name: Verify Job Output Logs + run: | + # Check the output file written back to the GitHub workspace runner + echo "=== Content of
job.out ===" + cat sandbox/job.out + + # Check the standard error to ensure no underlying bash glitches occurred + echo "=== Content of job.err ===" + cat sandbox/job.err + + # 1. Assert that our expected string exists in the output file + if ! grep -q "Hello from native HTCondor!" sandbox/job.out; then + echo "Failure: Expected job output string not found." + exit 1 + fi + + # 2. Assert that the error log is completely empty + if [ -s sandbox/job.err ]; then + echo "Failure: job.err is not empty! Something went wrong during execution." + exit 2 + fi + + echo "Success: Job completed perfectly with no errors!" + + - name: Cleanup Container + if: always() # Ensures the container is torn down even if the tests fail + run: | + docker rm -f condor-local \ No newline at end of file From 5ecc8a590e056fac27763f47fa7576c41a991b23 Mon Sep 17 00:00:00 2001 From: Alan Malta Rodrigues Date: Thu, 2 Jul 2026 13:12:04 +0200 Subject: [PATCH 2/8] feat: test job description for HTCondor --- sandbox/dummy_job.sub | 12 ++++++++++++ sandbox/payload.sh | 4 ++++ 2 files changed, 16 insertions(+) create mode 100755 sandbox/dummy_job.sub create mode 100755 sandbox/payload.sh diff --git a/sandbox/dummy_job.sub b/sandbox/dummy_job.sub new file mode 100755 index 0000000..5ed5d1d --- /dev/null +++ b/sandbox/dummy_job.sub @@ -0,0 +1,12 @@ +# dummy_job.sub +universe = vanilla +executable = payload.sh + +output = job.out +error = job.err +log = job.log + +should_transfer_files = YES +when_to_transfer_output = ON_EXIT + +queue 1 diff --git a/sandbox/payload.sh b/sandbox/payload.sh new file mode 100755 index 0000000..9b1be72 --- /dev/null +++ b/sandbox/payload.sh @@ -0,0 +1,4 @@ +#!/bin/bash +echo "Hello from native HTCondor!" 
+uname -r +sleep 5 From 3e678292b9ef96bc593cbf4fbd576791c6bf5e8f Mon Sep 17 00:00:00 2001 From: Alan Malta Rodrigues Date: Thu, 2 Jul 2026 13:26:10 +0200 Subject: [PATCH 3/8] fix: pre-commit fixup applied --- .github/workflows/test-htcondor.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/test-htcondor.yml b/.github/workflows/test-htcondor.yml index 9424e6d..7c4fdd5 100644 --- a/.github/workflows/test-htcondor.yml +++ b/.github/workflows/test-htcondor.yml @@ -29,7 +29,7 @@ jobs: run: | # Create the unprivileged user inside the container docker exec condor-local useradd -m test_user - + # Fix ownership so test_user can write stdout/logs back to the host filesystem docker exec condor-local chown -R test_user:test_user /job @@ -47,23 +47,23 @@ jobs: run: | echo "Monitoring Cluster ID: 1" docker exec -u test_user condor-local condor_watch_q -exit all,done -clusters 1 - + - name: Verify Job Output Logs run: | # Check the output file written back to the GitHub workspace runner echo "=== Content of job.out ===" cat sandbox/job.out - + # Check the standard error to ensure no underlying bash glitches occurred echo "=== Content of job.err ===" cat sandbox/job.err - + # 1. Assert that our expected string exists in the output file if ! grep -q "Hello from native HTCondor!" sandbox/job.out; then echo "Failure: Expected job output string not found." exit 1 fi - + # 2. Assert that the error log is completely empty if [ -s sandbox/job.err ]; then echo "Failure: job.err is not empty! Something went wrong during execution." 
@@ -75,4 +75,4 @@ jobs: - name: Cleanup Container if: always() # Ensures the container is torn down even if the tests fail run: | - docker rm -f condor-local \ No newline at end of file + docker rm -f condor-local From ba9f09f082ca2540400470ca8eb67158bd9a6f17 Mon Sep 17 00:00:00 2001 From: Alan Malta Rodrigues Date: Thu, 2 Jul 2026 13:27:00 +0200 Subject: [PATCH 4/8] test sleeping for 10 secs before submitting a job --- .github/workflows/test-htcondor.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/test-htcondor.yml b/.github/workflows/test-htcondor.yml index 7c4fdd5..72d177c 100644 --- a/.github/workflows/test-htcondor.yml +++ b/.github/workflows/test-htcondor.yml @@ -41,6 +41,7 @@ jobs: - name: Submit Dummy Job run: | # Cluster number will always be 1, as we use a fresh container + echo "Before submitting this job, sleep for 10 seconds" && sleep 10 docker exec -u test_user condor-local condor_submit dummy_job.sub - name: Wait for Job Completion From e830277ac7b0da53df0db51b809aaa6a5aee67f1 Mon Sep 17 00:00:00 2001 From: Alan Malta Rodrigues Date: Thu, 2 Jul 2026 13:35:22 +0200 Subject: [PATCH 5/8] fix: probe condor_schedd every second; tabular output for condor_watch_q --- .github/workflows/test-htcondor.yml | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test-htcondor.yml b/.github/workflows/test-htcondor.yml index 72d177c..c3791c3 100644 --- a/.github/workflows/test-htcondor.yml +++ b/.github/workflows/test-htcondor.yml @@ -38,16 +38,31 @@ jobs: # Diagnostic step to ensure dummy_job.sub and payload.sh are present docker exec condor-local ls -la /job + - name: Wait for HTCondor Daemons to be Ready + run: | + echo "Waiting for the HTCondor schedd daemon to boot up..." + # Try up to 30 times with a 1-second interval + for i in {1..30}; do + if docker exec -u test_user condor-local condor_status -schedd > /dev/null 2>&1; then + echo "HTCondor is ready!" 
+ exit 0 + fi + echo "Still waiting... ($i/30)" + sleep 1 + done + echo "Error: HTCondor daemons failed to start within 30 seconds." + exit 1 + - name: Submit Dummy Job run: | # Cluster number will always be 1, as we use a fresh container - echo "Before submitting this job, sleep for 10 seconds" && sleep 10 docker exec -u test_user condor-local condor_submit dummy_job.sub - name: Wait for Job Completion run: | + # -table is an option for CI workflows that do not have a TTY attached echo "Monitoring Cluster ID: 1" - docker exec -u test_user condor-local condor_watch_q -exit all,done -clusters 1 + docker exec -u test_user condor-local condor_watch_q -table -exit all,done -clusters 1 - name: Verify Job Output Logs run: | From f4a4fff76b457ec61abb0740d71197a828027d86 Mon Sep 17 00:00:00 2001 From: Alan Malta Rodrigues Date: Thu, 2 Jul 2026 13:42:46 +0200 Subject: [PATCH 6/8] fix: use -summary option for condor_watch_q --- .github/workflows/test-htcondor.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-htcondor.yml b/.github/workflows/test-htcondor.yml index c3791c3..7e5e5d9 100644 --- a/.github/workflows/test-htcondor.yml +++ b/.github/workflows/test-htcondor.yml @@ -62,7 +62,7 @@ jobs: run: | # -table is an option for CI workflows that do not have a TTY attached echo "Monitoring Cluster ID: 1" - docker exec -u test_user condor-local condor_watch_q -table -exit all,done -clusters 1 + docker exec -u test_user condor-local condor_watch_q -summary -exit all,done -clusters 1 - name: Verify Job Output Logs run: | From 8d5f97ff27405fb4838195a71c36fc69257f8b1f Mon Sep 17 00:00:00 2001 From: Alan Malta Rodrigues Date: Thu, 2 Jul 2026 13:52:56 +0200 Subject: [PATCH 7/8] fix: replace condor_watch_q by condor_q in a loop --- .github/workflows/test-htcondor.yml | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/.github/workflows/test-htcondor.yml b/.github/workflows/test-htcondor.yml index
7e5e5d9..66a4801 100644 --- a/.github/workflows/test-htcondor.yml +++ b/.github/workflows/test-htcondor.yml @@ -60,10 +60,26 @@ jobs: - name: Wait for Job Completion run: | - # -table is an option for CI workflows that do not have a TTY attached - echo "Monitoring Cluster ID: 1" - docker exec -u test_user condor-local condor_watch_q -summary -exit all,done -clusters 1 - + # Using "condor_watch_q -summary -exit all,done -clusters 1" would have been better, + # but it tries to use a TTY to render its output, which fails in GH workflows. + # So, poll condor_q in a loop instead. + echo "Monitoring Cluster ID: 1..." + for i in {1..60}; do + # condor_q prints the Cluster ID only if the job is active/idle/running. + # If the output is empty, the job has left the queue and is complete. + STATUS=$(docker exec -u test_user condor-local condor_q 1 -format "%d" ClusterId) + + if [ -z "$STATUS" ]; then + echo "Job left the queue (Completed)!" + exit 0 + fi + + echo "Job is still pending/processing... ($i/60)" + sleep 1 + done + + echo "Error: Job did not finish within 60 seconds." + exit 1 - name: Verify Job Output Logs run: | # Check the output file written back to the GitHub workspace runner From e31d385fb7c414955b4f75492ff8ec4e6788134c Mon Sep 17 00:00:00 2001 From: Alan Malta Rodrigues Date: Thu, 2 Jul 2026 13:57:21 +0200 Subject: [PATCH 8/8] another round of pre-commit --- .github/workflows/test-htcondor.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test-htcondor.yml b/.github/workflows/test-htcondor.yml index 66a4801..eeaedc5 100644 --- a/.github/workflows/test-htcondor.yml +++ b/.github/workflows/test-htcondor.yml @@ -68,16 +68,16 @@ jobs: # condor_q prints the Cluster ID only if the job is active/idle/running. # If the output is empty, the job has left the queue and is complete.
STATUS=$(docker exec -u test_user condor-local condor_q 1 -format "%d" ClusterId) - + if [ -z "$STATUS" ]; then echo "Job left the queue (Completed)!" exit 0 fi - + echo "Job is still pending/processing... ($i/60)" sleep 1 done - + echo "Error: Job did not finish within 60 seconds." exit 1 - name: Verify Job Output Logs