Skip to content

Commit aa2f386

Browse files
committed
[remote-eval] feat: add comprehensive logging to remote eval infrastructure
1 parent 2c21b43 commit aa2f386

6 files changed

Lines changed: 232 additions & 56 deletions

File tree

.github/workflows/remote-evals.yml

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,9 +64,38 @@ jobs:
6464
COMMIT_INDEX: ${{ inputs.commit_index || '0' }}
6565
MODE: ${{ inputs.mode || 'bypass' }}
6666
run: |
67-
echo "🚀 Starting remote evaluation..."
67+
echo "🚀 Remote Evaluation Starting"
68+
echo "📋 GitHub Actions Environment:"
69+
echo " Runner: ${{ runner.os }}"
70+
echo " SHA: ${{ github.sha }}"
71+
echo " Ref: ${{ github.ref }}"
72+
echo " Event: ${{ github.event_name }}"
73+
echo " Eval File: $EVAL_FILE"
74+
echo " Commit Index: $COMMIT_INDEX"
75+
echo " Mode: $MODE"
76+
echo "🐳 Docker Info:"
77+
docker --version
78+
docker compose version
79+
echo "💾 Disk Space:"
80+
df -h
81+
echo "🔧 Starting evaluation..."
6882
bash evals/scripts/run-remote-parameterized.sh "$MODE" "$EVAL_FILE" "$COMMIT_INDEX"
6983
84+
- name: Dump logs on failure
85+
if: failure() && steps.check_commit.outputs.should_run_evals == 'true'
86+
run: |
87+
echo "❌ Evaluation failed - dumping diagnostic information"
88+
echo "🐳 Docker containers status:"
89+
docker ps -a || true
90+
echo "📋 Backend container logs:"
91+
docker compose -f evals/docker-compose.evals.yml logs backend --tail=200 || true
92+
echo "📋 Database container logs:"
93+
docker compose -f evals/docker-compose.evals.yml logs db --tail=100 || true
94+
echo "💾 Disk usage:"
95+
df -h || true
96+
echo "🧠 Memory usage:"
97+
free -h || true
98+
7099
- name: Upload evaluation logs
71100
if: always() && steps.check_commit.outputs.should_run_evals == 'true'
72101
uses: actions/upload-artifact@v4
@@ -75,14 +104,16 @@ jobs:
75104
path: |
76105
evals/test-repos/
77106
debug/
107+
~/.cache/bun/
78108
retention-days: 7
79109

80110
- name: Cleanup containers
81111
if: always() && steps.check_commit.outputs.should_run_evals == 'true'
82112
run: |
83-
echo "🧹 Cleaning up Docker containers..."
113+
echo "🧹 Final cleanup - removing all containers and volumes..."
84114
docker compose -f evals/docker-compose.evals.yml down -v || true
85115
docker system prune -f || true
116+
echo "✅ Cleanup completed"
86117
87118
# Optional: Matrix job to run multiple evaluations in parallel
88119
remote-evals-matrix:

evals/docker-compose.evals.yml

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,21 @@ services:
99
"postgres",
1010
"-c", "fsync=off",
1111
"-c", "synchronous_commit=off",
12-
"-c", "full_page_writes=off"
12+
"-c", "full_page_writes=off",
13+
"-c", "log_statement=all",
14+
"-c", "log_destination=stderr",
15+
"-c", "logging_collector=off"
1316
]
1417
healthcheck:
1518
test: ["CMD-SHELL", "pg_isready -U codebuff -d codebuff"]
1619
interval: 5s
1720
timeout: 3s
1821
retries: 20
22+
logging:
23+
driver: "json-file"
24+
options:
25+
max-size: "10m"
26+
max-file: "3"
1927

2028
backend:
2129
build:
@@ -74,6 +82,11 @@ services:
7482
interval: 5s
7583
timeout: 3s
7684
retries: 30
85+
logging:
86+
driver: "json-file"
87+
options:
88+
max-size: "50m"
89+
max-file: "3"
7790

7891
seeder:
7992
image: oven/bun:1.1.34
Lines changed: 52 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,54 +1,91 @@
11
#!/bin/bash
22
set -euo pipefail
33

4+
# log MESSAGE
# Write MESSAGE to stdout, prefixed with the current local time in the form
# [YYYY-MM-DD HH:MM:SS]. Only the first argument is printed.
log() {
  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$1"
}
8+
49
# Positional parameters; each one is optional and falls back to a default.
MODE=${1:-seed}                    # 'seed' (Drizzle) or 'bypass'
EVAL_FILE=${2:-eval-codebuff.json} # eval file name
COMMIT_INDEX=${3:-0}               # commit index

# Startup banner: report the effective parameters and where the script runs.
for banner_line in \
  "🚀 Remote Evaluation Infrastructure Starting" \
  "📋 Parameters:" \
  " Mode: $MODE" \
  " Eval File: $EVAL_FILE" \
  " Commit Index: $COMMIT_INDEX" \
  " Working Directory: $(pwd)" \
  " Script Directory: $(dirname "$0")"; do
  log "$banner_line"
done

# Exported so that child processes started by this script inherit them.
CODEBUFF_WEBSOCKET_URL="ws://127.0.0.1:4242/ws"
CODEBUFF_SKIP_BINARY_CHECK=1
export CODEBUFF_WEBSOCKET_URL CODEBUFF_SKIP_BINARY_CHECK
1624

1725
# Bring up the db and backend containers from the evals compose file, which
# is resolved relative to the directory this script lives in.
script_dir=$(dirname "$0")
compose_yml="$script_dir/../docker-compose.evals.yml"

log "📦 Starting Docker services..."
log " Compose file: $compose_yml"
docker compose -f "$compose_yml" up -d --build db backend

# Block until the backend health endpoint answers (the helper is given a
# budget of 90). On failure, dump recent container logs and abort.
log "⏳ Waiting for backend to be ready..."
START_TIME=$(date +%s)
if ! "$script_dir/wait-for-healthz.sh" "http://127.0.0.1:4242/healthz" 90; then
  log '❌ Health check failed; dumping logs...'
  log '📋 Backend logs:'
  docker compose -f "$compose_yml" logs backend --tail=200 || true
  log '📋 Database logs:'
  docker compose -f "$compose_yml" logs db --tail=50 || true
  exit 1
fi
READY_TIME=$(date +%s)
log "✅ Backend ready in $((READY_TIME - START_TIME)) seconds"
2843

2944
# Set up authentication.
#   bypass: generate a random throwaway token and use it as both the test
#           auth token and the API key.
#   other:  run the `seeder` compose service and take the API key from the
#           last line of its output (a leading "CODEBUFF_API_KEY=" is stripped).
if [ "$MODE" = "bypass" ]; then
  log "🔐 Setting up bypass authentication..."
  # Assign first, export second: `export VAR="$(cmd)"` returns export's own
  # status, so a failing openssl would go unnoticed even under `set -e`.
  CODEBUFF_TEST_AUTH_TOKEN="$(openssl rand -hex 16)"
  export CODEBUFF_TEST_AUTH_TOKEN
  export CODEBUFF_API_KEY="$CODEBUFF_TEST_AUTH_TOKEN"
  # Only the first 8 characters are logged, never the whole token.
  log " Generated test auth token: ${CODEBUFF_TEST_AUTH_TOKEN:0:8}..."
else
  log "🌱 Setting up database seed authentication..."
  log " Running seeder container..."
  SEED_START=$(date +%s)
  # Drizzle seed via compose for network access to db
  KEY_LINE=$(docker compose -f "$(dirname "$0")/../docker-compose.evals.yml" run --rm seeder | tail -n1)
  export CODEBUFF_API_KEY="${KEY_LINE#CODEBUFF_API_KEY=}"
  SEED_END=$(date +%s)
  log " Seeding completed in $((SEED_END - SEED_START)) seconds"
  # An empty key can never authenticate; fail here with a clear message
  # instead of letting the evaluation fail later for a non-obvious reason.
  if [ -z "$CODEBUFF_API_KEY" ]; then
    log "❌ Seeder produced no API key; aborting before running the evaluation" >&2
    exit 1
  fi
  log " Extracted API key: ${CODEBUFF_API_KEY:0:8}..."
fi
4060

4161
# Run the evaluation, report its outcome and duration, tear down the Docker
# services, and exit with the evaluation's own exit status.
log "🤖 Starting evaluation..."
log " File: evals/git-evals/$EVAL_FILE"
log " Commit Index: $COMMIT_INDEX"
log " Environment: CODEBUFF_WEBSOCKET_URL=$CODEBUFF_WEBSOCKET_URL"
log " This may take 10-30 minutes depending on task complexity..."

EVAL_START=$(date +%s)
# The script runs under `set -e`, so a bare failing command would terminate
# it immediately and `$?` on the following line would never be read. Capture
# the status via `||` so the failure logging and cleanup below still run.
EVAL_EXIT_CODE=0
bun evals/git-evals/run-single-eval.ts \
  --eval-file="evals/git-evals/$EVAL_FILE" \
  --commit-index="$COMMIT_INDEX" || EVAL_EXIT_CODE=$?

EVAL_END=$(date +%s)
EVAL_DURATION=$((EVAL_END - EVAL_START))

if [ "$EVAL_EXIT_CODE" -eq 0 ]; then
  log "✅ Evaluation completed successfully in ${EVAL_DURATION} seconds!"
else
  log "❌ Evaluation failed with exit code $EVAL_EXIT_CODE after ${EVAL_DURATION} seconds"
  log "📋 Final backend logs:"
  # Best effort: a failure to fetch logs must not hide the evaluation result.
  docker compose -f "$(dirname "$0")/../docker-compose.evals.yml" logs backend --tail=100 || true
fi

# Cleanup
log "🧹 Cleaning up Docker containers..."
docker compose -f "$(dirname "$0")/../docker-compose.evals.yml" down -v

# Measured from the start of the health wait to the end of the evaluation.
TOTAL_DURATION=$((EVAL_END - START_TIME))
log "🏁 Remote evaluation finished in ${TOTAL_DURATION} total seconds (exit code: $EVAL_EXIT_CODE)"
exit "$EVAL_EXIT_CODE"

evals/scripts/run-remote.sh

Lines changed: 41 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,65 @@
11
#!/bin/bash
22
set -euo pipefail
3+
4+
# log MESSAGE
# Print MESSAGE (first argument only) on stdout behind a
# [YYYY-MM-DD HH:MM:SS] local-time prefix.
log() {
  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$1"
}
8+
39
# First positional argument selects the authentication mode.
MODE=${1:-seed} # 'seed' (Drizzle) or 'bypass'
log "🚀 Starting remote evaluation infrastructure"
log "Mode: $MODE"

# Exported so that child processes started by this script inherit them.
CODEBUFF_WEBSOCKET_URL="ws://127.0.0.1:4242/ws"
CODEBUFF_SKIP_BINARY_CHECK=1
export CODEBUFF_WEBSOCKET_URL CODEBUFF_SKIP_BINARY_CHECK
log "Environment variables set:"
for exported_name in CODEBUFF_WEBSOCKET_URL CODEBUFF_SKIP_BINARY_CHECK; do
  log " ${exported_name}=${!exported_name}"
done
618

719
# Bring up the db and backend containers from the evals compose file, which
# is resolved relative to the directory this script lives in.
script_dir=$(dirname "$0")
compose_yml="$script_dir/../docker-compose.evals.yml"

log "📦 Starting Docker services (db + backend)..."
docker compose -f "$compose_yml" up -d --build db backend

# Block until the backend health endpoint answers (the helper is given a
# budget of 90). On failure, dump recent container logs and abort.
log "⏳ Waiting for backend health check..."
if ! "$script_dir/wait-for-healthz.sh" "http://127.0.0.1:4242/healthz" 90; then
  log '❌ Health check failed; dumping backend logs...'
  docker compose -f "$compose_yml" logs backend --tail=200 || true
  log '❌ Dumping database logs...'
  docker compose -f "$compose_yml" logs db --tail=50 || true
  exit 1
fi
1431

1532
# Set up authentication.
#   bypass: generate a random throwaway token and use it as both the test
#           auth token and the API key.
#   other:  run the `seeder` compose service and take the API key from the
#           last line of its output (a leading "CODEBUFF_API_KEY=" is stripped).
if [ "$MODE" = "bypass" ]; then
  log "🔐 Setting up bypass authentication..."
  # Assign first, export second: `export VAR="$(cmd)"` returns export's own
  # status, so a failing openssl would go unnoticed even under `set -e`.
  CODEBUFF_TEST_AUTH_TOKEN="$(openssl rand -hex 16)"
  export CODEBUFF_TEST_AUTH_TOKEN
  export CODEBUFF_API_KEY="$CODEBUFF_TEST_AUTH_TOKEN"
  # Only the first 8 characters are logged, never the whole token.
  log " Generated test auth token: ${CODEBUFF_TEST_AUTH_TOKEN:0:8}..."
else
  log "🌱 Setting up database seed authentication..."
  log " Running seeder container..."
  # Drizzle seed via compose for network access to db
  KEY_LINE=$(docker compose -f "$(dirname "$0")/../docker-compose.evals.yml" run --rm seeder | tail -n1)
  export CODEBUFF_API_KEY="${KEY_LINE#CODEBUFF_API_KEY=}"
  # An empty key can never authenticate; fail here with a clear message
  # instead of letting the evaluation fail later for a non-obvious reason.
  if [ -z "$CODEBUFF_API_KEY" ]; then
    log "❌ Seeder produced no API key; aborting before running the evaluation" >&2
    exit 1
  fi
  log " Extracted API key: ${CODEBUFF_API_KEY:0:8}..."
fi
2344

45+
# Run the fixed evaluation (eval-codebuff.json, commit index 0), report the
# outcome, tear down the Docker services, and exit with the evaluation's own
# exit status.
log "🤖 Starting evaluation..."
log " Eval file: evals/git-evals/eval-codebuff.json"
log " Commit index: 0"
log " This may take 10-30 minutes depending on task complexity..."

# The script runs under `set -e`, so a bare failing command would terminate
# it immediately and `$?` on the following line would never be read. Capture
# the status via `||` so the failure message and cleanup below still run.
EVAL_EXIT_CODE=0
bun evals/git-evals/run-single-eval.ts \
  --eval-file="evals/git-evals/eval-codebuff.json" \
  --commit-index=0 || EVAL_EXIT_CODE=$?

if [ "$EVAL_EXIT_CODE" -eq 0 ]; then
  log "✅ Evaluation completed successfully!"
else
  log "❌ Evaluation failed with exit code $EVAL_EXIT_CODE"
fi

log "🧹 Cleaning up Docker containers..."
docker compose -f "$(dirname "$0")/../docker-compose.evals.yml" down -v

log "🏁 Remote evaluation finished (exit code: $EVAL_EXIT_CODE)"
exit "$EVAL_EXIT_CODE"

evals/scripts/wait-for-healthz.sh

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,32 @@
11
#!/bin/bash
22
set -euo pipefail
3+
4+
# log MESSAGE
# Emit MESSAGE (first argument only) on stdout with a leading
# [YYYY-MM-DD HH:MM:SS] local timestamp.
log() {
  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$1"
}
8+
39
# Usage: wait-for-healthz.sh URL [TIMEOUT]
# Probe URL about once per second until curl gets a successful response
# (`-f` treats HTTP error statuses as failure) or TIMEOUT attempts
# (default 60) have been made. Exits 0 as soon as the endpoint responds,
# 1 otherwise.
URL="${1:?usage: $0 URL [TIMEOUT]}"
TIMEOUT="${2:-60}"

# Reject a non-numeric timeout up front instead of silently skipping the loop.
case "$TIMEOUT" in
  *[!0-9]*)
    log "❌ Invalid timeout '$TIMEOUT' (expected a whole number of seconds)" >&2
    exit 1
    ;;
esac
TIMEOUT=$((10#$TIMEOUT)) # force base 10 so values like "08" are not read as octal

log "🏥 Health check starting"
log " URL: $URL"
log " Timeout: ${TIMEOUT}s"

for ((i = 1; i <= TIMEOUT; i++)); do
  # --max-time bounds each probe so a backend that accepts the connection but
  # never answers cannot stall the loop past the intended timeout.
  if curl -fsS --max-time 5 "$URL" >/dev/null 2>&1; then
    log "✅ Backend is healthy and ready!"
    exit 0
  fi

  # Log the first 5 attempts, then only every 10th one, to avoid spam.
  if ((i <= 5 || i % 10 == 0)); then
    log "⏳ Waiting for backend... (${i}s / ${TIMEOUT}s)"
  fi

  sleep 1
done

# All failure diagnostics go to stderr.
{
  log "❌ Backend health check failed after $TIMEOUT seconds"
  log "🔍 Final health check attempt..."
  # One last probe without -f so the body and HTTP status code are captured
  # for the log even when the server answers with an error status.
  RESPONSE=$(curl -s --max-time 5 -w "HTTP_CODE:%{http_code}" "$URL" 2>/dev/null || echo "CURL_FAILED")
  log " Response: $RESPONSE"
} >&2
exit 1

0 commit comments

Comments
 (0)