#!/bin/bash
set -euo pipefail

#######################################
# Print a message to stdout prefixed with the current local timestamp.
# Arguments: $1 - message text (optional; treated as empty when omitted)
# Outputs:   "[YYYY-MM-DD HH:MM:SS] <message>" followed by a newline
#######################################
log() {
  # printf instead of echo so the message is never interpreted as an option;
  # ${1:-} keeps a bare `log` call from aborting under `set -u`.
  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "${1:-}"
}

# Parameters (all positional, all optional)
MODE="${1:-seed}"                      # 'seed' (Drizzle) or 'bypass'
EVAL_FILE="${2:-eval-codebuff.json}"   # eval file name
COMMIT_INDEX="${3:-0}"                 # commit index

# Resolve paths once instead of re-running dirname for every docker call.
SCRIPT_DIR="$(dirname "$0")"
COMPOSE_FILE="$SCRIPT_DIR/../docker-compose.evals.yml"

log "🚀 Remote Evaluation Infrastructure Starting"
log "📋 Parameters:"
log "  Mode: $MODE"
log "  Eval File: $EVAL_FILE"
log "  Commit Index: $COMMIT_INDEX"
log "  Working Directory: $(pwd)"
log "  Script Directory: $SCRIPT_DIR"

export CODEBUFF_WEBSOCKET_URL="ws://127.0.0.1:4242/ws"
export CODEBUFF_SKIP_BINARY_CHECK=1

# Start services
log "📦 Starting Docker services..."
log "  Compose file: $COMPOSE_FILE"
docker compose -f "$COMPOSE_FILE" up -d --build db backend

# Wait for backend to be ready
log "⏳ Waiting for backend to be ready..."
START_TIME=$(date +%s)
if ! "$SCRIPT_DIR/wait-for-healthz.sh" "http://127.0.0.1:4242/healthz" 90; then
  log '❌ Health check failed; dumping logs...'
  log '📋 Backend logs:'
  # Log dumps are best-effort diagnostics; never let them mask the real failure.
  docker compose -f "$COMPOSE_FILE" logs backend --tail=200 || true
  log '📋 Database logs:'
  docker compose -f "$COMPOSE_FILE" logs db --tail=50 || true
  exit 1
fi
READY_TIME=$(date +%s)
log "✅ Backend ready in $((READY_TIME - START_TIME)) seconds"

# Set up authentication
if [[ "$MODE" == "bypass" ]]; then
  log "🔐 Setting up bypass authentication..."
  # Assign first, export second: `export VAR="$(cmd)"` would hide a failure
  # of openssl because `export` itself returns 0.
  CODEBUFF_TEST_AUTH_TOKEN="$(openssl rand -hex 16)"
  export CODEBUFF_TEST_AUTH_TOKEN
  export CODEBUFF_API_KEY="$CODEBUFF_TEST_AUTH_TOKEN"
  # Only the first 8 characters of the token are logged.
  log "  Generated test auth token: ${CODEBUFF_TEST_AUTH_TOKEN:0:8}..."
else
  log "🌱 Setting up database seed authentication..."
  log "  Running seeder container..."
  SEED_START=$(date +%s)
  # The last line of the seeder output is taken as "CODEBUFF_API_KEY=<key>";
  # the prefix is stripped below.
  KEY_LINE=$(docker compose -f "$COMPOSE_FILE" run --rm seeder | tail -n1)
  export CODEBUFF_API_KEY="${KEY_LINE#CODEBUFF_API_KEY=}"
  SEED_END=$(date +%s)
  log "  Seeding completed in $((SEED_END - SEED_START)) seconds"
  log "  Extracted API key: ${CODEBUFF_API_KEY:0:8}..."
fi

# Run evaluation
log "🤖 Starting evaluation..."
log "  File: evals/git-evals/$EVAL_FILE"
log "  Commit Index: $COMMIT_INDEX"
log "  Environment: CODEBUFF_WEBSOCKET_URL=$CODEBUFF_WEBSOCKET_URL"
log "  This may take 10-30 minutes depending on task complexity..."

EVAL_START=$(date +%s)
# Capture the status with `||` so that `set -e` does not abort the script on a
# failed evaluation; otherwise the failure report and cleanup below would
# never run.
EVAL_EXIT_CODE=0
bun evals/git-evals/run-single-eval.ts \
  --eval-file="evals/git-evals/$EVAL_FILE" \
  --commit-index="$COMMIT_INDEX" || EVAL_EXIT_CODE=$?
EVAL_END=$(date +%s)
EVAL_DURATION=$((EVAL_END - EVAL_START))

if [[ "$EVAL_EXIT_CODE" -eq 0 ]]; then
  log "✅ Evaluation completed successfully in ${EVAL_DURATION} seconds!"
else
  log "❌ Evaluation failed with exit code $EVAL_EXIT_CODE after ${EVAL_DURATION} seconds"
  log "📋 Final backend logs:"
  docker compose -f "$COMPOSE_FILE" logs backend --tail=100 || true
fi

# Cleanup
log "🧹 Cleaning up Docker containers..."
docker compose -f "$COMPOSE_FILE" down -v

# Total is measured from the start of the health wait to the end of the eval.
TOTAL_DURATION=$((EVAL_END - START_TIME))
log "🏁 Remote evaluation finished in ${TOTAL_DURATION} total seconds (exit code: $EVAL_EXIT_CODE)"
exit "$EVAL_EXIT_CODE"
# 0 commit comments