CCExtractor · pulk17 · Mar 11, 2026 · Mar 11, 2026 · Mar 18, 2026 · canihavesomecoffee
@@ -7,6 +7,11 @@
 
 DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
 
+# Enable coredump capture so a crashing binary leaves a core file behind.
+# Lift the core-file size limit for this shell and the processes it starts.
+ulimit -c unlimited
+mkdir -p /tmp/coredumps
+# Name core files /tmp/coredumps/core.<executable>.<pid>. The pattern file is
+# written through `sudo tee` because the redirection itself would not run as root.
+echo "/tmp/coredumps/core.%e.%p" | sudo tee /proc/sys/kernel/core_pattern > /dev/null
+
 if [ ! -f "$DIR/variables" ]; then
     # No variable file defined
     sudo shutdown -h now
@@ -124,8 +129,65 @@ if [ -e "${dstDir}/ccextractor" ]; then
         ./ccextractor --version >> "${logFile}" 2>&1
         echo "=== End Version Info ===" >> "${logFile}"
         postStatus "testing" "Running tests"
+        ccextractor_path="$(pwd)/ccextractor"
+        combined_stdout="/tmp/combined_stdout.log"
+        # Start every run with an empty combined log.
+        : > "${combined_stdout}"
+
+        # Create a wrapper script that is handed to the test runner instead of the
+        # real binary. For every invocation it records the arguments, the merged
+        # stdout/stderr and the exit code in the combined log, while still passing
+        # the output and the exit code through to the caller.
+        wrapper_path="$(pwd)/ccextractor_wrapper"
+        cat > "${wrapper_path}" << 'WRAPPER_EOF'
+#!/bin/bash
+COMBINED_LOG="/tmp/combined_stdout.log"
+REAL_BINARY="PLACEHOLDER_BINARY"
+echo "=== TEST INVOCATION: $* ===" >> "$COMBINED_LOG"
+"$REAL_BINARY" "$@" 2>&1 | tee -a "$COMBINED_LOG"
+# Read the binary's status straight from the pipeline (must be the very next
+# command); no shared temp file is needed, so nothing can go stale or race.
+exit_code=${PIPESTATUS[0]}
+echo "=== EXIT CODE: ${exit_code} ===" >> "$COMBINED_LOG"
+echo "" >> "$COMBINED_LOG"
+exit "$exit_code"
+WRAPPER_EOF
+        # The heredoc is quoted (no expansion), so the absolute binary path is
+        # patched in afterwards. NOTE: assumes the path contains no '|' or '&'.
+        sed -i "s|PLACEHOLDER_BINARY|${ccextractor_path}|" "${wrapper_path}"
+        chmod +x "${wrapper_path}"
+
         executeCommand cd ${suiteDstDir}
-        executeCommand ${tester} --debug --entries "${testFile}" --executable "ccextractor" --tempfolder "${tempFolder}" --timeout 600 --reportfolder "${reportFolder}" --resultfolder "${resultFolder}" --samplefolder "${sampleFolder}" --method Server --url "${reportURL}"
+        executeCommand ${tester} --debug --entries "${testFile}" --executable "${wrapper_path}" --tempfolder "${tempFolder}" --timeout 600 --reportfolder "${reportFolder}" --resultfolder "${resultFolder}" --samplefolder "${sampleFolder}" --method Server --url "${reportURL}"
+
+        # Upload AI artifacts to GCS
+        # The destination bucket and the test ID are read from this instance's
+        # metadata attributes ("bucket" and "testID").
+        gcs_bucket=$(curl -s "http://metadata/computeMetadata/v1/instance/attributes/bucket" -H "Metadata-Flavor: Google")
+        test_id=$(curl -s "http://metadata/computeMetadata/v1/instance/attributes/testID" -H "Metadata-Flavor: Google")
+        # Access token of the instance's default service account; python3 extracts
+        # the "access_token" field from the JSON response of the metadata server.
+        token=$(curl -s "http://metadata.google.internal/computeMetadata/v1/instance/service-accounts/default/token" -H "Metadata-Flavor: Google" | python3 -c "import sys,json; print(json.load(sys.stdin)['access_token'])")
+
+        # upload_artifact FILE OBJECT_NAME
+        # Upload FILE to the GCS bucket as OBJECT_NAME using a single media upload.
+        # A missing FILE is silently ignored; any response other than HTTP 200 is
+        # recorded in the log file. Relies on $gcs_bucket, $token and $logFile.
+        upload_artifact() {
+            local file_path="$1"
+            local dest_path="$2"
+            local http_code
+            local upload_url
+
+            # Nothing to do when the artifact was never produced.
+            [ -f "$file_path" ] || return 0
+
+            upload_url="https://storage.googleapis.com/upload/storage/v1/b/${gcs_bucket}/o?uploadType=media&name=${dest_path}"
+            # Discard the response body and capture only the HTTP status code.
+            http_code=$(curl -s -X POST --data-binary @"$file_path" \
+                -H "Authorization: Bearer $token" \
+                -H "Content-Type: application/octet-stream" \
+                -w "%{http_code}" \
+                -o /dev/null \
+                "$upload_url")
+
+            if [ -z "$http_code" ] || [ "$http_code" -ne 200 ]; then
+                echo "GCS upload failed for ${dest_path}: HTTP ${http_code:-no_response}" >> "${logFile}"
+            fi
+        }
+
+        # Upload the binary that was tested
+        upload_artifact "$ccextractor_path" "test_artifacts/${test_id}/ccextractor"
+
+        # Upload combined stdout log
+        upload_artifact "${combined_stdout}" "test_artifacts/${test_id}/combined_stdout.log"
+
+        # Upload the first coredump found, if any. Every core would be stored under
+        # the same object name, so the loop stops after one upload. The -f test
+        # also covers the case where the glob matched nothing and the literal
+        # pattern is passed through (bash default; differs if nullglob is set).
+        for core_file in /tmp/coredumps/core.*; do
+            if [ -f "$core_file" ]; then
+                upload_artifact "$core_file" "test_artifacts/${test_id}/coredump"
+                break
+            fi
+        done
+
         sendLogFile
         postStatus "completed" "Ran all tests"
 

@@ -1193,7 +1193,8 @@ def create_instance(compute, project, zone, test, reportURL) -> Dict:
         metadata_items = [
             {'key': 'startup-script', 'value': startup_script},
             {'key': 'reportURL', 'value': reportURL},
-            {'key': 'bucket', 'value': config.get('GCS_BUCKET_NAME', '')}
+            {'key': 'bucket', 'value': config.get('GCS_BUCKET_NAME', '')},
+            {'key': 'testID', 'value': str(test.id)}
         ]
     elif test.platform == TestPlatform.windows:
         image_response = compute.images().getFromFamily(project=config.get('WINDOWS_INSTANCE_PROJECT_NAME', ''),
@@ -1209,7 +1210,8 @@ def create_instance(compute, project, zone, test, reportURL) -> Dict:
             {'key': 'service_account', 'value': service_account},
             {'key': 'rclone_conf', 'value': rclone_conf},
             {'key': 'reportURL', 'value': reportURL},
-            {'key': 'bucket', 'value': config.get('GCS_BUCKET_NAME', '')}
+            {'key': 'bucket', 'value': config.get('GCS_BUCKET_NAME', '')},
+            {'key': 'testID', 'value': str(test.id)}
         ]
     source_disk_image = image_response['selfLink']
 
@@ -2625,7 +2627,7 @@ def upload_log_type_request(log, test_id, repo_folder, test, request) -> bool:
         uploaded_file.save(temp_path)
         final_path = os.path.join(repo_folder, 'LogFiles', f"{test.id}.txt")
 
-        os.rename(temp_path, final_path)
+        os.replace(temp_path, final_path)
         log.debug("Stored log file")
         return True
 
@@ -2671,7 +2673,7 @@ def upload_type_request(log, test_id, repo_folder, test, request) -> bool:
         results_dir = os.path.join(repo_folder, 'TestResults')
         os.makedirs(results_dir, exist_ok=True)
         final_path = os.path.join(results_dir, f'{file_hash}{file_extension}')
-        os.rename(temp_path, final_path)
+        os.replace(temp_path, final_path)
         rto = RegressionTestOutput.query.filter(
             RegressionTestOutput.id == request.form['test_file_id']).first()
         result_file = TestResultFile(test.id, request.form['test_id'], rto.id, rto.correct, file_hash)

@@ -367,7 +367,6 @@
 
 
 @mod_test.route('/log-files/<test_id>')
-@login_required
 def download_build_log_file(test_id):
     """
     Serve download of build log.
@@ -379,15 +378,17 @@
     :return: build log text file
     :rtype: Flask response
     """
-    from run import config
+    from run import config, storage_client_bucket
     test = Test.query.filter(Test.id == test_id).first()
 
+    from flask import send_from_directory
+
     if test is not None:
         file_name = f"{test_id}.txt"
-        log_file_path = os.path.join(config.get('SAMPLE_REPOSITORY', ''), 'LogFiles', file_name)
-
+        log_dir = os.path.join(config.get('SAMPLE_REPOSITORY', ''), 'LogFiles')
+        log_file_path = os.path.join(log_dir, file_name)
         if os.path.isfile(log_file_path):
-            return serve_file_download(file_name, 'LogFiles')
+            return send_from_directory(log_dir, file_name, as_attachment=True)
 
         raise TestNotFoundException(f"Build log for Test {test_id} not found")
 
@@ -442,3 +443,247 @@
     g.db.commit()
     g.log.info(f"test with id: {test_id} stopped")
     return redirect(url_for('.by_id', test_id=test.id))
+
+
+def _artifact_redirect(blob_path, filename='artifact'):
+    """
+    Redirect to a time-limited signed URL for an object in the GCS bucket.
+
+    :param blob_path: name of the object inside the storage bucket
+    :type blob_path: str
+    :param filename: download name advertised through Content-Disposition
+    :type filename: str
+    :return: redirect to the signed URL; aborts with 404 if the object is missing
+    :rtype: Flask response
+    """
+    from datetime import timedelta
+
+    from run import config, storage_client_bucket
+
+    blob = storage_client_bucket.blob(blob_path)
+    if not blob.exists():
+        abort(404)
+
+    # The name ends up inside a quoted header value. Quotes, backslashes and
+    # control characters (e.g. from a user-supplied sample name) would corrupt
+    # the Content-Disposition header, so they are replaced.
+    safe_name = ''.join(
+        '_' if ch in '"\\' or not ch.isprintable() else ch
+        for ch in str(filename)
+    )
+    url = blob.generate_signed_url(
+        version="v4",
+        expiration=timedelta(minutes=config.get('GCS_SIGNED_URL_EXPIRY_LIMIT', 30)),
+        method="GET",
+        response_disposition=f'attachment; filename="{safe_name}"'
+    )
+    return redirect(url)
+
+
+@mod_test.route('/<int:test_id>/binary', methods=['GET'])
+def download_binary(test_id):
+    """
+    Serve the ccextractor executable that was uploaded for a test.
+
+    :param test_id: id of the test
+    :type test_id: int
+    :return: redirect to the stored binary, or 404 if none was uploaded
+    :rtype: Flask response
+    """
+    from run import storage_client_bucket
+
+    prefix = f'test_artifacts/{test_id}'
+    # The Linux artifact name is probed before the Windows one.
+    candidates = ('ccextractor', 'ccextractor.exe')
+    found = next(
+        (
+            name for name in candidates
+            if storage_client_bucket.blob(f'{prefix}/{name}').exists()
+        ),
+        None
+    )
+    if found is None:
+        abort(404)
+    return _artifact_redirect(f'{prefix}/{found}', filename=found)
+
+
+@mod_test.route('/<int:test_id>/coredump', methods=['GET'])
+def download_coredump(test_id):
+    """
+    Serve the coredump stored for a test.
+
+    :param test_id: id of the test
+    :type test_id: int
+    :return: redirect to the coredump, or 404 if the test produced none
+    :rtype: Flask response
+    """
+    blob_path = f'test_artifacts/{test_id}/coredump'
+    download_name = f'coredump-{test_id}'
+    return _artifact_redirect(blob_path, filename=download_name)
+
+
+@mod_test.route('/<int:test_id>/combined-stdout', methods=['GET'])
+def download_combined_stdout(test_id):
+    """
+    Serve the log that combines stdout/stderr of all test invocations.
+
+    :param test_id: id of the test
+    :type test_id: int
+    :return: redirect to the combined log, or 404 if it was not uploaded
+    :rtype: Flask response
+    """
+    blob_path = f'test_artifacts/{test_id}/combined_stdout.log'
+    download_name = f'combined_stdout-{test_id}.log'
+    return _artifact_redirect(blob_path, filename=download_name)
+
+
+@mod_test.route('/<int:test_id>/regression/<int:regression_test_id>/<int:output_id>/output-got', methods=['GET'])
+def download_output_got(test_id, regression_test_id, output_id):
+    """
+    Serve the output a test actually produced for one regression test output.
+
+    The stored file is located through the hash kept on the result-file row.
+
+    :param test_id: id of the test
+    :type test_id: int
+    :param regression_test_id: id of the regression test
+    :type regression_test_id: int
+    :param output_id: id of the regression test output
+    :type output_id: int
+    :return: redirect to the file; 404 if no row exists or it has no "got" hash
+    :rtype: Flask response
+    """
+    criteria = and_(
+        TestResultFile.test_id == test_id,
+        TestResultFile.regression_test_id == regression_test_id,
+        TestResultFile.regression_test_output_id == output_id
+    )
+    result_file = TestResultFile.query.filter(criteria).first()
+    if result_file is None or result_file.got is None:
+        abort(404)
+
+    # Stored files reuse the extension of the expected output file.
+    _, ext = os.path.splitext(result_file.regression_test_output.filename_correct)
+    blob_path = f'TestResults/{result_file.got}{ext}'
+    download_name = f'output_got_{regression_test_id}_{output_id}{ext}'
+    return _artifact_redirect(blob_path, filename=download_name)
+
+
+@mod_test.route('/<int:test_id>/regression/<int:regression_test_id>/<int:output_id>/output-expected', methods=['GET'])
+def download_output_expected(test_id, regression_test_id, output_id):
+    """
+    Serve the expected output for one regression test output of a test.
+
+    The stored file is located through the hash kept on the result-file row.
+
+    :param test_id: id of the test
+    :type test_id: int
+    :param regression_test_id: id of the regression test
+    :type regression_test_id: int
+    :param output_id: id of the regression test output
+    :type output_id: int
+    :return: redirect to the file; 404 if no result-file row exists
+    :rtype: Flask response
+    """
+    criteria = and_(
+        TestResultFile.test_id == test_id,
+        TestResultFile.regression_test_id == regression_test_id,
+        TestResultFile.regression_test_output_id == output_id
+    )
+    result_file = TestResultFile.query.filter(criteria).first()
+    if result_file is None:
+        abort(404)
+
+    _, ext = os.path.splitext(result_file.regression_test_output.filename_correct)
+    blob_path = f'TestResults/{result_file.expected}{ext}'
+    download_name = f'output_expected_{regression_test_id}_{output_id}{ext}'
+    return _artifact_redirect(blob_path, filename=download_name)
+
+
+@mod_test.route('/<int:test_id>/sample/<int:sample_id>', methods=['GET'])
+def download_sample_ai(test_id, sample_id):
+    """
+    Serve the sample file of a regression test (no auth required for AI workflow).
+
+    :param test_id: id of the test; only part of the URL, not used for the lookup
+    :type test_id: int
+    :param sample_id: id of the sample to download
+    :type sample_id: int
+    :return: redirect to the sample file, or 404 if the sample does not exist
+    :rtype: Flask response
+    """
+    from mod_sample.models import Sample
+
+    # NOTE(review): test_id is not checked against the sample, so any sample id
+    # can be fetched through any test id - confirm this is intended.
+    sample = Sample.query.filter(Sample.id == sample_id).first()
+    if sample is None:
+        abort(404)
+    return _artifact_redirect(
+        f'TestFiles/{sample.filename}',
+        filename=sample.original_name
+    )
+
+
+def _process_test_case(test_id, category_name, t_data):
+    """
+    Build the report entry for a single regression test of a test run.
+
+    :param test_id: id of the test the entry belongs to
+    :type test_id: int
+    :param category_name: name of the category the regression test is listed in
+    :type category_name: str
+    :param t_data: dict with the keys 'test', 'result', 'files' and optionally 'error'
+    :type t_data: dict
+    :return: dict with metadata, result details and artifact URLs
+    :rtype: dict
+    """
+    regression_test = t_data['test']
+    result = t_data['result']
+    is_error = t_data.get('error', False)
+    result_files = t_data['files']
+
+    def artifact_url(endpoint, output_id, **extra):
+        """Absolute URL of an endpoint addressed by test/regression test/output."""
+        return url_for(
+            endpoint,
+            test_id=test_id,
+            regression_test_id=regression_test.id,
+            output_id=output_id,
+            _external=True,
+            **extra
+        )
+
+    outputs = []
+    for expected_output in regression_test.output_files:
+        if expected_output.ignore:
+            continue
+
+        # First real (test_id != -1) result file recorded for this output.
+        matched = next(
+            (
+                rf for rf in result_files
+                if rf.test_id != -1 and rf.regression_test_output_id == expected_output.id
+            ),
+            None
+        )
+        expected_url = artifact_url('.download_output_expected', expected_output.id)
+
+        if matched and matched.got is not None:
+            got_url = artifact_url('.download_output_got', expected_output.id)
+            diff_url = artifact_url('.generate_diff', expected_output.id, to_view=0)
+        else:
+            # No separate "got" file: the expected file is reported as the output
+            # and there is no diff to link to.
+            got_url = expected_url
+            diff_url = None
+
+        outputs.append({
+            'output_id': expected_output.id,
+            'correct_extension': expected_output.correct_extension,
+            'expected_url': expected_url,
+            'got_url': got_url,
+            'diff_url': diff_url,
+        })
+
+    sample = regression_test.sample
+    sample_url = url_for(
+        '.download_sample_ai',
+        test_id=test_id,
+        sample_id=sample.id,
+        _external=True
+    )
+    return {
+        'regression_test_id': regression_test.id,
+        'category': category_name,
+        'sample_filename': sample.original_name,
+        'sample_url': sample_url,
+        'arguments': regression_test.command,
+        'result': 'Fail' if is_error else 'Pass',
+        'exit_code': result.exit_code if result else None,
+        'expected_exit_code': result.expected_rc if result else None,
+        'runtime_ms': result.runtime if result else None,
+        'outputs': outputs,
+        'how_to_reproduce': f'./ccextractor {regression_test.command} {sample.original_name}',
+    }
+
+
+@mod_test.route('/<int:test_id>/ai.json', methods=['GET'])
+def ai_json_endpoint(test_id):
+    """
+    Return a machine-readable JSON report of a test, including artifact URLs.
+
+    :param test_id: id of the test
+    :type test_id: int
+    :return: JSON report, or a JSON error with status 404 for an unknown test
+    :rtype: Flask response
+    """
+    from run import storage_client_bucket
+
+    test = Test.query.filter(Test.id == test_id).first()
+    if test is None:
+        return jsonify({'error': f'Test {test_id} not found'}), 404
+
+    artifact_prefix = f'test_artifacts/{test_id}'
+
+    def blob_exists(path):
+        return storage_client_bucket.blob(path).exists()
+
+    def external_url(endpoint):
+        return url_for(endpoint, test_id=test_id, _external=True)
+
+    # Artifact links are only advertised when the object is actually stored.
+    has_binary = any(
+        blob_exists(f'{artifact_prefix}/{name}')
+        for name in ('ccextractor', 'ccextractor.exe')
+    )
+    has_coredump = blob_exists(f'{artifact_prefix}/coredump')
+    has_combined_stdout = blob_exists(f'{artifact_prefix}/combined_stdout.log')
+
+    test_cases = []
+    failed = 0
+    for category in get_test_results(test):
+        for t_data in category['tests']:
+            if t_data.get('error', False):
+                failed += 1
+            test_cases.append(_process_test_case(test_id, category['category'].name, t_data))
+    total = len(test_cases)
+    passed = total - failed
+
+    if test.platform.value == 'linux':
+        run_hint = './ccextractor {arguments} {sample_filename}'
+    else:
+        run_hint = 'ccextractorwinfull.exe {arguments} {sample_filename}'
+
+    report = {
+        'test_id': test.id,
+        'commit': test.commit,
+        'platform': test.platform.value,
+        'branch': test.branch,
+        'status': 'completed' if test.finished else 'running',
+        'binary_url': external_url('.download_binary') if has_binary else None,
+        'coredump_url': external_url('.download_coredump') if has_coredump else None,
+        'log_url': external_url('.download_build_log_file'),
+        'combined_stdout_url': external_url('.download_combined_stdout') if has_combined_stdout else None,
+        'summary': {
+            'total': total,
+            'passed': passed,
+            'failed': failed,
+        },
+        'test_cases': test_cases,
+        'how_to_reproduce': 'Download the binary and sample, then run: ' + run_hint,
+    }
+
+    return jsonify(report)