From 083b0cd73f440a92b3068925de22c02288255cd8 Mon Sep 17 00:00:00 2001 From: Orion Papadakis Date: Tue, 12 May 2026 17:51:50 +0300 Subject: [PATCH 01/14] [ci][fix] Export `APP_PID` to $GITHUB_ENV in `integration-quarkus-langchain4j.yml` --- .github/workflows/integration-quarkus-langchain4j.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/integration-quarkus-langchain4j.yml b/.github/workflows/integration-quarkus-langchain4j.yml index 8ee6e900..96577689 100644 --- a/.github/workflows/integration-quarkus-langchain4j.yml +++ b/.github/workflows/integration-quarkus-langchain4j.yml @@ -146,7 +146,8 @@ jobs: -Dquarkus.http.port=$QUARKUS_PORT \ -jar target/quarkus-app/quarkus-run.jar & APP_PID=$! - + echo "APP_PID=$APP_PID" >> $GITHUB_ENV + if [ -z "$APP_PID" ]; then echo "ERROR: Failed to start Quarkus application" exit 1 From 62fb9bb2faae1695eeddcf0afba13471dbd5638d Mon Sep 17 00:00:00 2001 From: Orion Papadakis Date: Tue, 12 May 2026 17:58:25 +0300 Subject: [PATCH 02/14] [ci][fix] Add permission check for workflow `/rerun` triggers to control/secure ci load --- .github/workflows/rerun-workflow.yml | 32 +++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/.github/workflows/rerun-workflow.yml b/.github/workflows/rerun-workflow.yml index e4b14e75..62eec7d3 100644 --- a/.github/workflows/rerun-workflow.yml +++ b/.github/workflows/rerun-workflow.yml @@ -47,8 +47,30 @@ jobs: core.setOutput('is_help', 'false'); } - - name: Get PR SHA + - name: Check commenter permissions + id: check_permission if: steps.help.outputs.is_help != 'true' + uses: actions/github-script@v7 + with: + script: | + const { data: perm } = await github.rest.repos.getCollaboratorPermissionLevel({ + owner: context.repo.owner, + repo: context.repo.repo, + username: context.payload.comment.user.login + }); + const authorized = ['write', 'admin'].includes(perm.permission); + if (!authorized) { + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number, + body: `@${context.payload.comment.user.login} You need write permission to trigger workflow reruns.` + }); + } + core.setOutput('authorized', authorized ? 'true' : 'false'); + + - name: Get PR SHA + if: steps.help.outputs.is_help != 'true' && steps.check_permission.outputs.authorized == 'true' id: pr uses: actions/github-script@v7 with: @@ -64,7 +86,7 @@ jobs: console.log(`PR head ref: ${pr.head.ref}`); - name: Add reaction to comment - if: steps.help.outputs.is_help != 'true' + if: steps.help.outputs.is_help != 'true' && steps.check_permission.outputs.authorized == 'true' uses: actions/github-script@v7 with: script: | @@ -76,7 +98,7 @@ jobs: }); - name: Post start comment - if: steps.help.outputs.is_help != 'true' + if: steps.help.outputs.is_help != 'true' && steps.check_permission.outputs.authorized == 'true' uses: actions/github-script@v7 with: script: | @@ -92,7 +114,7 @@ jobs: }); - name: Rerun failed workflows - if: steps.help.outputs.is_help != 'true' + if: steps.help.outputs.is_help != 'true' && steps.check_permission.outputs.authorized == 'true' uses: actions/github-script@v7 with: script: | @@ -167,7 +189,7 @@ jobs: console.log(`Reran ${rerunCount} workflow(s)`); - name: Post completion comment - if: always() && steps.help.outputs.is_help != 'true' + if: always() && steps.help.outputs.is_help != 'true' && steps.check_permission.outputs.authorized == 'true' uses: actions/github-script@v7 with: script: | From b653629458436be6ac588713080e9ab27b7c448f Mon Sep 17 00:00:00 2001 From: Orion Papadakis Date: Tue, 12 May 2026 17:59:24 +0300 Subject: [PATCH 03/14] [ci][fix] Clone Quarkus LangChain4j with shallow depth to optimize checkout process --- .github/workflows/integration-quarkus-langchain4j.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/integration-quarkus-langchain4j.yml b/.github/workflows/integration-quarkus-langchain4j.yml index 96577689..681ee095 100644 --- a/.github/workflows/integration-quarkus-langchain4j.yml +++ b/.github/workflows/integration-quarkus-langchain4j.yml @@ -108,7 +108,7 @@ jobs: - name: Clone Quarkus LangChain4j run: | cd ${{ github.workspace }} - git clone https://github.com/quarkiverse/quarkus-langchain4j.git + git clone --depth 1 https://github.com/quarkiverse/quarkus-langchain4j.git # Step 4: Build Quarkus LangChain4j with current GPULlama3.java - name: Build Quarkus LangChain4j From b5ce0c63a0714bdbf7c41c96c58e2dd3a37f754c Mon Sep 17 00:00:00 2001 From: Orion Papadakis Date: Tue, 12 May 2026 18:07:34 +0300 Subject: [PATCH 04/14] [ci][fix] Minimize redundant export statements by appending to $GITHUB_PATH once --- .github/workflows/build-and-run.yml | 22 ++----------------- .../integration-quarkus-langchain4j.yml | 6 ++--- 2 files changed, 4 insertions(+), 24 deletions(-) diff --git a/.github/workflows/build-and-run.yml b/.github/workflows/build-and-run.yml index a17c9228..16fc75c9 100644 --- a/.github/workflows/build-and-run.yml +++ b/.github/workflows/build-and-run.yml @@ -85,7 +85,8 @@ jobs: # Save for subsequent steps echo "TORNADOVM_HOME=$FULL_SDK" >> $GITHUB_ENV - echo "PATH=$PATH" >> $GITHUB_ENV + echo "$FULL_SDK/bin" >> $GITHUB_PATH + echo "$JAVA_HOME/bin" >> $GITHUB_PATH echo "=== Checking tornado CLI ===" which tornado || { echo "::error::tornado not in PATH"; exit 1; } @@ -95,7 +96,6 @@ jobs: run: | cd ${{ github.workspace }} echo "Using TORNADOVM_HOME=$TORNADOVM_HOME" - export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH" tornado --version ./mvnw clean package -DskipTests @@ -107,7 +107,6 @@ jobs: -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-llama-1b-f16-standard.json run: | cd ${{ github.workspace }} - export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH" ./llama-tornado --gpu --${{ matrix.backend.name }} \ --model $MODELS_DIR/Llama-3.2-1B-Instruct-F16.gguf \ --prompt "Say hello" @@ -130,7 +129,6 @@ jobs: -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-llama-1b-f16-prefill-decode.json run: | cd ${{ github.workspace }} - export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH" ./llama-tornado --gpu --${{ matrix.backend.name }} \ --model $MODELS_DIR/Llama-3.2-1B-Instruct-F16.gguf \ --prompt "Say hello" \ @@ -154,7 +152,6 @@ jobs: -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-llama-1b-f16-batch-prefill-decode.json run: | cd ${{ github.workspace }} - export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH" ./llama-tornado --gpu --${{ matrix.backend.name }} \ --model $MODELS_DIR/Llama-3.2-1B-Instruct-F16.gguf \ --prompt "Say hello" \ @@ -180,7 +177,6 @@ jobs: -Dllama.metrics.file=${{ runner.temp }}/metrics-ptx-llama-1b-f16-prefill-decode-cuda-graphs.json run: | cd ${{ github.workspace }} - export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH" ./llama-tornado --gpu --ptx \ --model $MODELS_DIR/Llama-3.2-1B-Instruct-F16.gguf \ --prompt "Say hello" \ @@ -206,7 +202,6 @@ jobs: -Dllama.metrics.file=${{ runner.temp }}/metrics-ptx-llama-1b-f16-batch-prefill-decode-cuda-graphs.json run: | cd ${{ github.workspace }} - export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH" ./llama-tornado --gpu --ptx \ --model $MODELS_DIR/Llama-3.2-1B-Instruct-F16.gguf \ --prompt "Say hello" \ @@ -232,7 +227,6 @@ jobs: -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-qwen3-4b-f16-standard.json run: | cd ${{ github.workspace }} - export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH" ./llama-tornado --gpu --${{ matrix.backend.name }} \ --model $MODELS_DIR/Qwen3-4B-f16.gguf \ --prompt "Say hello" @@ -255,7 +249,6 @@ jobs: -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-mistral-7b-fp16-standard.json run: | cd ${{ github.workspace }} - export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH" ./llama-tornado --gpu --${{ matrix.backend.name }} \ --model $MODELS_DIR/Mistral-7B-Instruct-v0.3.fp16.gguf \ --prompt "Say hello" @@ -278,7 +271,6 @@ jobs: -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-qwen2-5-1-5b-fp16-standard.json run: | cd ${{ github.workspace }} - export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH" ./llama-tornado --gpu --${{ matrix.backend.name }} \ --model $MODELS_DIR/qwen2.5-1.5b-instruct-fp16.gguf \ --prompt "Say hello" @@ -301,7 +293,6 @@ jobs: -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-phi3-mini-fp16-standard.json run: | cd ${{ github.workspace }} - export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH" ./llama-tornado --gpu --${{ matrix.backend.name }} \ --model $MODELS_DIR/Phi-3-mini-4k-instruct-fp16.gguf \ --prompt "Say hello" @@ -324,7 +315,6 @@ jobs: -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-granite-3-2-2b-f16-standard.json run: | cd ${{ github.workspace }} - export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH" ./llama-tornado --gpu --${{ matrix.backend.name }} \ --model $MODELS_DIR/granite-3.2-2b-instruct-f16.gguf \ --prompt "Say hello" @@ -347,7 +337,6 @@ jobs: -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-granite-4-0-1b-f16-standard.json run: | cd ${{ github.workspace }} - export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH" ./llama-tornado --gpu --${{ matrix.backend.name }} \ --model $MODELS_DIR/granite-4.0-1b-F16.gguf \ --prompt "Say hello" @@ -370,7 +359,6 @@ jobs: -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-llama-1b-q8-standard.json run: | cd ${{ github.workspace }} - export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH" ./llama-tornado --gpu --${{ matrix.backend.name }} \ --model $MODELS_DIR/Llama-3.2-1B-Instruct-Q8_0.gguf \ --prompt "Say hello" @@ -393,7 +381,6 @@ jobs: -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-qwen3-0-6b-q8-standard.json run: | cd ${{ github.workspace }} - export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH" ./llama-tornado --gpu --${{ matrix.backend.name }} \ --model $MODELS_DIR/Qwen3-0.6B-Q8_0.gguf \ --prompt "Say hello" @@ -416,7 +403,6 @@ jobs: -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-phi3-mini-q8-standard.json run: | cd ${{ github.workspace }} - export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH" ./llama-tornado --gpu --${{ matrix.backend.name }} \ --model $MODELS_DIR/Phi-3-mini-4k-instruct-Q8_0.gguf \ --prompt "Say hello" @@ -439,7 +425,6 @@ jobs: -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-qwen2-5-1-5b-q8-standard.json run: | cd ${{ github.workspace }} - export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH" ./llama-tornado --gpu --${{ matrix.backend.name }} \ --model $MODELS_DIR/qwen2.5-1.5b-instruct-q8_0.gguf \ --prompt "Say hello" @@ -462,7 +447,6 @@ jobs: -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-mistral-7b-q8-standard.json run: | cd ${{ github.workspace }} - export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH" ./llama-tornado --gpu --${{ matrix.backend.name }} \ --model $MODELS_DIR/Mistral-7B-Instruct-v0.3.Q8_0.gguf \ --prompt "Say hello" @@ -485,7 +469,6 @@ jobs: -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-granite-3-2-2b-q8-standard.json run: | cd ${{ github.workspace }} - export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH" ./llama-tornado --gpu --${{ matrix.backend.name }} \ --model $MODELS_DIR/granite-3.2-2b-instruct-Q8_0.gguf \ --prompt "Say hello" @@ -508,7 +491,6 @@ jobs: -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-granite-4-0-1b-q8-standard.json run: | cd ${{ github.workspace }} - export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH" ./llama-tornado --gpu --${{ matrix.backend.name }} \ --model $MODELS_DIR/granite-4.0-1b-Q8_0.gguf \ --prompt "Say hello" diff --git a/.github/workflows/integration-quarkus-langchain4j.yml b/.github/workflows/integration-quarkus-langchain4j.yml index 681ee095..ea872407 100644 --- a/.github/workflows/integration-quarkus-langchain4j.yml +++ b/.github/workflows/integration-quarkus-langchain4j.yml @@ -78,7 +78,8 @@ jobs: # Save for subsequent steps echo "TORNADOVM_HOME=$FULL_SDK" >> $GITHUB_ENV - echo "PATH=$PATH" >> $GITHUB_ENV + echo "$FULL_SDK/bin" >> $GITHUB_PATH + echo "$JAVA_HOME/bin" >> $GITHUB_PATH echo "=== Checking tornado CLI ===" which tornado || { echo "::error::tornado not in PATH"; exit 1; } @@ -89,7 +90,6 @@ jobs: run: | cd ${{ github.workspace }} echo "Using TORNADOVM_HOME=$TORNADOVM_HOME" - export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH" tornado --version # Append SNAPSHOT to GPULlama3 version @@ -114,7 +114,6 @@ jobs: - name: Build Quarkus LangChain4j run: | cd ${{ github.workspace }}/quarkus-langchain4j - export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH" # Update the GPULlama3 version used by quarkus-langchain4j POM="pom.xml" @@ -136,7 +135,6 @@ jobs: - name: Start Quarkus Application and Wait for Startup run: | cd ${{ github.workspace }}/quarkus-langchain4j/integration-tests/gpu-llama3 - export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH" echo "Starting Quarkus application on port $QUARKUS_PORT..." From e9807f4be80e274f6ff8ed69d4c854554f6f9fd0 Mon Sep 17 00:00:00 2001 From: Orion Papadakis Date: Tue, 12 May 2026 18:11:31 +0300 Subject: [PATCH 05/14] [ci][fix] Create a reusable `setup-tornadovm` action to simplify and deduplicate TornadoVM setup across workflows --- .github/actions/setup-tornadovm/action.yml | 57 +++++++++++++++++++ .github/workflows/build-and-run.yml | 47 ++------------- .../integration-quarkus-langchain4j.yml | 45 ++------------- 3 files changed, 65 insertions(+), 84 deletions(-) create mode 100644 .github/actions/setup-tornadovm/action.yml diff --git a/.github/actions/setup-tornadovm/action.yml b/.github/actions/setup-tornadovm/action.yml new file mode 100644 index 00000000..17e6b498 --- /dev/null +++ b/.github/actions/setup-tornadovm/action.yml @@ -0,0 +1,57 @@ +name: Setup TornadoVM +description: Clone, build, and configure TornadoVM; exports TORNADOVM_HOME and updates PATH for all subsequent steps. + +inputs: + backend: + description: 'TornadoVM backend to build (opencl or ptx)' + required: true + +runs: + using: composite + steps: + - name: Clone TornadoVM master + shell: bash + run: | + git clone --depth 1 --branch master \ + https://github.com/beehive-lab/TornadoVM.git \ + $TORNADO_ROOT + + - name: Set up Python venv for TornadoVM + shell: bash + run: | + python3 -m venv $TORNADO_ROOT/venv + source $TORNADO_ROOT/venv/bin/activate + python --version + + - name: Build TornadoVM + shell: bash + run: | + cd $TORNADO_ROOT + mkdir -p graalJars && cp $GRAAL_JARS/* graalJars/ + source venv/bin/activate + echo "=== Building TornadoVM ===" + + make BACKEND=${{ inputs.backend }} + + echo "=== Searching for TornadoVM SDK directory ===" + SDK_DIR=$(find dist -type d -maxdepth 3 -path "*/tornadovm-*-${{ inputs.backend }}" | head -n 1) + if [ -z "$SDK_DIR" ]; then + echo "::error::Could not locate TornadoVM SDK directory!" + find dist -maxdepth 5 -type d + exit 1 + fi + FULL_SDK="${PWD}/${SDK_DIR}" + echo "Detected TornadoVM SDK: $FULL_SDK" + + # Export for current shell session + export TORNADOVM_HOME="$FULL_SDK" + export PATH="$FULL_SDK/bin:$JAVA_HOME/bin:$PATH" + + # Save for subsequent steps + echo "TORNADOVM_HOME=$FULL_SDK" >> $GITHUB_ENV + echo "$FULL_SDK/bin" >> $GITHUB_PATH + echo "$JAVA_HOME/bin" >> $GITHUB_PATH + + echo "=== Checking tornado CLI ===" + which tornado || { echo "::error::tornado not in PATH"; exit 1; } + tornado --devices diff --git a/.github/workflows/build-and-run.yml b/.github/workflows/build-and-run.yml index 16fc75c9..1c5a039b 100644 --- a/.github/workflows/build-and-run.yml +++ b/.github/workflows/build-and-run.yml @@ -48,49 +48,10 @@ jobs: - name: Checkout GPULlama3 uses: actions/checkout@v4 - - name: Clone TornadoVM master - run: | - git clone --depth 1 --branch master \ - https://github.com/beehive-lab/TornadoVM.git \ - $TORNADO_ROOT - - - name: Set up Python venv for TornadoVM - run: | - python3 -m venv $TORNADO_ROOT/venv - source $TORNADO_ROOT/venv/bin/activate - python --version - - - name: Build TornadoVM - run: | - cd $TORNADO_ROOT - mkdir -p graalJars && cp $GRAAL_JARS/* graalJars/ - source venv/bin/activate - echo "=== Building TornadoVM ===" - - make BACKEND=${{ matrix.backend.name }} - - echo "=== Searching for TornadoVM SDK directory ===" - SDK_DIR=$(find dist -type d -maxdepth 3 -path "*/tornadovm-*-${{ matrix.backend.name }}" | head -n 1) - if [ -z "$SDK_DIR" ]; then - echo "::error::Could not locate TornadoVM SDK directory!" - find dist -maxdepth 5 -type d - exit 1 - fi - FULL_SDK="${PWD}/${SDK_DIR}" - echo "Detected TornadoVM SDK: $FULL_SDK" - - # Export for current shell session - export TORNADOVM_HOME="$FULL_SDK" - export PATH="$FULL_SDK/bin:$JAVA_HOME/bin:$PATH" - - # Save for subsequent steps - echo "TORNADOVM_HOME=$FULL_SDK" >> $GITHUB_ENV - echo "$FULL_SDK/bin" >> $GITHUB_PATH - echo "$JAVA_HOME/bin" >> $GITHUB_PATH - - echo "=== Checking tornado CLI ===" - which tornado || { echo "::error::tornado not in PATH"; exit 1; } - tornado --devices + - name: Setup TornadoVM + uses: ./.github/actions/setup-tornadovm + with: + backend: ${{ matrix.backend.name }} - name: Build GPULlama3.java run: | diff --git a/.github/workflows/integration-quarkus-langchain4j.yml b/.github/workflows/integration-quarkus-langchain4j.yml index ea872407..d4a4291e 100644 --- a/.github/workflows/integration-quarkus-langchain4j.yml +++ b/.github/workflows/integration-quarkus-langchain4j.yml @@ -43,47 +43,10 @@ jobs: uses: actions/checkout@v4 # Step 1: Clone and build TornadoVM - - name: Clone TornadoVM master - run: | - git clone --depth 1 --branch master \ - https://github.com/beehive-lab/TornadoVM.git \ - $TORNADO_ROOT - - name: Set up Python venv for TornadoVM - run: | - python3 -m venv $TORNADO_ROOT/venv - source $TORNADO_ROOT/venv/bin/activate - python --version - - name: Build TornadoVM - run: | - cd $TORNADO_ROOT - mkdir -p graalJars && cp $GRAAL_JARS/* graalJars/ - source venv/bin/activate - echo "=== Building TornadoVM ===" - - make BACKEND=${{ matrix.backend.name }} - - echo "=== Searching for TornadoVM SDK directory ===" - SDK_DIR=$(find dist -type d -maxdepth 3 -path "*/tornadovm-*-${{ matrix.backend.name }}" | head -n 1) - if [ -z "$SDK_DIR" ]; then - echo "::error::Could not locate TornadoVM SDK directory!" - find dist -maxdepth 5 -type d - exit 1 - fi - FULL_SDK="${PWD}/${SDK_DIR}" - echo "Detected TornadoVM SDK: $FULL_SDK" - - # Export for current shell session - export TORNADOVM_HOME="$FULL_SDK" - export PATH="$FULL_SDK/bin:$JAVA_HOME/bin:$PATH" - - # Save for subsequent steps - echo "TORNADOVM_HOME=$FULL_SDK" >> $GITHUB_ENV - echo "$FULL_SDK/bin" >> $GITHUB_PATH - echo "$JAVA_HOME/bin" >> $GITHUB_PATH - - echo "=== Checking tornado CLI ===" - which tornado || { echo "::error::tornado not in PATH"; exit 1; } - tornado --devices + - name: Setup TornadoVM + uses: ./.github/actions/setup-tornadovm + with: + backend: ${{ matrix.backend.name }} # Step 2: Build GPULlama3.java - name: Build GPULlama3.java From b3138844ecf41ad418eaa4dafb1b5bc2ca3ca626 Mon Sep 17 00:00:00 2001 From: Orion Papadakis Date: Tue, 12 May 2026 18:31:07 +0300 Subject: [PATCH 06/14] [ci][fix] Refactor inference steps by introducing a reusable `run-inference` composite action to reduce duplication across workflows --- .github/actions/run-inference/action.yml | 61 +++ .github/workflows/build-and-run.yml | 519 +++++++---------------- 2 files changed, 212 insertions(+), 368 deletions(-) create mode 100644 .github/actions/run-inference/action.yml diff --git a/.github/actions/run-inference/action.yml b/.github/actions/run-inference/action.yml new file mode 100644 index 00000000..21cb9fb3 --- /dev/null +++ b/.github/actions/run-inference/action.yml @@ -0,0 +1,61 @@ +name: Run Inference +description: Run one llama-tornado inference pass and write the metrics + sidecar files. + +inputs: + backend: + description: 'GPU backend (opencl or ptx)' + required: true + model_file: + description: 'Model filename inside $MODELS_DIR (e.g. Llama-3.2-1B-Instruct-F16.gguf)' + required: true + model: + description: 'Human-readable model name for the sidecar (e.g. Llama-3.2-1B-Instruct)' + required: true + quantization: + description: 'Quantization type (e.g. F16, Q8_0)' + required: true + configuration: + description: 'Configuration key for the sidecar (e.g. standard, prefill-decode)' + required: true + flags: + description: 'Extra CLI flags passed to llama-tornado (omit for standard run)' + required: false + default: '' + metrics_file: + description: 'Absolute path for the output metrics JSON file' + required: true + prompt: + description: 'Prompt to pass to the model' + required: false + default: 'Say hello' + +runs: + using: composite + steps: + - name: Run inference + shell: bash + working-directory: ${{ github.workspace }} + env: + JAVA_TOOL_OPTIONS: >- + -Dllama.metrics.format=json + -Dllama.metrics.output=file + -Dllama.metrics.file=${{ inputs.metrics_file }} + run: | + # Run inference and emit raw metrics JSON via JAVA_TOOL_OPTIONS + ./llama-tornado --gpu --${{ inputs.backend }} \ + --model $MODELS_DIR/${{ inputs.model_file }} \ + --prompt "${{ inputs.prompt }}" \ + ${{ inputs.flags }} + + # Write metadata sidecar so process_metrics.py can identify each metrics file + SIDECAR="${{ inputs.metrics_file }%.json}.meta.json" + python3 scripts/write_metrics_sidecar.py \ + --out "$SIDECAR" \ + backend="${{ inputs.backend }}" \ + task=llama-inference \ + model_file=${{ inputs.model_file }} \ + model=${{ inputs.model }} \ + quantization=${{ inputs.quantization }} \ + configuration=${{ inputs.configuration }} \ + "flags=${{ inputs.flags }}" \ + prompt="${{ inputs.prompt }}" diff --git a/.github/workflows/build-and-run.yml b/.github/workflows/build-and-run.yml index 1c5a039b..982c4cfc 100644 --- a/.github/workflows/build-and-run.yml +++ b/.github/workflows/build-and-run.yml @@ -60,411 +60,194 @@ jobs: tornado --version ./mvnw clean package -DskipTests + # ── Llama-3.2-1B: standard + prefill-decode variants, all backends ────────── - name: FP16 - Run Llama-3.2-1B-Instruct-F16.gguf - Standard - env: - JAVA_TOOL_OPTIONS: >- - -Dllama.metrics.format=json - -Dllama.metrics.output=file - -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-llama-1b-f16-standard.json - run: | - cd ${{ github.workspace }} - ./llama-tornado --gpu --${{ matrix.backend.name }} \ - --model $MODELS_DIR/Llama-3.2-1B-Instruct-F16.gguf \ - --prompt "Say hello" - python3 scripts/write_metrics_sidecar.py \ - --out "${{ runner.temp }}/metrics-${{ matrix.backend.name }}-llama-1b-f16-standard.meta.json" \ - backend="${{ matrix.backend.name }}" \ - task=llama-inference \ - model_file=Llama-3.2-1B-Instruct-F16.gguf \ - model=Llama-3.2-1B-Instruct \ - quantization=F16 \ - configuration=standard \ - flags="" \ - prompt="Say hello" + uses: ./.github/actions/run-inference + with: + backend: ${{ matrix.backend.name }} + model_file: Llama-3.2-1B-Instruct-F16.gguf + model: Llama-3.2-1B-Instruct + quantization: F16 + configuration: standard + metrics_file: ${{ runner.temp }}/metrics-${{ matrix.backend.name }}-llama-1b-f16-standard.json - name: FP16 - Run Llama-3.2-1B-Instruct-F16.gguf - Prefill-Decode - env: - JAVA_TOOL_OPTIONS: >- - -Dllama.metrics.format=json - -Dllama.metrics.output=file - -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-llama-1b-f16-prefill-decode.json - run: | - cd ${{ github.workspace }} - ./llama-tornado --gpu --${{ matrix.backend.name }} \ - --model $MODELS_DIR/Llama-3.2-1B-Instruct-F16.gguf \ - --prompt "Say hello" \ - --with-prefill-decode - python3 scripts/write_metrics_sidecar.py \ - --out "${{ runner.temp }}/metrics-${{ matrix.backend.name }}-llama-1b-f16-prefill-decode.meta.json" \ - backend="${{ matrix.backend.name }}" \ - task=llama-inference \ - model_file=Llama-3.2-1B-Instruct-F16.gguf \ - model=Llama-3.2-1B-Instruct \ - quantization=F16 \ - configuration=prefill-decode \ - "flags=--with-prefill-decode" \ - prompt="Say hello" + uses: ./.github/actions/run-inference + with: + backend: ${{ matrix.backend.name }} + model_file: Llama-3.2-1B-Instruct-F16.gguf + model: Llama-3.2-1B-Instruct + quantization: F16 + configuration: prefill-decode + flags: --with-prefill-decode + metrics_file: ${{ runner.temp }}/metrics-${{ matrix.backend.name }}-llama-1b-f16-prefill-decode.json - name: FP16 - Run Llama-3.2-1B-Instruct-F16.gguf - Batch-Prefill-Decode - env: - JAVA_TOOL_OPTIONS: >- - -Dllama.metrics.format=json - -Dllama.metrics.output=file - -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-llama-1b-f16-batch-prefill-decode.json - run: | - cd ${{ github.workspace }} - ./llama-tornado --gpu --${{ matrix.backend.name }} \ - --model $MODELS_DIR/Llama-3.2-1B-Instruct-F16.gguf \ - --prompt "Say hello" \ - --with-prefill-decode --batch-prefill-size 32 - python3 scripts/write_metrics_sidecar.py \ - --out "${{ runner.temp }}/metrics-${{ matrix.backend.name }}-llama-1b-f16-batch-prefill-decode.meta.json" \ - backend="${{ matrix.backend.name }}" \ - task=llama-inference \ - model_file=Llama-3.2-1B-Instruct-F16.gguf \ - model=Llama-3.2-1B-Instruct \ - quantization=F16 \ - configuration=batch-prefill-decode \ - "flags=--with-prefill-decode --batch-prefill-size 32" \ - prompt="Say hello" + uses: ./.github/actions/run-inference + with: + backend: ${{ matrix.backend.name }} + model_file: Llama-3.2-1B-Instruct-F16.gguf + model: Llama-3.2-1B-Instruct + quantization: F16 + configuration: batch-prefill-decode + flags: --with-prefill-decode --batch-prefill-size 32 + metrics_file: ${{ runner.temp }}/metrics-${{ matrix.backend.name }}-llama-1b-f16-batch-prefill-decode.json # ── PTX-only: CUDA-graph variants ──────────────────────────────────────── - name: PTX - FP16 - Run Llama-3.2-1B-Instruct-F16.gguf - Prefill-Decode-CUDA-Graphs if: matrix.backend.name == 'ptx' - env: - JAVA_TOOL_OPTIONS: >- - -Dllama.metrics.format=json - -Dllama.metrics.output=file - -Dllama.metrics.file=${{ runner.temp }}/metrics-ptx-llama-1b-f16-prefill-decode-cuda-graphs.json - run: | - cd ${{ github.workspace }} - ./llama-tornado --gpu --ptx \ - --model $MODELS_DIR/Llama-3.2-1B-Instruct-F16.gguf \ - --prompt "Say hello" \ - --with-prefill-decode \ - --cuda-graphs - python3 scripts/write_metrics_sidecar.py \ - --out "${{ runner.temp }}/metrics-ptx-llama-1b-f16-prefill-decode-cuda-graphs.meta.json" \ - backend=ptx \ - task=llama-inference \ - model_file=Llama-3.2-1B-Instruct-F16.gguf \ - model=Llama-3.2-1B-Instruct \ - quantization=F16 \ - configuration=prefill-decode-cuda-graphs \ - "flags=--with-prefill-decode --cuda-graphs" \ - prompt="Say hello" + uses: ./.github/actions/run-inference + with: + backend: ptx + model_file: Llama-3.2-1B-Instruct-F16.gguf + model: Llama-3.2-1B-Instruct + quantization: F16 + configuration: prefill-decode-cuda-graphs + flags: --with-prefill-decode --cuda-graphs + metrics_file: ${{ runner.temp }}/metrics-ptx-llama-1b-f16-prefill-decode-cuda-graphs.json - name: PTX - FP16 - Run Llama-3.2-1B-Instruct-F16.gguf - Batch-Prefill-Decode-CUDA-Graphs if: matrix.backend.name == 'ptx' - env: - JAVA_TOOL_OPTIONS: >- - -Dllama.metrics.format=json - -Dllama.metrics.output=file - -Dllama.metrics.file=${{ runner.temp }}/metrics-ptx-llama-1b-f16-batch-prefill-decode-cuda-graphs.json - run: | - cd ${{ github.workspace }} - ./llama-tornado --gpu --ptx \ - --model $MODELS_DIR/Llama-3.2-1B-Instruct-F16.gguf \ - --prompt "Say hello" \ - --with-prefill-decode --batch-prefill-size 32 \ - --cuda-graphs - python3 scripts/write_metrics_sidecar.py \ - --out "${{ runner.temp }}/metrics-ptx-llama-1b-f16-batch-prefill-decode-cuda-graphs.meta.json" \ - backend=ptx \ - task=llama-inference \ - model_file=Llama-3.2-1B-Instruct-F16.gguf \ - model=Llama-3.2-1B-Instruct \ - quantization=F16 \ - configuration=batch-prefill-decode-cuda-graphs \ - "flags=--with-prefill-decode --batch-prefill-size 32 --cuda-graphs" \ - prompt="Say hello" + uses: ./.github/actions/run-inference + with: + backend: ptx + model_file: Llama-3.2-1B-Instruct-F16.gguf + model: Llama-3.2-1B-Instruct + quantization: F16 + configuration: batch-prefill-decode-cuda-graphs + flags: --with-prefill-decode --batch-prefill-size 32 --cuda-graphs + metrics_file: ${{ runner.temp }}/metrics-ptx-llama-1b-f16-batch-prefill-decode-cuda-graphs.json # ── Additional models — standard inference, all backends ───────────────── - name: FP16 - Run Qwen3-4B-f16.gguf - env: - JAVA_TOOL_OPTIONS: >- - -Dllama.metrics.format=json - -Dllama.metrics.output=file - -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-qwen3-4b-f16-standard.json - run: | - cd ${{ github.workspace }} - ./llama-tornado --gpu --${{ matrix.backend.name }} \ - --model $MODELS_DIR/Qwen3-4B-f16.gguf \ - --prompt "Say hello" - python3 scripts/write_metrics_sidecar.py \ - --out "${{ runner.temp }}/metrics-${{ matrix.backend.name }}-qwen3-4b-f16-standard.meta.json" \ - backend="${{ matrix.backend.name }}" \ - task=llama-inference \ - model_file=Qwen3-4B-f16.gguf \ - model=Qwen3-4B \ - quantization=F16 \ - configuration=standard \ - flags="" \ - prompt="Say hello" + uses: ./.github/actions/run-inference + with: + backend: ${{ matrix.backend.name }} + model_file: Qwen3-4B-f16.gguf + model: Qwen3-4B + quantization: F16 + configuration: standard + metrics_file: ${{ runner.temp }}/metrics-${{ matrix.backend.name }}-qwen3-4b-f16-standard.json - name: FP16 - Run Mistral-7B-Instruct-v0.3.fp16.gguf - env: - JAVA_TOOL_OPTIONS: >- - -Dllama.metrics.format=json - -Dllama.metrics.output=file - -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-mistral-7b-fp16-standard.json - run: | - cd ${{ github.workspace }} - ./llama-tornado --gpu --${{ matrix.backend.name }} \ - --model $MODELS_DIR/Mistral-7B-Instruct-v0.3.fp16.gguf \ - --prompt "Say hello" - python3 scripts/write_metrics_sidecar.py \ - --out "${{ runner.temp }}/metrics-${{ matrix.backend.name }}-mistral-7b-fp16-standard.meta.json" \ - backend="${{ matrix.backend.name }}" \ - task=llama-inference \ - model_file=Mistral-7B-Instruct-v0.3.fp16.gguf \ - model=Mistral-7B-Instruct-v0.3 \ - quantization=F16 \ - configuration=standard \ - flags="" \ - prompt="Say hello" + uses: ./.github/actions/run-inference + with: + backend: ${{ matrix.backend.name }} + model_file: Mistral-7B-Instruct-v0.3.fp16.gguf + model: Mistral-7B-Instruct-v0.3 + quantization: F16 + configuration: standard + metrics_file: ${{ runner.temp }}/metrics-${{ matrix.backend.name }}-mistral-7b-fp16-standard.json - name: FP16 - Run Qwen2.5-1.5b-instruct-fp16.gguf - env: - JAVA_TOOL_OPTIONS: >- - -Dllama.metrics.format=json - -Dllama.metrics.output=file - -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-qwen2-5-1-5b-fp16-standard.json - run: | - cd ${{ github.workspace }} - ./llama-tornado --gpu --${{ matrix.backend.name }} \ - --model $MODELS_DIR/qwen2.5-1.5b-instruct-fp16.gguf \ - --prompt "Say hello" - python3 scripts/write_metrics_sidecar.py \ - --out "${{ runner.temp }}/metrics-${{ matrix.backend.name }}-qwen2-5-1-5b-fp16-standard.meta.json" \ - backend="${{ matrix.backend.name }}" \ - task=llama-inference \ - model_file=qwen2.5-1.5b-instruct-fp16.gguf \ - model=Qwen2.5-1.5B-Instruct \ - quantization=F16 \ - configuration=standard \ - flags="" \ - prompt="Say hello" + uses: ./.github/actions/run-inference + with: + backend: ${{ matrix.backend.name }} + model_file: qwen2.5-1.5b-instruct-fp16.gguf + model: Qwen2.5-1.5B-Instruct + quantization: F16 + configuration: standard + metrics_file: ${{ runner.temp }}/metrics-${{ matrix.backend.name }}-qwen2-5-1-5b-fp16-standard.json - name: FP16 - Run Phi-3-mini-4k-instruct-fp16.gguf - env: - JAVA_TOOL_OPTIONS: >- - -Dllama.metrics.format=json - -Dllama.metrics.output=file - -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-phi3-mini-fp16-standard.json - run: | - cd ${{ github.workspace }} - ./llama-tornado --gpu --${{ matrix.backend.name }} \ - --model $MODELS_DIR/Phi-3-mini-4k-instruct-fp16.gguf \ - --prompt "Say hello" - python3 scripts/write_metrics_sidecar.py \ - --out "${{ runner.temp }}/metrics-${{ matrix.backend.name }}-phi3-mini-fp16-standard.meta.json" \ - backend="${{ matrix.backend.name }}" \ - task=llama-inference \ - model_file=Phi-3-mini-4k-instruct-fp16.gguf \ - model=Phi-3-mini-4k-instruct \ - quantization=F16 \ - configuration=standard \ - flags="" \ - prompt="Say hello" + uses: ./.github/actions/run-inference + with: + backend: ${{ matrix.backend.name }} + model_file: Phi-3-mini-4k-instruct-fp16.gguf + model: Phi-3-mini-4k-instruct + quantization: F16 + configuration: standard + metrics_file: ${{ runner.temp }}/metrics-${{ matrix.backend.name }}-phi3-mini-fp16-standard.json - name: FP16 - Run Granite-3.2-2b-instruct-f16.gguf - env: - JAVA_TOOL_OPTIONS: >- - -Dllama.metrics.format=json - -Dllama.metrics.output=file - -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-granite-3-2-2b-f16-standard.json - run: | - cd ${{ github.workspace }} - ./llama-tornado --gpu --${{ matrix.backend.name }} \ - --model $MODELS_DIR/granite-3.2-2b-instruct-f16.gguf \ - --prompt "Say hello" - python3 scripts/write_metrics_sidecar.py \ - --out "${{ runner.temp }}/metrics-${{ matrix.backend.name }}-granite-3-2-2b-f16-standard.meta.json" \ - backend="${{ matrix.backend.name }}" \ - task=llama-inference \ - model_file=granite-3.2-2b-instruct-f16.gguf \ - model=Granite-3.2-2B-Instruct \ - quantization=F16 \ - configuration=standard \ - flags="" \ - prompt="Say hello" + uses: ./.github/actions/run-inference + with: + backend: ${{ matrix.backend.name }} + model_file: granite-3.2-2b-instruct-f16.gguf + model: Granite-3.2-2B-Instruct + quantization: F16 + configuration: standard + metrics_file: ${{ runner.temp }}/metrics-${{ matrix.backend.name }}-granite-3-2-2b-f16-standard.json - name: FP16 - Run Granite-4.0-1b-F16.gguf - env: - JAVA_TOOL_OPTIONS: >- - -Dllama.metrics.format=json - -Dllama.metrics.output=file - -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-granite-4-0-1b-f16-standard.json - run: | - cd ${{ github.workspace }} - ./llama-tornado --gpu --${{ matrix.backend.name }} \ - --model $MODELS_DIR/granite-4.0-1b-F16.gguf \ - --prompt "Say hello" - python3 scripts/write_metrics_sidecar.py \ - --out "${{ runner.temp }}/metrics-${{ matrix.backend.name }}-granite-4-0-1b-f16-standard.meta.json" \ - backend="${{ matrix.backend.name }}" \ - task=llama-inference \ - model_file=granite-4.0-1b-F16.gguf \ - model=Granite-4.0-1B \ - quantization=F16 \ - configuration=standard \ - flags="" \ - prompt="Say hello" + uses: ./.github/actions/run-inference + with: + backend: ${{ matrix.backend.name }} + model_file: granite-4.0-1b-F16.gguf + model: Granite-4.0-1B + quantization: F16 + configuration: standard + metrics_file: ${{ runner.temp }}/metrics-${{ matrix.backend.name }}-granite-4-0-1b-f16-standard.json - name: Q8 - Run Llama-3.2-1B-Instruct-Q8_0.gguf - env: - JAVA_TOOL_OPTIONS: >- - -Dllama.metrics.format=json - -Dllama.metrics.output=file - -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-llama-1b-q8-standard.json - run: | - cd ${{ github.workspace }} - ./llama-tornado --gpu --${{ matrix.backend.name }} \ - --model $MODELS_DIR/Llama-3.2-1B-Instruct-Q8_0.gguf \ - --prompt "Say hello" - python3 scripts/write_metrics_sidecar.py \ - --out "${{ runner.temp }}/metrics-${{ matrix.backend.name }}-llama-1b-q8-standard.meta.json" \ - backend="${{ matrix.backend.name }}" \ - task=llama-inference \ - model_file=Llama-3.2-1B-Instruct-Q8_0.gguf \ - model=Llama-3.2-1B-Instruct \ - quantization=Q8_0 \ - configuration=standard \ - flags="" \ - prompt="Say hello" + uses: ./.github/actions/run-inference + with: + backend: ${{ matrix.backend.name }} + model_file: Llama-3.2-1B-Instruct-Q8_0.gguf + model: Llama-3.2-1B-Instruct + quantization: Q8_0 + configuration: standard + metrics_file: ${{ runner.temp }}/metrics-${{ matrix.backend.name }}-llama-1b-q8-standard.json - name: Q8 - Run Qwen3-0.6B-Q8_0.gguf - env: - JAVA_TOOL_OPTIONS: >- - -Dllama.metrics.format=json - -Dllama.metrics.output=file - -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-qwen3-0-6b-q8-standard.json - run: | - cd ${{ github.workspace }} - ./llama-tornado --gpu --${{ matrix.backend.name }} \ - --model $MODELS_DIR/Qwen3-0.6B-Q8_0.gguf \ - --prompt "Say hello" - python3 scripts/write_metrics_sidecar.py \ - --out "${{ runner.temp }}/metrics-${{ matrix.backend.name }}-qwen3-0-6b-q8-standard.meta.json" \ - backend="${{ matrix.backend.name }}" \ - task=llama-inference \ - model_file=Qwen3-0.6B-Q8_0.gguf \ - model=Qwen3-0.6B \ - quantization=Q8_0 \ - configuration=standard \ - flags="" \ - prompt="Say hello" + uses: ./.github/actions/run-inference + with: + backend: ${{ matrix.backend.name }} + model_file: Qwen3-0.6B-Q8_0.gguf + model: Qwen3-0.6B + quantization: Q8_0 + configuration: standard + metrics_file: ${{ runner.temp }}/metrics-${{ matrix.backend.name }}-qwen3-0-6b-q8-standard.json - name: Q8 - Run Phi-3-mini-4k-instruct-Q8_0.gguf - env: - JAVA_TOOL_OPTIONS: >- - -Dllama.metrics.format=json - -Dllama.metrics.output=file - -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-phi3-mini-q8-standard.json - run: | - cd ${{ github.workspace }} - ./llama-tornado --gpu --${{ matrix.backend.name }} \ - --model $MODELS_DIR/Phi-3-mini-4k-instruct-Q8_0.gguf \ - --prompt "Say hello" - python3 scripts/write_metrics_sidecar.py \ - --out "${{ runner.temp }}/metrics-${{ matrix.backend.name }}-phi3-mini-q8-standard.meta.json" \ - backend="${{ matrix.backend.name }}" \ - task=llama-inference \ - model_file=Phi-3-mini-4k-instruct-Q8_0.gguf \ - model=Phi-3-mini-4k-instruct \ - quantization=Q8_0 \ - configuration=standard \ - flags="" \ - prompt="Say hello" + uses: ./.github/actions/run-inference + with: + backend: ${{ matrix.backend.name }} + model_file: Phi-3-mini-4k-instruct-Q8_0.gguf + model: Phi-3-mini-4k-instruct + quantization: Q8_0 + configuration: standard + metrics_file: ${{ runner.temp }}/metrics-${{ matrix.backend.name }}-phi3-mini-q8-standard.json - name: Q8 - Run Qwen2.5-1.5b-instruct-q8_0.gguf - env: - JAVA_TOOL_OPTIONS: >- - -Dllama.metrics.format=json - -Dllama.metrics.output=file - -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-qwen2-5-1-5b-q8-standard.json - run: | - cd ${{ github.workspace }} - ./llama-tornado --gpu --${{ matrix.backend.name }} \ - --model $MODELS_DIR/qwen2.5-1.5b-instruct-q8_0.gguf \ - --prompt "Say hello" - python3 scripts/write_metrics_sidecar.py \ - --out "${{ runner.temp }}/metrics-${{ matrix.backend.name }}-qwen2-5-1-5b-q8-standard.meta.json" \ - backend="${{ matrix.backend.name }}" \ - task=llama-inference \ - model_file=qwen2.5-1.5b-instruct-q8_0.gguf \ - model=Qwen2.5-1.5B-Instruct \ - quantization=Q8_0 \ - configuration=standard \ - flags="" \ - prompt="Say hello" - - - name: Q8 - Mistral-7B-Instruct-v0.3.Q8_0.gguf - env: - JAVA_TOOL_OPTIONS: >- - -Dllama.metrics.format=json - -Dllama.metrics.output=file - -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-mistral-7b-q8-standard.json - run: | - cd ${{ github.workspace }} - ./llama-tornado --gpu --${{ matrix.backend.name }} \ - --model $MODELS_DIR/Mistral-7B-Instruct-v0.3.Q8_0.gguf \ - --prompt "Say hello" - python3 scripts/write_metrics_sidecar.py \ - --out "${{ runner.temp }}/metrics-${{ matrix.backend.name }}-mistral-7b-q8-standard.meta.json" \ - backend="${{ matrix.backend.name }}" \ - task=llama-inference \ - model_file=Mistral-7B-Instruct-v0.3.Q8_0.gguf \ - model=Mistral-7B-Instruct-v0.3 \ - quantization=Q8_0 \ - configuration=standard \ - flags="" \ - prompt="Say hello" + uses: ./.github/actions/run-inference + with: + backend: ${{ matrix.backend.name }} + model_file: qwen2.5-1.5b-instruct-q8_0.gguf + model: Qwen2.5-1.5B-Instruct + quantization: Q8_0 + configuration: standard + metrics_file: ${{ runner.temp }}/metrics-${{ matrix.backend.name }}-qwen2-5-1-5b-q8-standard.json + + - name: Q8 - Run Mistral-7B-Instruct-v0.3.Q8_0.gguf + uses: ./.github/actions/run-inference + with: + backend: ${{ matrix.backend.name }} + model_file: Mistral-7B-Instruct-v0.3.Q8_0.gguf + model: Mistral-7B-Instruct-v0.3 + quantization: Q8_0 + configuration: standard + metrics_file: ${{ runner.temp }}/metrics-${{ matrix.backend.name }}-mistral-7b-q8-standard.json - name: Q8 - Run Granite-3.2-2b-instruct-Q8_0.gguf - env: - JAVA_TOOL_OPTIONS: >- - -Dllama.metrics.format=json - -Dllama.metrics.output=file - -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-granite-3-2-2b-q8-standard.json - run: | - cd ${{ github.workspace }} - ./llama-tornado --gpu --${{ matrix.backend.name }} \ - --model $MODELS_DIR/granite-3.2-2b-instruct-Q8_0.gguf \ - --prompt "Say hello" - python3 scripts/write_metrics_sidecar.py \ - --out "${{ runner.temp }}/metrics-${{ matrix.backend.name }}-granite-3-2-2b-q8-standard.meta.json" \ - backend="${{ matrix.backend.name }}" \ - task=llama-inference \ - model_file=granite-3.2-2b-instruct-Q8_0.gguf \ - model=Granite-3.2-2B-Instruct \ - quantization=Q8_0 \ - configuration=standard \ - flags="" \ - prompt="Say hello" + uses: ./.github/actions/run-inference + with: + backend: ${{ matrix.backend.name }} + model_file: granite-3.2-2b-instruct-Q8_0.gguf + model: Granite-3.2-2B-Instruct + quantization: Q8_0 + configuration: standard + metrics_file: ${{ runner.temp }}/metrics-${{ matrix.backend.name }}-granite-3-2-2b-q8-standard.json - name: Q8 - Run Granite-4.0-1b-Q8_0.gguf - env: - JAVA_TOOL_OPTIONS: >- - -Dllama.metrics.format=json - -Dllama.metrics.output=file - -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-granite-4-0-1b-q8-standard.json - run: | - cd ${{ github.workspace }} - ./llama-tornado --gpu --${{ matrix.backend.name }} \ - --model $MODELS_DIR/granite-4.0-1b-Q8_0.gguf \ - --prompt "Say hello" - python3 scripts/write_metrics_sidecar.py \ - --out "${{ runner.temp }}/metrics-${{ matrix.backend.name }}-granite-4-0-1b-q8-standard.meta.json" \ - backend="${{ matrix.backend.name }}" \ - task=llama-inference \ - model_file=granite-4.0-1b-Q8_0.gguf \ - model=Granite-4.0-1B \ - quantization=Q8_0 \ - configuration=standard \ - flags="" \ - prompt="Say hello" + uses: ./.github/actions/run-inference + with: + backend: ${{ matrix.backend.name }} + model_file: granite-4.0-1b-Q8_0.gguf + model: Granite-4.0-1B + quantization: Q8_0 + configuration: standard + metrics_file: ${{ runner.temp }}/metrics-${{ matrix.backend.name }}-granite-4-0-1b-q8-standard.json # ── Upload metrics for the publish job ──────────────────────────────────── - name: Upload metrics artifacts From 9e6af04b29085802c14193dbd91eb5efd92a58a9 Mon Sep 17 00:00:00 2001 From: Orion Papadakis Date: Tue, 12 May 2026 18:49:42 +0300 Subject: [PATCH 07/14] [ci] Cache `setup-tornadovm` action to optimize cross-workflow run times --- .github/actions/setup-tornadovm/action.yml | 42 ++++++++++++++++++++-- 1 file changed, 39 insertions(+), 3 deletions(-) diff --git a/.github/actions/setup-tornadovm/action.yml b/.github/actions/setup-tornadovm/action.yml index 17e6b498..a5512658 100644 --- a/.github/actions/setup-tornadovm/action.yml +++ b/.github/actions/setup-tornadovm/action.yml @@ -1,5 +1,5 @@ name: Setup TornadoVM -description: Clone, build, and configure TornadoVM; exports TORNADOVM_HOME and updates PATH for all subsequent steps. +description: Clone (or restore from cache), build, and configure TornadoVM. Exports TORNADOVM_HOME and updates PATH for all subsequent steps. inputs: backend: @@ -9,7 +9,29 @@ inputs: runs: using: composite steps: + - name: Get TornadoVM HEAD SHA + id: tornado_sha + shell: bash + run: | + SHA=$(git ls-remote https://github.com/beehive-lab/TornadoVM HEAD | cut -f1) + echo "sha=$SHA" >> $GITHUB_OUTPUT + + # actions/cache `path:` is an expression — it cannot access the calling + # workflow's env vars via ${{ env.* }}, so we resolve TORNADO_ROOT here. + - name: Resolve TORNADO_ROOT for cache path + id: paths + shell: bash + run: echo "tornado_root=$TORNADO_ROOT" >> $GITHUB_OUTPUT + + - name: Restore TornadoVM cache + id: cache + uses: actions/cache@v4 + with: + path: ${{ steps.paths.outputs.tornado_root }} + key: tornadovm-${{ inputs.backend }}-${{ steps.tornado_sha.outputs.sha }} + - name: Clone TornadoVM master + if: steps.cache.outputs.cache-hit != 'true' shell: bash run: | git clone --depth 1 --branch master \ @@ -17,6 +39,7 @@ runs: $TORNADO_ROOT - name: Set up Python venv for TornadoVM + if: steps.cache.outputs.cache-hit != 'true' shell: bash run: | python3 -m venv $TORNADO_ROOT/venv @@ -24,16 +47,29 @@ runs: python --version - name: Build TornadoVM + if: steps.cache.outputs.cache-hit != 'true' shell: bash run: | cd $TORNADO_ROOT mkdir -p graalJars && cp $GRAAL_JARS/* graalJars/ source venv/bin/activate echo "=== Building TornadoVM ===" - make BACKEND=${{ inputs.backend }} - echo "=== Searching for TornadoVM SDK directory ===" + echo "=== Verifying TornadoVM SDK directory ===" + SDK_DIR=$(find dist -type d -maxdepth 3 -path "*/tornadovm-*-${{ inputs.backend }}" | head -n 1) + if [ -z "$SDK_DIR" ]; then + echo "::error::Could not locate TornadoVM SDK directory!" + find dist -maxdepth 5 -type d + exit 1 + fi + + # Runs on both cache hit and miss — sets TORNADOVM_HOME and PATH for all + # subsequent steps in the calling workflow. + - name: Configure TornadoVM environment + shell: bash + run: | + cd $TORNADO_ROOT SDK_DIR=$(find dist -type d -maxdepth 3 -path "*/tornadovm-*-${{ inputs.backend }}" | head -n 1) if [ -z "$SDK_DIR" ]; then echo "::error::Could not locate TornadoVM SDK directory!" From 81a122c8ed79d91cfeddebe938ae47052a83dd51 Mon Sep 17 00:00:00 2001 From: Orion Papadakis Date: Fri, 22 May 2026 16:37:35 +0300 Subject: [PATCH 08/14] [ci] Parametrize JAVA_HOME setup in ci --- .github/workflows/build-and-run.yml | 17 ++++++++++++++++- .../integration-quarkus-langchain4j.yml | 18 +++++++++++++++--- 2 files changed, 31 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build-and-run.yml b/.github/workflows/build-and-run.yml index 982c4cfc..7a0da1d3 100644 --- a/.github/workflows/build-and-run.yml +++ b/.github/workflows/build-and-run.yml @@ -8,7 +8,7 @@ on: types: [opened, synchronize, reopened] env: - JAVA_HOME: /opt/jenkins/jdks/graal-23.1.0/jdk-21.0.3 + JAVA_VERSION: 21.0.2-open TORNADO_ROOT: ${{ github.workspace }}/GPULlama3.java/external/tornadovm LLAMA_ROOT: ${{ github.workspace }} GRAAL_JARS: /opt/graalJars @@ -45,6 +45,21 @@ jobs: - name: ptx steps: + - name: Set up Java with SDKMAN + shell: bash + run: | + source "$HOME/.sdkman/bin/sdkman-init.sh" + + if ! sdk list java | grep -q "installed.*$JAVA_VERSION"; then + sdk install java "$JAVA_VERSION" + fi + sdk use java "$JAVA_VERSION" + + echo "JAVA_HOME=$HOME/.sdkman/candidates/java/current" >> $GITHUB_ENV + echo "$HOME/.sdkman/candidates/java/current/bin" >> $GITHUB_PATH + + java -version + - name: Checkout GPULlama3 uses: actions/checkout@v4 diff --git a/.github/workflows/integration-quarkus-langchain4j.yml b/.github/workflows/integration-quarkus-langchain4j.yml index d4a4291e..db426bdc 100644 --- a/.github/workflows/integration-quarkus-langchain4j.yml +++ b/.github/workflows/integration-quarkus-langchain4j.yml @@ -21,7 +21,7 @@ on: type: string env: - JAVA_HOME: /opt/jenkins/jdks/graal-23.1.0/jdk-21.0.3 + JAVA_VERSION: 21.0.2-open TORNADO_ROOT: ${{ github.workspace }}/GPULlama3.java/external/tornadovm GRAAL_JARS: /opt/graalJars QUARKUS_PORT: 8081 @@ -39,8 +39,20 @@ jobs: - name: ptx steps: - - name: Checkout GPULlama3 - uses: actions/checkout@v4 + - name: Set up Java with SDKMAN + shell: bash + run: | + source "$HOME/.sdkman/bin/sdkman-init.sh" + + if ! sdk list java | grep -q "installed.*$JAVA_VERSION"; then + sdk install java "$JAVA_VERSION" + fi + sdk use java "$JAVA_VERSION" + + echo "JAVA_HOME=$HOME/.sdkman/candidates/java/current" >> $GITHUB_ENV + echo "$HOME/.sdkman/candidates/java/current/bin" >> $GITHUB_PATH + + java -version # Step 1: Clone and build TornadoVM - name: Setup TornadoVM From e81d599b84810739b5f3c270886458c8c2f13546 Mon Sep 17 00:00:00 2001 From: Orion Papadakis Date: Fri, 22 May 2026 16:57:22 +0300 Subject: [PATCH 09/14] [ci][fix] Update `setup-tornadovm` action to use `actions/cache/restore@v4` --- .github/actions/setup-tornadovm/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/actions/setup-tornadovm/action.yml b/.github/actions/setup-tornadovm/action.yml index a5512658..1141e4aa 100644 --- a/.github/actions/setup-tornadovm/action.yml +++ b/.github/actions/setup-tornadovm/action.yml @@ -25,7 +25,7 @@ runs: - name: Restore TornadoVM cache id: cache - uses: actions/cache@v4 + uses: actions/cache/restore@v4 with: path: ${{ steps.paths.outputs.tornado_root }} key: tornadovm-${{ inputs.backend }}-${{ steps.tornado_sha.outputs.sha }} From e2a1efd7730e8e1d57156015a31beb98b4d0b280 Mon Sep 17 00:00:00 2001 From: Orion Papadakis Date: Fri, 22 May 2026 17:06:36 +0300 Subject: [PATCH 10/14] [ci][fix] Refactor workflows to parameterize TornadoVM root paths per backend --- .github/workflows/build-and-run.yml | 4 +++- .github/workflows/integration-quarkus-langchain4j.yml | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-and-run.yml b/.github/workflows/build-and-run.yml index 7a0da1d3..42b654b6 100644 --- a/.github/workflows/build-and-run.yml +++ b/.github/workflows/build-and-run.yml @@ -9,7 +9,7 @@ on: env: JAVA_VERSION: 21.0.2-open - TORNADO_ROOT: ${{ github.workspace }}/GPULlama3.java/external/tornadovm + TORNADO_BASE_ROOT: ${{ github.workspace }}/GPULlama3.java/external LLAMA_ROOT: ${{ github.workspace }} GRAAL_JARS: /opt/graalJars MODELS_DIR: /opt/models @@ -65,6 +65,8 @@ jobs: - name: Setup TornadoVM uses: ./.github/actions/setup-tornadovm + env: + TORNADO_ROOT: ${{ env.TORNADO_BASE_ROOT }}/tornadovm-${{ matrix.backend.name }} with: backend: ${{ matrix.backend.name }} diff --git a/.github/workflows/integration-quarkus-langchain4j.yml b/.github/workflows/integration-quarkus-langchain4j.yml index db426bdc..25cc2a09 100644 --- a/.github/workflows/integration-quarkus-langchain4j.yml +++ b/.github/workflows/integration-quarkus-langchain4j.yml @@ -22,7 +22,7 @@ on: env: JAVA_VERSION: 21.0.2-open - TORNADO_ROOT: ${{ github.workspace }}/GPULlama3.java/external/tornadovm + TORNADO_BASE_ROOT: ${{ github.workspace }}/GPULlama3.java/external GRAAL_JARS: /opt/graalJars QUARKUS_PORT: 8081 @@ -57,6 +57,8 @@ jobs: # Step 1: Clone and build TornadoVM - name: Setup TornadoVM uses: ./.github/actions/setup-tornadovm + env: + TORNADO_ROOT: ${{ env.TORNADO_BASE_ROOT }}/tornadovm-${{ matrix.backend.name }} with: backend: ${{ matrix.backend.name }} From 3e5e90c300074a04ea93ae57c8c2be5a59ffad3a Mon Sep 17 00:00:00 2001 From: Orion Papadakis Date: Fri, 22 May 2026 17:21:45 +0300 Subject: [PATCH 11/14] [ci] Fix github syntax issues --- .github/actions/run-inference/action.yml | 3 +- .github/actions/setup-tornadovm/action.yml | 7 +++ .github/workflows/build-and-run.yml | 45 ++++++++++++------- .../integration-quarkus-langchain4j.yml | 25 ++++++----- 4 files changed, 52 insertions(+), 28 deletions(-) diff --git a/.github/actions/run-inference/action.yml b/.github/actions/run-inference/action.yml index 21cb9fb3..314a8be5 100644 --- a/.github/actions/run-inference/action.yml +++ b/.github/actions/run-inference/action.yml @@ -40,6 +40,7 @@ runs: -Dllama.metrics.format=json -Dllama.metrics.output=file -Dllama.metrics.file=${{ inputs.metrics_file }} + METRICS_FILE: ${{ inputs.metrics_file }} run: | # Run inference and emit raw metrics JSON via JAVA_TOOL_OPTIONS ./llama-tornado --gpu --${{ inputs.backend }} \ @@ -48,7 +49,7 @@ runs: ${{ inputs.flags }} # Write metadata sidecar so process_metrics.py can identify each metrics file - SIDECAR="${{ inputs.metrics_file }%.json}.meta.json" + SIDECAR="${METRICS_FILE%.json}.meta.json" python3 scripts/write_metrics_sidecar.py \ --out "$SIDECAR" \ backend="${{ inputs.backend }}" \ diff --git a/.github/actions/setup-tornadovm/action.yml b/.github/actions/setup-tornadovm/action.yml index 1141e4aa..d3742050 100644 --- a/.github/actions/setup-tornadovm/action.yml +++ b/.github/actions/setup-tornadovm/action.yml @@ -91,3 +91,10 @@ runs: echo "=== Checking tornado CLI ===" which tornado || { echo "::error::tornado not in PATH"; exit 1; } tornado --devices + + - name: Save TornadoVM cache + if: steps.cache.outputs.cache-hit != 'true' + uses: actions/cache/save@v4 + with: + path: ${{ steps.paths.outputs.tornado_root }} + key: tornadovm-${{ inputs.backend }}-${{ steps.tornado_sha.outputs.sha }} diff --git a/.github/workflows/build-and-run.yml b/.github/workflows/build-and-run.yml index 42b654b6..af298a2a 100644 --- a/.github/workflows/build-and-run.yml +++ b/.github/workflows/build-and-run.yml @@ -49,10 +49,12 @@ jobs: shell: bash run: | source "$HOME/.sdkman/bin/sdkman-init.sh" - - if ! sdk list java | grep -q "installed.*$JAVA_VERSION"; then + + # install Java with sdkman, if not already installed + if ! sdk list java | grep -q "$JAVA_VERSION"; then sdk install java "$JAVA_VERSION" fi + sdk use java "$JAVA_VERSION" echo "JAVA_HOME=$HOME/.sdkman/candidates/java/current" >> $GITHUB_ENV @@ -283,6 +285,7 @@ jobs: github.repository == 'beehive-lab/GPULlama3.java' && github.event_name == 'push' && github.ref == 'refs/heads/main' + runs-on: [self-hosted] needs: build-and-run timeout-minutes: 15 @@ -302,24 +305,36 @@ jobs: run: | python3 scripts/process_metrics.py \ --metrics-dir "${{ runner.temp }}/metrics-artifacts" \ - --commit "${{ github.sha }}" \ - --branch "${{ github.ref_name }}" \ - --run-id "${{ github.run_id }}" \ - --run-number "${{ github.run_number }}" \ - --run-attempt "${{ github.run_attempt }}" \ - --workflow "${{ github.workflow }}" \ - --history "$PERF_HISTORY_FILE" + --commit "${{ github.sha }}" \ + --branch "${{ github.ref_name }}" \ + --run-id "${{ github.run_id }}" \ + --run-number "${{ github.run_number }}" \ + --run-attempt "${{ github.run_attempt }}" \ + --workflow "${{ github.workflow }}" \ + --history "$PERF_HISTORY_FILE" - name: Commit performance history run: | - git config user.name "github-actions[bot]" + SHORT_SHA=$(echo "${GITHUB_SHA}" | cut -c1-8) + + git config user.name "github-actions[bot]" git config user.email "github-actions[bot]@users.noreply.github.com" + git add "$PERF_HISTORY_FILE" - git diff --cached --quiet && echo "No history changes to commit" && exit 0 - git commit -m "perf: record run #${{ github.run_number }} @ ${GITHUB_SHA::8}" + + git diff --cached --quiet && \ + echo "No history changes to commit" && exit 0 + + git commit -m "perf: record run #${{ github.run_number }} @ ${SHORT_SHA}" + for attempt in 1 2 3; do git pull --rebase origin main && git push && break || { - [ $attempt -lt 3 ] && { echo "Attempt $attempt failed, retrying in $((attempt * 5))s..."; sleep $((attempt * 5)); } \ - || { echo "::error::Failed to push after 3 attempts"; exit 1; } + if [ $attempt -lt 3 ]; then + echo "Attempt $attempt failed, retrying in $((attempt * 5))s..." + sleep $((attempt * 5)) + else + echo "::error::Failed to push after 3 attempts" + exit 1 + fi } - done + done \ No newline at end of file diff --git a/.github/workflows/integration-quarkus-langchain4j.yml b/.github/workflows/integration-quarkus-langchain4j.yml index 25cc2a09..c27f7786 100644 --- a/.github/workflows/integration-quarkus-langchain4j.yml +++ b/.github/workflows/integration-quarkus-langchain4j.yml @@ -4,11 +4,11 @@ on: push: branches: [ main ] pull_request: - branches: [ main ] - types: [opened, synchronize, reopened] + branches: [ main ] + types: [opened, synchronize, reopened] schedule: - # Run every Saturday at 02:30 UTC to catch dependency breakages - - cron: '30 2 * * 6' + # Run every Saturday at 02:30 UTC to catch dependency breakages + - cron: '30 2 * * 6' workflow_dispatch: inputs: quarkus_langchain4j_version: @@ -77,7 +77,7 @@ jobs: # Build ./mvnw clean install -DskipTests - + # Save GPULlama3.java version for subsequent steps echo "GPULLAMA3_VERSION=$GPULLAMA3_VERSION" >> $GITHUB_ENV @@ -85,6 +85,7 @@ jobs: - name: Clone Quarkus LangChain4j run: | cd ${{ github.workspace }} + rm -rf quarkus-langchain4j git clone --depth 1 https://github.com/quarkiverse/quarkus-langchain4j.git # Step 4: Build Quarkus LangChain4j with current GPULlama3.java @@ -156,15 +157,15 @@ jobs: MAX_ATTEMPTS=3 ATTEMPT=1 SUCCESS=false - + while [ $ATTEMPT -le $MAX_ATTEMPTS ]; do echo "Attempt $ATTEMPT of $MAX_ATTEMPTS for blocking endpoint..." - + # Trigger endpoint HTTP_RESPONSE=$(curl -s -w "%{http_code}" http://localhost:$QUARKUS_PORT/chat/blocking) HTTP_RESPONSE_CODE="${HTTP_RESPONSE: -3}" HTTP_RESPONSE_BODY="${HTTP_RESPONSE%???}" - + # Check response code if [ "$HTTP_RESPONSE_CODE" = "200" ]; then echo "SUCCESS: Blocking endpoint returned HTTP code: $HTTP_RESPONSE_CODE" @@ -174,16 +175,16 @@ jobs: else echo "Attempt $ATTEMPT failed: Blocking endpoint returned HTTP code $HTTP_RESPONSE_CODE" echo "Response body: $HTTP_RESPONSE_BODY" - + if [ $ATTEMPT -lt $MAX_ATTEMPTS ]; then echo "Retrying in 2 seconds..." sleep 2 fi fi - + ATTEMPT=$((ATTEMPT + 1)) done - + if [ "$SUCCESS" = false ]; then echo "ERROR: Blocking endpoint failed after $MAX_ATTEMPTS attempts" exit 1 @@ -233,4 +234,4 @@ jobs: run: | # Clean shutdown kill $APP_PID || true - wait $APP_PID 2>/dev/null || true + wait $APP_PID 2>/dev/null || true \ No newline at end of file From 5a48ad4a8ef482e9292e341130000ad03b01822e Mon Sep 17 00:00:00 2001 From: Orion Papadakis Date: Sat, 23 May 2026 16:02:29 +0300 Subject: [PATCH 12/14] [ci][fix] Revamp `setup-tornadovm` action to replace cache-based logic with SHA-based build reuse for optimized workflow efficiency --- .github/actions/setup-tornadovm/action.yml | 48 ++++++++-------------- 1 file changed, 18 insertions(+), 30 deletions(-) diff --git a/.github/actions/setup-tornadovm/action.yml b/.github/actions/setup-tornadovm/action.yml index d3742050..1ad1bdd2 100644 --- a/.github/actions/setup-tornadovm/action.yml +++ b/.github/actions/setup-tornadovm/action.yml @@ -1,5 +1,5 @@ name: Setup TornadoVM -description: Clone (or restore from cache), build, and configure TornadoVM. Exports TORNADOVM_HOME and updates PATH for all subsequent steps. +description: Build TornadoVM once per backend and reuse across runs via a local SHA sentinel. Exports TORNADOVM_HOME and updates PATH for all subsequent steps. inputs: backend: @@ -16,30 +16,28 @@ runs: SHA=$(git ls-remote https://github.com/beehive-lab/TornadoVM HEAD | cut -f1) echo "sha=$SHA" >> $GITHUB_OUTPUT - # actions/cache `path:` is an expression — it cannot access the calling - # workflow's env vars via ${{ env.* }}, so we resolve TORNADO_ROOT here. - - name: Resolve TORNADO_ROOT for cache path - id: paths + - name: Check local build sentinel + id: sentinel shell: bash - run: echo "tornado_root=$TORNADO_ROOT" >> $GITHUB_OUTPUT - - - name: Restore TornadoVM cache - id: cache - uses: actions/cache/restore@v4 - with: - path: ${{ steps.paths.outputs.tornado_root }} - key: tornadovm-${{ inputs.backend }}-${{ steps.tornado_sha.outputs.sha }} + run: | + SENTINEL="$TORNADO_ROOT/.built-sha" + if [ -f "$SENTINEL" ] && [ "$(cat $SENTINEL)" = "${{ steps.tornado_sha.outputs.sha }}" ]; then + echo "up-to-date=true" >> $GITHUB_OUTPUT + else + echo "up-to-date=false" >> $GITHUB_OUTPUT + fi - name: Clone TornadoVM master - if: steps.cache.outputs.cache-hit != 'true' + if: steps.sentinel.outputs.up-to-date != 'true' shell: bash run: | + rm -rf $TORNADO_ROOT git clone --depth 1 --branch master \ https://github.com/beehive-lab/TornadoVM.git \ $TORNADO_ROOT - name: Set up Python venv for TornadoVM - if: steps.cache.outputs.cache-hit != 'true' + if: steps.sentinel.outputs.up-to-date != 'true' shell: bash run: | python3 -m venv $TORNADO_ROOT/venv @@ -47,7 +45,7 @@ runs: python --version - name: Build TornadoVM - if: steps.cache.outputs.cache-hit != 'true' + if: steps.sentinel.outputs.up-to-date != 'true' shell: bash run: | cd $TORNADO_ROOT @@ -64,8 +62,10 @@ runs: exit 1 fi - # Runs on both cache hit and miss — sets TORNADOVM_HOME and PATH for all - # subsequent steps in the calling workflow. + echo "${{ steps.tornado_sha.outputs.sha }}" > .built-sha + + # Runs on both fresh build and sentinel hit — sets TORNADOVM_HOME and PATH + # for all subsequent steps in the calling workflow. - name: Configure TornadoVM environment shell: bash run: | @@ -79,11 +79,6 @@ runs: FULL_SDK="${PWD}/${SDK_DIR}" echo "Detected TornadoVM SDK: $FULL_SDK" - # Export for current shell session - export TORNADOVM_HOME="$FULL_SDK" - export PATH="$FULL_SDK/bin:$JAVA_HOME/bin:$PATH" - - # Save for subsequent steps echo "TORNADOVM_HOME=$FULL_SDK" >> $GITHUB_ENV echo "$FULL_SDK/bin" >> $GITHUB_PATH echo "$JAVA_HOME/bin" >> $GITHUB_PATH @@ -91,10 +86,3 @@ runs: echo "=== Checking tornado CLI ===" which tornado || { echo "::error::tornado not in PATH"; exit 1; } tornado --devices - - - name: Save TornadoVM cache - if: steps.cache.outputs.cache-hit != 'true' - uses: actions/cache/save@v4 - with: - path: ${{ steps.paths.outputs.tornado_root }} - key: tornadovm-${{ inputs.backend }}-${{ steps.tornado_sha.outputs.sha }} From b986b1c28899fc90fa0b74aa42ee70c4cf2b970c Mon Sep 17 00:00:00 2001 From: Orion Papadakis Date: Mon, 25 May 2026 13:32:22 +0300 Subject: [PATCH 13/14] [ci] Introduce reusable `setup-java` action to unify and simplify JDK setup across workflows --- .github/actions/setup-java/action.yml | 22 ++++++++++++++++ .github/actions/setup-tornadovm/action.yml | 25 +++++++------------ .github/workflows/build-and-run.yml | 22 ++++------------ .github/workflows/deploy-maven-central.yml | 13 +++++----- .../integration-quarkus-langchain4j.yml | 18 +++---------- .github/workflows/prepare-release.yml | 10 ++++---- 6 files changed, 51 insertions(+), 59 deletions(-) create mode 100644 .github/actions/setup-java/action.yml diff --git a/.github/actions/setup-java/action.yml b/.github/actions/setup-java/action.yml new file mode 100644 index 00000000..334b5432 --- /dev/null +++ b/.github/actions/setup-java/action.yml @@ -0,0 +1,22 @@ +name: Setup Java +description: Install and activate a JDK via SDKMAN. Exports JAVA_HOME and updates PATH. + +inputs: + java_version: + description: 'SDKMAN Java version identifier (e.g. 21.0.2-open)' + required: true + +runs: + using: composite + steps: + - name: Set up Java with SDKMAN + shell: bash + run: | + source "$HOME/.sdkman/bin/sdkman-init.sh" + if ! sdk list java | grep -q "${{ inputs.java_version }}"; then + sdk install java "${{ inputs.java_version }}" + fi + sdk use java "${{ inputs.java_version }}" + echo "JAVA_HOME=$HOME/.sdkman/candidates/java/current" >> $GITHUB_ENV + echo "$HOME/.sdkman/candidates/java/current/bin" >> $GITHUB_PATH + java -version diff --git a/.github/actions/setup-tornadovm/action.yml b/.github/actions/setup-tornadovm/action.yml index 1ad1bdd2..3b1c5070 100644 --- a/.github/actions/setup-tornadovm/action.yml +++ b/.github/actions/setup-tornadovm/action.yml @@ -53,36 +53,29 @@ runs: source venv/bin/activate echo "=== Building TornadoVM ===" make BACKEND=${{ inputs.backend }} + echo "${{ steps.tornado_sha.outputs.sha }}" > .built-sha - echo "=== Verifying TornadoVM SDK directory ===" - SDK_DIR=$(find dist -type d -maxdepth 3 -path "*/tornadovm-*-${{ inputs.backend }}" | head -n 1) + - name: Find TornadoVM SDK directory + id: find_sdk + shell: bash + run: | + SDK_DIR=$(find $TORNADO_ROOT/dist -type d -maxdepth 3 -path "*/tornadovm-*-${{ inputs.backend }}" | head -n 1) if [ -z "$SDK_DIR" ]; then echo "::error::Could not locate TornadoVM SDK directory!" - find dist -maxdepth 5 -type d + find $TORNADO_ROOT/dist -maxdepth 5 -type d exit 1 fi - - echo "${{ steps.tornado_sha.outputs.sha }}" > .built-sha + echo "sdk_dir=$SDK_DIR" >> $GITHUB_OUTPUT # Runs on both fresh build and sentinel hit — sets TORNADOVM_HOME and PATH # for all subsequent steps in the calling workflow. - name: Configure TornadoVM environment shell: bash run: | - cd $TORNADO_ROOT - SDK_DIR=$(find dist -type d -maxdepth 3 -path "*/tornadovm-*-${{ inputs.backend }}" | head -n 1) - if [ -z "$SDK_DIR" ]; then - echo "::error::Could not locate TornadoVM SDK directory!" - find dist -maxdepth 5 -type d - exit 1 - fi - FULL_SDK="${PWD}/${SDK_DIR}" + FULL_SDK="${{ steps.find_sdk.outputs.sdk_dir }}" echo "Detected TornadoVM SDK: $FULL_SDK" - echo "TORNADOVM_HOME=$FULL_SDK" >> $GITHUB_ENV echo "$FULL_SDK/bin" >> $GITHUB_PATH echo "$JAVA_HOME/bin" >> $GITHUB_PATH - - echo "=== Checking tornado CLI ===" which tornado || { echo "::error::tornado not in PATH"; exit 1; } tornado --devices diff --git a/.github/workflows/build-and-run.yml b/.github/workflows/build-and-run.yml index af298a2a..ef8a3a51 100644 --- a/.github/workflows/build-and-run.yml +++ b/.github/workflows/build-and-run.yml @@ -45,26 +45,14 @@ jobs: - name: ptx steps: - - name: Set up Java with SDKMAN - shell: bash - run: | - source "$HOME/.sdkman/bin/sdkman-init.sh" - - # install Java with sdkman, if not already installed - if ! sdk list java | grep -q "$JAVA_VERSION"; then - sdk install java "$JAVA_VERSION" - fi - - sdk use java "$JAVA_VERSION" - - echo "JAVA_HOME=$HOME/.sdkman/candidates/java/current" >> $GITHUB_ENV - echo "$HOME/.sdkman/candidates/java/current/bin" >> $GITHUB_PATH - - java -version - - name: Checkout GPULlama3 uses: actions/checkout@v4 + - name: Set up Java + uses: ./.github/actions/setup-java + with: + java_version: ${{ env.JAVA_VERSION }} + - name: Setup TornadoVM uses: ./.github/actions/setup-tornadovm env: diff --git a/.github/workflows/deploy-maven-central.yml b/.github/workflows/deploy-maven-central.yml index 88bcee8e..53f62226 100644 --- a/.github/workflows/deploy-maven-central.yml +++ b/.github/workflows/deploy-maven-central.yml @@ -31,19 +31,18 @@ jobs: matrix: jdk: - name: jdk21 - java_home: /opt/jenkins/jdks/graal-23.1.0/jdk-21.0.3 + java_version: 21.0.2-open - name: jdk25 - java_home: /opt/jenkins/jdks/jdk-25.0.2 - env: - JAVA_HOME: ${{ matrix.jdk.java_home }} + java_version: 25.0.2-open steps: - name: Checkout code uses: actions/checkout@v4 - - name: Setup environment - run: | - echo "$JAVA_HOME/bin" >> $GITHUB_PATH + - name: Set up Java + uses: ./.github/actions/setup-java + with: + java_version: ${{ matrix.jdk.java_version }} - name: Configure Maven settings run: | diff --git a/.github/workflows/integration-quarkus-langchain4j.yml b/.github/workflows/integration-quarkus-langchain4j.yml index c27f7786..8c44484d 100644 --- a/.github/workflows/integration-quarkus-langchain4j.yml +++ b/.github/workflows/integration-quarkus-langchain4j.yml @@ -39,20 +39,10 @@ jobs: - name: ptx steps: - - name: Set up Java with SDKMAN - shell: bash - run: | - source "$HOME/.sdkman/bin/sdkman-init.sh" - - if ! sdk list java | grep -q "installed.*$JAVA_VERSION"; then - sdk install java "$JAVA_VERSION" - fi - sdk use java "$JAVA_VERSION" - - echo "JAVA_HOME=$HOME/.sdkman/candidates/java/current" >> $GITHUB_ENV - echo "$HOME/.sdkman/candidates/java/current/bin" >> $GITHUB_PATH - - java -version + - name: Set up Java + uses: ./.github/actions/setup-java + with: + java_version: ${{ env.JAVA_VERSION }} # Step 1: Clone and build TornadoVM - name: Setup TornadoVM diff --git a/.github/workflows/prepare-release.yml b/.github/workflows/prepare-release.yml index 3ff71f90..00d6c63b 100644 --- a/.github/workflows/prepare-release.yml +++ b/.github/workflows/prepare-release.yml @@ -20,6 +20,7 @@ on: env: VERSION: ${{ inputs.version }} PREV_VERSION: ${{ inputs.previous_version }} + JAVA_VERSION: 21.0.2-open jobs: prepare-release: @@ -29,8 +30,6 @@ jobs: contents: write pull-requests: write timeout-minutes: 15 - env: - JAVA_HOME: /opt/jenkins/jdks/graal-23.1.0/jdk-21.0.3 steps: - name: Validate version format @@ -48,9 +47,10 @@ jobs: fetch-depth: 0 token: ${{ secrets.GITHUB_TOKEN }} - - name: Setup environment - run: | - echo "$JAVA_HOME/bin" >> $GITHUB_PATH + - name: Set up Java + uses: ./.github/actions/setup-java + with: + java_version: ${{ env.JAVA_VERSION }} - name: Configure Git run: | From 7d13c88435402a9bfbad66da8cb17819137d37ce Mon Sep 17 00:00:00 2001 From: Orion Papadakis Date: Mon, 25 May 2026 16:33:15 +0300 Subject: [PATCH 14/14] [ci] Merge integration tests into main ci workflow --- .github/workflows/build-and-run.yml | 180 ++++++++++++-- .../integration-quarkus-langchain4j.yml | 227 ------------------ 2 files changed, 163 insertions(+), 244 deletions(-) delete mode 100644 .github/workflows/integration-quarkus-langchain4j.yml diff --git a/.github/workflows/build-and-run.yml b/.github/workflows/build-and-run.yml index ef8a3a51..2a19099f 100644 --- a/.github/workflows/build-and-run.yml +++ b/.github/workflows/build-and-run.yml @@ -9,10 +9,9 @@ on: env: JAVA_VERSION: 21.0.2-open - TORNADO_BASE_ROOT: ${{ github.workspace }}/GPULlama3.java/external - LLAMA_ROOT: ${{ github.workspace }} GRAAL_JARS: /opt/graalJars MODELS_DIR: /opt/models + QUARKUS_PORT: 8081 # History file committed back to the repo on push to main PERF_HISTORY_FILE: docs/perf-history.jsonl @@ -31,14 +30,17 @@ jobs: cd ${{ github.workspace }} # ./mvnw -T12C -Pspotless spotless:check - build-and-run: + # Build: TornadoVM → GPULlama3 → Quarkus LangChain4j + # max-parallel: 1 ensures the opencl and ptx variants run sequentially so + # there are no workspace conflicts between matrix jobs. + build: if: github.repository == 'beehive-lab/GPULlama3.java' runs-on: [self-hosted] needs: code-quality timeout-minutes: 30 - strategy: fail-fast: true + max-parallel: 1 matrix: backend: - name: opencl @@ -47,6 +49,8 @@ jobs: steps: - name: Checkout GPULlama3 uses: actions/checkout@v4 + with: + clean: false - name: Set up Java uses: ./.github/actions/setup-java @@ -56,18 +60,66 @@ jobs: - name: Setup TornadoVM uses: ./.github/actions/setup-tornadovm env: - TORNADO_ROOT: ${{ env.TORNADO_BASE_ROOT }}/tornadovm-${{ matrix.backend.name }} + TORNADO_ROOT: ${{ runner.tool_cache }}/tornadovm/tornadovm-${{ matrix.backend.name }} with: backend: ${{ matrix.backend.name }} - name: Build GPULlama3.java run: | - cd ${{ github.workspace }} - echo "Using TORNADOVM_HOME=$TORNADOVM_HOME" tornado --version - ./mvnw clean package -DskipTests + # Strip any pre-existing -SNAPSHOT suffix before appending, making this step idempotent + # across sequential matrix variants (ptx runs after opencl on the same workspace). + BASE_VERSION=$(./mvnw help:evaluate -Dexpression=project.version -q -DforceStdout | sed 's/-SNAPSHOT$//') + GPULLAMA3_VERSION="${BASE_VERSION}-SNAPSHOT" + echo "GPULlama3 version: $GPULLAMA3_VERSION" + ./mvnw versions:set -DnewVersion=$GPULLAMA3_VERSION + ./mvnw clean install -DskipTests + echo "GPULLAMA3_VERSION=$GPULLAMA3_VERSION" >> $GITHUB_ENV + + - name: Clone Quarkus LangChain4j + run: | + rm -rf quarkus-langchain4j + git clone --depth 1 https://github.com/quarkiverse/quarkus-langchain4j.git + + - name: Build Quarkus LangChain4j + run: | + cd ${{ github.workspace }}/quarkus-langchain4j + sed -i 's/.*<\/gpu-llama3\.version>/'$GPULLAMA3_VERSION'<\/gpu-llama3.version>/' pom.xml + # -Dtornado activates the TornadoVM profile; -am builds only the gpu-llama3 module + deps + mvn clean install -pl integration-tests/gpu-llama3 -am -DskipTests -Dtornado + + standalone-inference: + if: github.repository == 'beehive-lab/GPULlama3.java' + runs-on: [self-hosted] + needs: build + timeout-minutes: 30 + strategy: + fail-fast: true + matrix: + backend: + - name: opencl + - name: ptx - # ── Llama-3.2-1B: standard + prefill-decode variants, all backends ────────── + steps: + - name: Checkout GPULlama3 + uses: actions/checkout@v4 + with: + clean: false + + - name: Set up Java + uses: ./.github/actions/setup-java + with: + java_version: ${{ env.JAVA_VERSION }} + + - name: Setup TornadoVM + uses: ./.github/actions/setup-tornadovm + env: + TORNADO_ROOT: ${{ runner.tool_cache }}/tornadovm/tornadovm-${{ matrix.backend.name }} + with: + backend: ${{ matrix.backend.name }} + + # Test standalone mode per model family and quantization + # Note: variants can be represented with matrices - name: FP16 - Run Llama-3.2-1B-Instruct-F16.gguf - Standard uses: ./.github/actions/run-inference with: @@ -100,12 +152,12 @@ jobs: flags: --with-prefill-decode --batch-prefill-size 32 metrics_file: ${{ runner.temp }}/metrics-${{ matrix.backend.name }}-llama-1b-f16-batch-prefill-decode.json - # ── PTX-only: CUDA-graph variants ──────────────────────────────────────── + # PTX-only: CUDA-graph variants - name: PTX - FP16 - Run Llama-3.2-1B-Instruct-F16.gguf - Prefill-Decode-CUDA-Graphs if: matrix.backend.name == 'ptx' uses: ./.github/actions/run-inference with: - backend: ptx + backend: ${{ matrix.backend.name }} model_file: Llama-3.2-1B-Instruct-F16.gguf model: Llama-3.2-1B-Instruct quantization: F16 @@ -117,7 +169,7 @@ jobs: if: matrix.backend.name == 'ptx' uses: ./.github/actions/run-inference with: - backend: ptx + backend: ${{ matrix.backend.name }} model_file: Llama-3.2-1B-Instruct-F16.gguf model: Llama-3.2-1B-Instruct quantization: F16 @@ -125,7 +177,6 @@ jobs: flags: --with-prefill-decode --batch-prefill-size 32 --cuda-graphs metrics_file: ${{ runner.temp }}/metrics-ptx-llama-1b-f16-batch-prefill-decode-cuda-graphs.json - # ── Additional models — standard inference, all backends ───────────────── - name: FP16 - Run Qwen3-4B-f16.gguf uses: ./.github/actions/run-inference with: @@ -256,7 +307,7 @@ jobs: configuration: standard metrics_file: ${{ runner.temp }}/metrics-${{ matrix.backend.name }}-granite-4-0-1b-q8-standard.json - # ── Upload metrics for the publish job ──────────────────────────────────── + # Upload metrics for the publish job - name: Upload metrics artifacts if: always() uses: actions/upload-artifact@v4 @@ -265,7 +316,102 @@ jobs: path: ${{ runner.temp }}/metrics-${{ matrix.backend.name }}-*.json if-no-files-found: warn - # ── Separate job: collect all matrix metrics and update history ─────────────── + # Test integration with Quarkus-langchain4j + quarkus-langchain4j-integration: + if: github.repository == 'beehive-lab/GPULlama3.java' + runs-on: [self-hosted] + needs: build + timeout-minutes: 10 + strategy: + fail-fast: true + matrix: + backend: + - name: opencl + - name: ptx + + steps: + - name: Checkout GPULlama3 + uses: actions/checkout@v4 + with: + clean: false + + - name: Set up Java + uses: ./.github/actions/setup-java + with: + java_version: ${{ env.JAVA_VERSION }} + + - name: Setup TornadoVM + uses: ./.github/actions/setup-tornadovm + env: + TORNADO_ROOT: ${{ runner.tool_cache }}/tornadovm/tornadovm-${{ matrix.backend.name }} + with: + backend: ${{ matrix.backend.name }} + + - name: Verify GPULlama3 Dependency + run: | + cd ${{ github.workspace }}/quarkus-langchain4j/integration-tests/gpu-llama3 + mvn dependency:tree | grep "io.github.beehive-lab:gpu-llama3" + + - name: Start Quarkus Application + run: | + cd ${{ github.workspace }}/quarkus-langchain4j/integration-tests/gpu-llama3 + java @"$TORNADOVM_HOME/tornado-argfile" \ + -Dtornado.device.memory=8GB \ + -Dquarkus.http.port=$QUARKUS_PORT \ + -jar target/quarkus-app/quarkus-run.jar & + APP_PID=$! + echo "APP_PID=$APP_PID" >> $GITHUB_ENV + for i in {1..30}; do + if curl -s http://localhost:$QUARKUS_PORT/q/health > /dev/null 2>&1; then + echo "Application ready after ${i} seconds" + break + elif [ $i -eq 30 ]; then + echo "::error::Application failed to start within 30 seconds" + kill $APP_PID || true + exit 1 + else + [ $((i % 5)) -eq 0 ] && echo "Still waiting... (${i}s)" + sleep 1 + fi + done + + - name: Trigger Blocking Endpoint + run: | + for attempt in 1 2 3; do + echo "Attempt $attempt of 3 for blocking endpoint..." + HTTP_RESPONSE=$(curl -s -w "%{http_code}" http://localhost:$QUARKUS_PORT/chat/blocking) + HTTP_CODE="${HTTP_RESPONSE: -3}" + if [ "$HTTP_CODE" = "200" ]; then + echo "SUCCESS: HTTP $HTTP_CODE" + echo "Response body: ${HTTP_RESPONSE%???}" + break + fi + echo "Failed: HTTP $HTTP_CODE" + [ $attempt -lt 3 ] && sleep 2 + [ $attempt -eq 3 ] && { echo "::error::Blocking endpoint failed after 3 attempts"; exit 1; } + done + + - name: Trigger Streaming Endpoint + run: | + for attempt in 1 2 3; do + echo "Attempt $attempt of 3 for streaming endpoint..." + HTTP_CODE=$(timeout 10s curl -s -o /dev/null -w "%{http_code}" http://localhost:$QUARKUS_PORT/chat/streaming) + if [ "$HTTP_CODE" = "200" ]; then + echo "SUCCESS: HTTP $HTTP_CODE" + break + fi + echo "Failed: HTTP $HTTP_CODE" + [ $attempt -lt 3 ] && sleep 2 + [ $attempt -eq 3 ] && { echo "::error::Streaming endpoint failed after 3 attempts"; exit 1; } + done + + - name: Cleanup & Shutdown + if: always() + run: | + kill $APP_PID || true + wait $APP_PID 2>/dev/null || true + + # Collect all matrix metrics and update history publish-performance-history: # Guard: only commit history on real pushes to main, not on PRs or forks. # Prevents duplicate entries from PR runs and avoids push-permission errors on forks. @@ -275,7 +421,7 @@ jobs: github.ref == 'refs/heads/main' runs-on: [self-hosted] - needs: build-and-run + needs: standalone-inference timeout-minutes: 15 steps: @@ -325,4 +471,4 @@ jobs: exit 1 fi } - done \ No newline at end of file + done diff --git a/.github/workflows/integration-quarkus-langchain4j.yml b/.github/workflows/integration-quarkus-langchain4j.yml deleted file mode 100644 index 8c44484d..00000000 --- a/.github/workflows/integration-quarkus-langchain4j.yml +++ /dev/null @@ -1,227 +0,0 @@ -name: Integration Quarkus-LangChain4j - -on: - push: - branches: [ main ] - pull_request: - branches: [ main ] - types: [opened, synchronize, reopened] - schedule: - # Run every Saturday at 02:30 UTC to catch dependency breakages - - cron: '30 2 * * 6' - workflow_dispatch: - inputs: - quarkus_langchain4j_version: - description: 'Quarkus LangChain4j version to test against' - required: false - type: string - quarkus_version: - description: 'Quarkus platform version to test against' - required: false - type: string - -env: - JAVA_VERSION: 21.0.2-open - TORNADO_BASE_ROOT: ${{ github.workspace }}/GPULlama3.java/external - GRAAL_JARS: /opt/graalJars - QUARKUS_PORT: 8081 - -jobs: - quarkus-integration-test: - if: github.repository == 'beehive-lab/GPULlama3.java' - runs-on: [self-hosted] - timeout-minutes: 30 - strategy: - fail-fast: true - matrix: - backend: - - name: opencl - - name: ptx - - steps: - - name: Set up Java - uses: ./.github/actions/setup-java - with: - java_version: ${{ env.JAVA_VERSION }} - - # Step 1: Clone and build TornadoVM - - name: Setup TornadoVM - uses: ./.github/actions/setup-tornadovm - env: - TORNADO_ROOT: ${{ env.TORNADO_BASE_ROOT }}/tornadovm-${{ matrix.backend.name }} - with: - backend: ${{ matrix.backend.name }} - - # Step 2: Build GPULlama3.java - - name: Build GPULlama3.java - run: | - cd ${{ github.workspace }} - echo "Using TORNADOVM_HOME=$TORNADOVM_HOME" - tornado --version - - # Append SNAPSHOT to GPULlama3 version - GPULLAMA3_VERSION=$(./mvnw help:evaluate -Dexpression=project.version -q -DforceStdout) - GPULLAMA3_VERSION="${GPULLAMA3_VERSION}-SNAPSHOT" - echo "GPULlama3 version: $GPULLAMA3_VERSION" - ./mvnw versions:set -DnewVersion=$GPULLAMA3_VERSION - - # Build - ./mvnw clean install -DskipTests - - # Save GPULlama3.java version for subsequent steps - echo "GPULLAMA3_VERSION=$GPULLAMA3_VERSION" >> $GITHUB_ENV - - # Step 3: Clone Quarkus LangChain4j - - name: Clone Quarkus LangChain4j - run: | - cd ${{ github.workspace }} - rm -rf quarkus-langchain4j - git clone --depth 1 https://github.com/quarkiverse/quarkus-langchain4j.git - - # Step 4: Build Quarkus LangChain4j with current GPULlama3.java - - name: Build Quarkus LangChain4j - run: | - cd ${{ github.workspace }}/quarkus-langchain4j - - # Update the GPULlama3 version used by quarkus-langchain4j - POM="pom.xml" - sed -i 's/.*<\/gpu-llama3\.version>/'$GPULLAMA3_VERSION'<\/gpu-llama3.version>/' "$POM" - - # Use reactor to build *only *GPULlama3 integration test + dependencies - # This recompiles everything with the same Java version, avoiding compatibility issues - # The -Dtornado flag activates the TornadoVM profile which includes gpu-llama3 module - mvn clean install -pl integration-tests/gpu-llama3 -am -DskipTests -Dtornado - - # Step 4.5: Verify GPULlama3 JAR - - name: Verify GPULlama3 Dependency - run: | - cd ${{ github.workspace }}/quarkus-langchain4j/integration-tests/gpu-llama3 - echo "GPULlama3 dependency used by Quarkus-LangChain4j:" - mvn dependency:tree | grep "io.github.beehive-lab:gpu-llama3" - - # Step 5: Start Quarkus Application and Wait for Startup - - name: Start Quarkus Application and Wait for Startup - run: | - cd ${{ github.workspace }}/quarkus-langchain4j/integration-tests/gpu-llama3 - - echo "Starting Quarkus application on port $QUARKUS_PORT..." - - # Start the Quarkus application in the background - java @"$TORNADOVM_HOME/tornado-argfile" \ - -Dtornado.device.memory=8GB \ - -Dquarkus.http.port=$QUARKUS_PORT \ - -jar target/quarkus-app/quarkus-run.jar & - APP_PID=$! - echo "APP_PID=$APP_PID" >> $GITHUB_ENV - - if [ -z "$APP_PID" ]; then - echo "ERROR: Failed to start Quarkus application" - exit 1 - fi - - echo "Waiting for Quarkus application to start..." - - # Wait for application to be ready - for i in {1..30}; do - if curl -s http://localhost:$QUARKUS_PORT/q/health > /dev/null 2>&1; then - echo "Application ready after ${i} seconds" - echo "Health endpoint: http://localhost:$QUARKUS_PORT/q/health" - break - elif [ $i -eq 30 ]; then - echo "ERROR: Application failed to start within 30 seconds" - echo "Debugging info:" - echo "- Port: $QUARKUS_PORT" - echo "- Process ID: $APP_PID" - echo "- Health URL: http://localhost:$QUARKUS_PORT/q/health" - kill $APP_PID || true - exit 1 - else - [ $((i % 5)) -eq 0 ] && echo "Still waiting... (${i}s)" - sleep 1 - fi - done - - # Step 6: Run test 1 - - name: Trigger Blocking Endpoint - run: | - MAX_ATTEMPTS=3 - ATTEMPT=1 - SUCCESS=false - - while [ $ATTEMPT -le $MAX_ATTEMPTS ]; do - echo "Attempt $ATTEMPT of $MAX_ATTEMPTS for blocking endpoint..." - - # Trigger endpoint - HTTP_RESPONSE=$(curl -s -w "%{http_code}" http://localhost:$QUARKUS_PORT/chat/blocking) - HTTP_RESPONSE_CODE="${HTTP_RESPONSE: -3}" - HTTP_RESPONSE_BODY="${HTTP_RESPONSE%???}" - - # Check response code - if [ "$HTTP_RESPONSE_CODE" = "200" ]; then - echo "SUCCESS: Blocking endpoint returned HTTP code: $HTTP_RESPONSE_CODE" - echo "HTTP Response body: $HTTP_RESPONSE_BODY" - SUCCESS=true - break - else - echo "Attempt $ATTEMPT failed: Blocking endpoint returned HTTP code $HTTP_RESPONSE_CODE" - echo "Response body: $HTTP_RESPONSE_BODY" - - if [ $ATTEMPT -lt $MAX_ATTEMPTS ]; then - echo "Retrying in 2 seconds..." - sleep 2 - fi - fi - - ATTEMPT=$((ATTEMPT + 1)) - done - - if [ "$SUCCESS" = false ]; then - echo "ERROR: Blocking endpoint failed after $MAX_ATTEMPTS attempts" - exit 1 - fi - - # Step 7: Run test 2 - - name: Trigger Streaming Endpoint - run: | - MAX_ATTEMPTS=3 - ATTEMPT=1 - SUCCESS=false - - while [ $ATTEMPT -le $MAX_ATTEMPTS ]; do - echo "Attempt $ATTEMPT of $MAX_ATTEMPTS for streaming endpoint..." - - # Trigger endpoint - HTTP_RESPONSE=$(timeout 10s curl -s -w "%{http_code}" http://localhost:$QUARKUS_PORT/chat/streaming) - HTTP_RESPONSE_CODE="${HTTP_RESPONSE: -3}" - #HTTP_RESPONSE_BODY="$HTTP_RESPONSE%???}" - - # Check response code - if [ "$HTTP_RESPONSE_CODE" == "200" ]; then - echo "SUCCESS: Streaming endpoint returned HTTP code: ${HTTP_RESPONSE: -3}" - # do not show ugly streaming response body - #echo "HTTP Response body: $HTTP_RESPONSE_BODY" - SUCCESS=true - break - else - echo "Attempt $ATTEMPT failed: Streaming endpoint returned HTTP code $HTTP_RESPONSE_CODE" - - if [ $ATTEMPT -lt $MAX_ATTEMPTS ]; then - echo "Retrying in 2 seconds..." - sleep 2 - fi - fi - - ATTEMPT=$((ATTEMPT + 1)) - done - - if [ "$SUCCESS" = false ]; then - echo "ERROR: Streaming endpoint failed after $MAX_ATTEMPTS attempts" - exit 1 - fi - - # Step 8: Cleanup & Shutdown - - name: Cleanup & Shutdown - run: | - # Clean shutdown - kill $APP_PID || true - wait $APP_PID 2>/dev/null || true \ No newline at end of file