From 8568aa7851aa7a677f220d2bf00cd1c65e635365 Mon Sep 17 00:00:00 2001
From: Jorge Calvar <jorge.calvar@databricks.com>
Date: Tue, 9 Jun 2026 11:26:40 +0200
Subject: [PATCH 1/4] ci: trigger dogfood eval pipeline on run-evals PR label

Add a GitHub Actions workflow that launches the dogfood eval pipeline
(job 398185277057549) when the `run-evals` label is added to a PR, and
re-launches it on each new commit while the label stays on. The real PR
head commit is passed as `appkit_ref` so the pipeline can pull the code;
`prompt_preset=custom-pr` and `tags=appkit_pr:<number>` are also set.

Authenticates as the apps-mcp-evals-runner service principal via OAuth
M2M, and posts a sticky "Eval running" comment linking the evals-monitor
PR page and the triggered job run. Comment logic lives in
.github/scripts/upsert-eval-comment.cjs.

Co-authored-by: Isaac
Signed-off-by: Jorge Calvar <jorge.calvar@databricks.com>
---
 .github/scripts/upsert-eval-comment.cjs | 72 ++++++++++++++++++++
 .github/workflows/eval-trigger.yml      | 88 +++++++++++++++++++++++++
 2 files changed, 160 insertions(+)
 create mode 100644 .github/scripts/upsert-eval-comment.cjs
 create mode 100644 .github/workflows/eval-trigger.yml
diff --git a/.github/scripts/upsert-eval-comment.cjs b/.github/scripts/upsert-eval-comment.cjs
new file mode 100644
index 000000000..8bbf970c8
--- /dev/null
+++ b/.github/scripts/upsert-eval-comment.cjs
@@ -0,0 +1,72 @@
+/**
+ * Upserts a sticky "Eval running" comment on a PR after the dogfood eval
+ * pipeline has been launched.
+ *
+ * Invoked via `actions/github-script`. Inputs come from environment vars:
+ *   PR_NUMBER - the pull request number (must be a positive integer)
+ *   HEAD_SHA  - the commit the eval was launched for
+ *   RUN_JSON  - raw JSON from `databricks jobs run-now` (used to link the run)
+ *
+ * The comment is located by MARKER: an existing comment containing it is
+ * updated in place, otherwise a new one is created. Throws if PR_NUMBER is
+ * missing or not a positive integer.
+ */
+
+const MARKER = "<!-- pr-eval-run -->";
+const EVALS_MONITOR_URL =
+  "https://evals-monitor-6051921418418893.staging.aws.databricksapps.com";
+const DATABRICKS_HOST = "https://dogfood.staging.databricks.com";
+const JOB_ID = "398185277057549";
+const WORKSPACE_ID = "6051921418418893";
+
+module.exports = async ({ github, context }) => {
+  const { owner, repo } = context.repo;
+
+  // Fail fast with a clear message instead of sending NaN to the GitHub API.
+  const issue_number = Number(process.env.PR_NUMBER);
+  if (!Number.isInteger(issue_number) || issue_number <= 0) {
+    throw new Error(`PR_NUMBER must be a positive integer, got "${process.env.PR_NUMBER}"`);
+  }
+  const shortSha = (process.env.HEAD_SHA || "").substring(0, 7);
+
+  // run_id comes back in the run-now response, so the run link costs no extra
+  // call. The link is best-effort: on malformed output, warn and post without it.
+  let runId;
+  try {
+    runId = JSON.parse(process.env.RUN_JSON || "{}")?.run_id;
+  } catch (err) {
+    console.warn(`RUN_JSON is not valid JSON; omitting run link: ${err.message}`);
+  }
+
+  const links = [
+    `[View results in evals-monitor →](${EVALS_MONITOR_URL}/prs/appkit/${issue_number})`,
+  ];
+  if (runId) {
+    links.push(
+      `<sub>[job run ↗](${DATABRICKS_HOST}/jobs/${JOB_ID}/runs/${runId}?o=${WORKSPACE_ID})</sub>`,
+    );
+  }
+
+  const body = [
+    MARKER,
+    "### ⏳ Eval running",
+    "",
+    `Eval pipeline launched for commit \`${shortSha}\`.`,
+    "",
+    links.join(" · "),
+  ].join("\n");
+
+  const comments = await github.paginate(github.rest.issues.listComments, {
+    owner,
+    repo,
+    issue_number,
+    per_page: 100,
+  });
+  const existing = comments.find((c) => c.body?.includes(MARKER));
+
+  if (existing) {
+    await github.rest.issues.updateComment({ owner, repo, comment_id: existing.id, body });
+  } else {
+    await github.rest.issues.createComment({ owner, repo, issue_number, body });
+  }
+};
diff --git a/.github/workflows/eval-trigger.yml b/.github/workflows/eval-trigger.yml
new file mode 100644
index 000000000..08485d938
--- /dev/null
+++ b/.github/workflows/eval-trigger.yml
@@ -0,0 +1,88 @@
+name: Eval Trigger
+
+# Launches the dogfood eval pipeline for a PR when the `run-evals` label is
+# present, and re-launches it on every new commit while the label stays on.
+#
+# Uses `pull_request` (not `pull_request_target`): the workflow file that runs
+# is the PR branch's version (so it's testable on the feature branch), and repo
+# secrets are withheld from fork PRs, so an external contributor cannot exfiltrate
+# the Databricks credentials even by editing this file. Auth is OAuth M2M as the
+# `apps-mcp-evals-runner` service principal (DATABRICKS_CLIENT_ID/SECRET), and
+# those credentials are exposed only to the trigger step, never to the comment
+# step that runs PR-authored script code.
+on:
+  pull_request:
+    types: [labeled, synchronize]
+
+permissions:
+  contents: read
+  pull-requests: write
+
+# Latest push wins: a newer commit cancels the in-flight run for an older one,
+# so the sticky comment always reflects the most recently triggered commit.
+concurrency:
+  group: eval-trigger-${{ github.event.pull_request.number }}
+  cancel-in-progress: true
+
+jobs:
+  trigger-evals:
+    name: Trigger eval pipeline
+    # Run when the `run-evals` label is added, or on a new commit while the PR
+    # already carries the label.
+    if: >-
+      (github.event.action == 'labeled' && github.event.label.name == 'run-evals') ||
+      (github.event.action == 'synchronize' && contains(github.event.pull_request.labels.*.name, 'run-evals'))
+    runs-on:
+      group: databricks-protected-runner-group
+      labels: linux-ubuntu-latest
+    steps:
+      - name: Check out repository
+        uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4.3.1
+
+      - name: Install Databricks CLI
+        uses: databricks/setup-cli@772863b94473abd8b0cacbec8b6f80fa0cbe1136 # v1.2.1
+
+      - name: Trigger eval pipeline
+        id: trigger
+        env:
+          DATABRICKS_HOST: https://dogfood.staging.databricks.com
+          # OAuth M2M as the apps-mcp-evals-runner service principal. The CLI
+          # auto-selects client-credentials auth when these are present.
+          DATABRICKS_CLIENT_ID: ${{ secrets.EVALS_DATABRICKS_CLIENT_ID_DOGFOOD }}
+          DATABRICKS_CLIENT_SECRET: ${{ secrets.EVALS_DATABRICKS_CLIENT_SECRET_DOGFOOD }}
+          # The real PR head commit — never the synthetic merge commit — so the
+          # pipeline can pull the code.
+          HEAD_SHA: ${{ github.event.pull_request.head.sha }}
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+        # --no-wait: fire-and-forget; run-now otherwise blocks until the eval
+        # finishes. The JSON response carries run_id, which we forward to the
+        # comment step to link the run (no extra API call).
+        run: |
+          run_json=$(databricks jobs run-now --no-wait --output json --json "$(cat <<EOF
+          {
+            "job_id": 398185277057549,
+            "job_parameters": {
+              "appkit_ref": "${HEAD_SHA}",
+              "prompt_preset": "custom-pr",
+              "tags": "appkit_pr:${PR_NUMBER}"
+            }
+          }
+          EOF
+          )")
+          echo "$run_json"
+          {
+            echo "run_json<<RUN_JSON_EOF"
+            echo "$run_json"
+            echo "RUN_JSON_EOF"
+          } >> "$GITHUB_OUTPUT"
+
+      - name: Post / update "Eval running" comment
+        uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
+        env:
+          HEAD_SHA: ${{ github.event.pull_request.head.sha }}
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+          RUN_JSON: ${{ steps.trigger.outputs.run_json }}
+        with:
+          script: |
+            const upsert = require('./.github/scripts/upsert-eval-comment.cjs');
+            await upsert({ github, context });

From 12c3f8f460909531e2a622c8c1a73864761b1d30 Mon Sep 17 00:00:00 2001
From: Jorge Calvar <jorge.calvar@databricks.com>
Date: Tue, 9 Jun 2026 11:35:04 +0200
Subject: [PATCH 2/4] ci: add temporary auth/connectivity diagnostics to
 eval-trigger

Probe dogfood reachability + workspace OIDC discovery and run a forced
oauth-m2m authenticated call with debug logging, to pin down the "cannot
configure default credentials" failure. To be reverted once auth works.

Co-authored-by: Isaac
Signed-off-by: Jorge Calvar <jorge.calvar@databricks.com>
---
 .github/workflows/eval-trigger.yml | 27 +++++++++++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/eval-trigger.yml b/.github/workflows/eval-trigger.yml
index 08485d938..273ecdc4e 100644
--- a/.github/workflows/eval-trigger.yml
+++ b/.github/workflows/eval-trigger.yml
@@ -42,14 +42,37 @@ jobs:
       - name: Install Databricks CLI
         uses: databricks/setup-cli@772863b94473abd8b0cacbec8b6f80fa0cbe1136 # v1.2.1
 
+      # TEMPORARY diagnostic — remove once auth works. Disambiguates "cannot
+      # configure default credentials": is dogfood reachable + does OIDC
+      # discovery resolve, and does forced M2M auth produce a clearer error?
+      - name: Diagnose Databricks connectivity + auth
+        env:
+          DATABRICKS_HOST: https://dogfood.staging.databricks.com
+          DATABRICKS_CLIENT_ID: ${{ secrets.EVALS_DATABRICKS_CLIENT_ID_DOGFOOD }}
+          DATABRICKS_CLIENT_SECRET: ${{ secrets.EVALS_DATABRICKS_CLIENT_SECRET_DOGFOOD }}
+          DATABRICKS_AUTH_TYPE: oauth-m2m
+        run: |
+          echo "::group::CLI version"
+          databricks --version
+          echo "::endgroup::"
+          echo "::group::Reachability"
+          curl -sS -m 20 -o /dev/null -w "host root:      HTTP %{http_code}\n" "$DATABRICKS_HOST/" || echo "host root: UNREACHABLE"
+          curl -sS -m 20 -w "\nworkspace OIDC: HTTP %{http_code}\n" "$DATABRICKS_HOST/oidc/.well-known/oauth-authorization-server" || echo "workspace OIDC: UNREACHABLE"
+          echo "::endgroup::"
+          echo "::group::Forced M2M auth (debug)"
+          # current-user me is a cheap authenticated call; --log-level debug
+          # surfaces the real auth-resolution error. Never fails the job.
+          databricks current-user me --log-level debug || true
+          echo "::endgroup::"
+
       - name: Trigger eval pipeline
         id: trigger
         env:
           DATABRICKS_HOST: https://dogfood.staging.databricks.com
-          # OAuth M2M as the apps-mcp-evals-runner service principal. The CLI
-          # auto-selects client-credentials auth when these are present.
+          # OAuth M2M as the apps-mcp-evals-runner service principal.
           DATABRICKS_CLIENT_ID: ${{ secrets.EVALS_DATABRICKS_CLIENT_ID_DOGFOOD }}
           DATABRICKS_CLIENT_SECRET: ${{ secrets.EVALS_DATABRICKS_CLIENT_SECRET_DOGFOOD }}
+          DATABRICKS_AUTH_TYPE: oauth-m2m
           # The real PR head commit — never the synthetic merge commit — so the
           # pipeline can pull the code.
           HEAD_SHA: ${{ github.event.pull_request.head.sha }}

From 2a240dd909d07ecc6deca571ce0b79094e761a0f Mon Sep 17 00:00:00 2001
From: Jorge Calvar <jorge.calvar@databricks.com>
Date: Tue, 9 Jun 2026 11:41:12 +0200
Subject: [PATCH 3/4] ci: add workflow_dispatch + configurable runner to
 eval-trigger

Allow manual runs to probe staging connectivity from an arbitrary runner
group (runner_group/runner_labels inputs), since dogfood.staging blocks
the default databricks-protected-runner-group at the network edge. A bare
dispatch runs only the diagnostic; pass pr_number to also trigger the job
and post the comment.

Co-authored-by: Isaac
Signed-off-by: Jorge Calvar <jorge.calvar@databricks.com>
---
 .github/workflows/eval-trigger.yml | 51 +++++++++++++++++++++++-------
 1 file changed, 39 insertions(+), 12 deletions(-)

diff --git a/.github/workflows/eval-trigger.yml b/.github/workflows/eval-trigger.yml
index 273ecdc4e..774a854ce 100644
--- a/.github/workflows/eval-trigger.yml
+++ b/.github/workflows/eval-trigger.yml
@@ -10,9 +10,32 @@ name: Eval Trigger
 # `apps-mcp-evals-runner` service principal (DATABRICKS_CLIENT_ID/SECRET), and
 # those credentials are exposed only to the trigger step, never to the comment
 # step that runs PR-authored script code.
+#
+# workflow_dispatch is provided for manual testing — notably to probe whether a
+# given runner group can reach dogfood.staging (which is network-restricted).
+# A bare manual run executes only the connectivity diagnostic; pass `pr_number`
+# to also trigger the job and post the comment.
 on:
   pull_request:
     types: [labeled, synchronize]
+  workflow_dispatch:
+    inputs:
+      runner_group:
+        description: "Runner group to run on (default: databricks-protected-runner-group)"
+        required: false
+        type: string
+      runner_labels:
+        description: "Runner labels (default: linux-ubuntu-latest)"
+        required: false
+        type: string
+      pr_number:
+        description: "PR number — if set, actually triggers the eval job and posts the comment"
+        required: false
+        type: string
+      appkit_ref:
+        description: "Commit SHA to eval (default: the ref this run is on)"
+        required: false
+        type: string
 
 permissions:
   contents: read
@@ -21,20 +44,21 @@ permissions:
 # Latest push wins: a newer commit cancels the in-flight run for an older one,
 # so the sticky comment always reflects the most recently triggered commit.
 concurrency:
-  group: eval-trigger-${{ github.event.pull_request.number }}
+  group: eval-trigger-${{ github.event.pull_request.number || inputs.pr_number || github.run_id }}
   cancel-in-progress: true
 
 jobs:
   trigger-evals:
     name: Trigger eval pipeline
-    # Run when the `run-evals` label is added, or on a new commit while the PR
-    # already carries the label.
+    # Run when manually dispatched, when the `run-evals` label is added, or on a
+    # new commit while the PR already carries the label.
     if: >-
+      github.event_name == 'workflow_dispatch' ||
       (github.event.action == 'labeled' && github.event.label.name == 'run-evals') ||
       (github.event.action == 'synchronize' && contains(github.event.pull_request.labels.*.name, 'run-evals'))
     runs-on:
-      group: databricks-protected-runner-group
-      labels: linux-ubuntu-latest
+      group: ${{ inputs.runner_group || 'databricks-protected-runner-group' }}
+      labels: ${{ inputs.runner_labels || 'linux-ubuntu-latest' }}
     steps:
       - name: Check out repository
         uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4.3.1
@@ -42,9 +66,9 @@ jobs:
       - name: Install Databricks CLI
         uses: databricks/setup-cli@772863b94473abd8b0cacbec8b6f80fa0cbe1136 # v1.2.1
 
-      # TEMPORARY diagnostic — remove once auth works. Disambiguates "cannot
-      # configure default credentials": is dogfood reachable + does OIDC
-      # discovery resolve, and does forced M2M auth produce a clearer error?
+      # TEMPORARY diagnostic — remove once auth/connectivity works. Probes
+      # whether this runner can reach dogfood.staging and resolve OIDC, then
+      # tries a forced M2M authenticated call with debug logging.
       - name: Diagnose Databricks connectivity + auth
         env:
           DATABRICKS_HOST: https://dogfood.staging.databricks.com
@@ -67,6 +91,8 @@ jobs:
 
       - name: Trigger eval pipeline
         id: trigger
+        # On a bare manual dispatch (no pr_number) we only run diagnostics above.
+        if: github.event_name != 'workflow_dispatch' || inputs.pr_number != ''
         env:
           DATABRICKS_HOST: https://dogfood.staging.databricks.com
           # OAuth M2M as the apps-mcp-evals-runner service principal.
@@ -75,8 +101,8 @@ jobs:
           DATABRICKS_AUTH_TYPE: oauth-m2m
           # The real PR head commit — never the synthetic merge commit — so the
           # pipeline can pull the code.
-          HEAD_SHA: ${{ github.event.pull_request.head.sha }}
-          PR_NUMBER: ${{ github.event.pull_request.number }}
+          HEAD_SHA: ${{ github.event.pull_request.head.sha || inputs.appkit_ref || github.sha }}
+          PR_NUMBER: ${{ github.event.pull_request.number || inputs.pr_number }}
         # --no-wait: fire-and-forget; run-now otherwise blocks until the eval
         # finishes. The JSON response carries run_id, which we forward to the
         # comment step to link the run (no extra API call).
@@ -100,10 +126,11 @@ jobs:
           } >> "$GITHUB_OUTPUT"
 
       - name: Post / update "Eval running" comment
+        if: github.event_name != 'workflow_dispatch' || inputs.pr_number != ''
         uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
         env:
-          HEAD_SHA: ${{ github.event.pull_request.head.sha }}
-          PR_NUMBER: ${{ github.event.pull_request.number }}
+          HEAD_SHA: ${{ github.event.pull_request.head.sha || inputs.appkit_ref || github.sha }}
+          PR_NUMBER: ${{ github.event.pull_request.number || inputs.pr_number }}
           RUN_JSON: ${{ steps.trigger.outputs.run_json }}
         with:
           script: |

From eb61d576b7ba357610e1dee220dac11679c79430 Mon Sep 17 00:00:00 2001
From: Jorge Calvar <jorge.calvar@databricks.com>
Date: Tue, 9 Jun 2026 11:45:19 +0200
Subject: [PATCH 4/4] ci: add id-token: write so runner can reach
 dogfood.staging

The databricks-protected-runner-group's egress to internal Databricks
hosts is gated by the GitHub OIDC identity. Without `id-token: write` the
egress proxy returns 403 "RBAC: access denied" for every request (incl.
anonymous curl to dogfood.staging), which is what broke OAuth M2M. All
other Databricks workflows in this repo set this permission.

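Also revert the temporary manual-dispatch/configurable-runner testing
scaffolding; back to label/synchronize on databricks-protected-runner-group.
The connectivity/auth diagnostic step is kept for now (minus the CLI version
probe) and is to be removed once the workflow is confirmed green.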

Co-authored-by: Isaac
Signed-off-by: Jorge Calvar <jorge.calvar@databricks.com>
---
 .github/workflows/eval-trigger.yml | 62 ++++++++----------------------
 1 file changed, 17 insertions(+), 45 deletions(-)

diff --git a/.github/workflows/eval-trigger.yml b/.github/workflows/eval-trigger.yml
index 774a854ce..660c8dcbb 100644
--- a/.github/workflows/eval-trigger.yml
+++ b/.github/workflows/eval-trigger.yml
@@ -10,55 +10,36 @@ name: Eval Trigger
 # `apps-mcp-evals-runner` service principal (DATABRICKS_CLIENT_ID/SECRET), and
 # those credentials are exposed only to the trigger step, never to the comment
 # step that runs PR-authored script code.
-#
-# workflow_dispatch is provided for manual testing — notably to probe whether a
-# given runner group can reach dogfood.staging (which is network-restricted).
-# A bare manual run executes only the connectivity diagnostic; pass `pr_number`
-# to also trigger the job and post the comment.
 on:
   pull_request:
     types: [labeled, synchronize]
-  workflow_dispatch:
-    inputs:
-      runner_group:
-        description: "Runner group to run on (default: databricks-protected-runner-group)"
-        required: false
-        type: string
-      runner_labels:
-        description: "Runner labels (default: linux-ubuntu-latest)"
-        required: false
-        type: string
-      pr_number:
-        description: "PR number — if set, actually triggers the eval job and posts the comment"
-        required: false
-        type: string
-      appkit_ref:
-        description: "Commit SHA to eval (default: the ref this run is on)"
-        required: false
-        type: string
 
+# `id-token: write` is required for the databricks-protected-runner-group's
+# egress to internal Databricks hosts (incl. dogfood.staging) — without it the
+# egress proxy returns 403 "RBAC: access denied" for every request. Matches the
+# other workflows in this repo (ci.yml, prepare-release.yml, docs-deploy.yml).
 permissions:
   contents: read
   pull-requests: write
+  id-token: write
 
 # Latest push wins: a newer commit cancels the in-flight run for an older one,
 # so the sticky comment always reflects the most recently triggered commit.
 concurrency:
-  group: eval-trigger-${{ github.event.pull_request.number || inputs.pr_number || github.run_id }}
+  group: eval-trigger-${{ github.event.pull_request.number }}
   cancel-in-progress: true
 
 jobs:
   trigger-evals:
     name: Trigger eval pipeline
-    # Run when manually dispatched, when the `run-evals` label is added, or on a
-    # new commit while the PR already carries the label.
+    # Run when the `run-evals` label is added, or on a new commit while the PR
+    # already carries the label.
     if: >-
-      github.event_name == 'workflow_dispatch' ||
       (github.event.action == 'labeled' && github.event.label.name == 'run-evals') ||
       (github.event.action == 'synchronize' && contains(github.event.pull_request.labels.*.name, 'run-evals'))
     runs-on:
-      group: ${{ inputs.runner_group || 'databricks-protected-runner-group' }}
-      labels: ${{ inputs.runner_labels || 'linux-ubuntu-latest' }}
+      group: databricks-protected-runner-group
+      labels: linux-ubuntu-latest
     steps:
       - name: Check out repository
         uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4.3.1
@@ -66,9 +47,8 @@ jobs:
       - name: Install Databricks CLI
         uses: databricks/setup-cli@772863b94473abd8b0cacbec8b6f80fa0cbe1136 # v1.2.1
 
-      # TEMPORARY diagnostic — remove once auth/connectivity works. Probes
-      # whether this runner can reach dogfood.staging and resolve OIDC, then
-      # tries a forced M2M authenticated call with debug logging.
+      # TEMPORARY diagnostic — remove once confirmed green. Probes dogfood
+      # reachability + OIDC discovery and a forced M2M authenticated call.
       - name: Diagnose Databricks connectivity + auth
         env:
           DATABRICKS_HOST: https://dogfood.staging.databricks.com
@@ -76,23 +56,16 @@ jobs:
           DATABRICKS_CLIENT_SECRET: ${{ secrets.EVALS_DATABRICKS_CLIENT_SECRET_DOGFOOD }}
           DATABRICKS_AUTH_TYPE: oauth-m2m
         run: |
-          echo "::group::CLI version"
-          databricks --version
-          echo "::endgroup::"
           echo "::group::Reachability"
           curl -sS -m 20 -o /dev/null -w "host root:      HTTP %{http_code}\n" "$DATABRICKS_HOST/" || echo "host root: UNREACHABLE"
           curl -sS -m 20 -w "\nworkspace OIDC: HTTP %{http_code}\n" "$DATABRICKS_HOST/oidc/.well-known/oauth-authorization-server" || echo "workspace OIDC: UNREACHABLE"
           echo "::endgroup::"
-          echo "::group::Forced M2M auth (debug)"
-          # current-user me is a cheap authenticated call; --log-level debug
-          # surfaces the real auth-resolution error. Never fails the job.
+          echo "::group::M2M auth check"
           databricks current-user me --log-level debug || true
           echo "::endgroup::"
 
       - name: Trigger eval pipeline
         id: trigger
-        # On a bare manual dispatch (no pr_number) we only run diagnostics above.
-        if: github.event_name != 'workflow_dispatch' || inputs.pr_number != ''
         env:
           DATABRICKS_HOST: https://dogfood.staging.databricks.com
           # OAuth M2M as the apps-mcp-evals-runner service principal.
@@ -101,8 +74,8 @@ jobs:
           DATABRICKS_AUTH_TYPE: oauth-m2m
           # The real PR head commit — never the synthetic merge commit — so the
           # pipeline can pull the code.
-          HEAD_SHA: ${{ github.event.pull_request.head.sha || inputs.appkit_ref || github.sha }}
-          PR_NUMBER: ${{ github.event.pull_request.number || inputs.pr_number }}
+          HEAD_SHA: ${{ github.event.pull_request.head.sha }}
+          PR_NUMBER: ${{ github.event.pull_request.number }}
         # --no-wait: fire-and-forget; run-now otherwise blocks until the eval
         # finishes. The JSON response carries run_id, which we forward to the
         # comment step to link the run (no extra API call).
@@ -126,11 +99,10 @@ jobs:
           } >> "$GITHUB_OUTPUT"
 
       - name: Post / update "Eval running" comment
-        if: github.event_name != 'workflow_dispatch' || inputs.pr_number != ''
         uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
         env:
-          HEAD_SHA: ${{ github.event.pull_request.head.sha || inputs.appkit_ref || github.sha }}
-          PR_NUMBER: ${{ github.event.pull_request.number || inputs.pr_number }}
+          HEAD_SHA: ${{ github.event.pull_request.head.sha }}
+          PR_NUMBER: ${{ github.event.pull_request.number }}
           RUN_JSON: ${{ steps.trigger.outputs.run_json }}
         with:
           script: |