Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
171 changes: 145 additions & 26 deletions .github/workflows/build-test-linux-x86_64.yml

Large diffs are not rendered by default.

129 changes: 112 additions & 17 deletions .github/workflows/build-test-linux-x86_64_rtx.yml
Original file line number Diff line number Diff line change
Expand Up @@ -105,9 +105,12 @@
use-rtx: true
script: |
set -euo pipefail
# Pull in trt_pytest (reruns + reproduce hint). One source of truth
# for the rerun regex; see tests/py/ci_helpers.sh.
source tests/py/ci_helpers.sh
pushd .
cd tests/py/dynamo
python -m pytest -ra -n 8 --junitxml=${RUNNER_TEST_RESULTS_DIR}/l0_dynamo_converter_tests_results.xml --maxfail=20 conversion/
trt_pytest -ra -n 8 --junitxml=${RUNNER_TEST_RESULTS_DIR}/l0_dynamo_converter_tests_results.xml --maxfail=20 conversion/
popd

L0-dynamo-core-tests:
Expand Down Expand Up @@ -136,12 +139,15 @@
use-rtx: true
script: |
set -euo pipefail
# Pull in trt_pytest (reruns + reproduce hint). One source of truth
# for the rerun regex; see tests/py/ci_helpers.sh.
source tests/py/ci_helpers.sh
pushd .
cd tests/py
cd dynamo
python -m pytest -ra -n 8 --junitxml=${RUNNER_TEST_RESULTS_DIR}/l0_dynamo_core_runtime_tests_results.xml runtime/test_000_*
python -m pytest -ra -n 8 --junitxml=${RUNNER_TEST_RESULTS_DIR}/l0_dynamo_core_partitioning_tests_results.xml partitioning/
python -m pytest -ra -n 8 --junitxml=${RUNNER_TEST_RESULTS_DIR}/l0_dynamo_core_lowering_tests_results.xml lowering/
trt_pytest -ra -n 8 --junitxml=${RUNNER_TEST_RESULTS_DIR}/l0_dynamo_core_runtime_tests_results.xml runtime/test_000_*
trt_pytest -ra -n 8 --junitxml=${RUNNER_TEST_RESULTS_DIR}/l0_dynamo_core_partitioning_tests_results.xml partitioning/
trt_pytest -ra -n 8 --junitxml=${RUNNER_TEST_RESULTS_DIR}/l0_dynamo_core_lowering_tests_results.xml lowering/
popd

L0-py-core-tests:
Expand Down Expand Up @@ -170,9 +176,12 @@
use-rtx: true
script: |
set -euo pipefail
# Pull in trt_pytest (reruns + reproduce hint). One source of truth
# for the rerun regex; see tests/py/ci_helpers.sh.
source tests/py/ci_helpers.sh
pushd .
cd tests/py/core
python -m pytest -ra -n 8 --junitxml=${RUNNER_TEST_RESULTS_DIR}/l0_py_core_tests_results.xml .
trt_pytest -ra -n 8 --junitxml=${RUNNER_TEST_RESULTS_DIR}/l0_py_core_tests_results.xml .
popd

L1-dynamo-core-tests:
Expand Down Expand Up @@ -201,16 +210,19 @@
use-rtx: true
script: |
set -euo pipefail
# Pull in trt_pytest (reruns + reproduce hint). One source of truth
# for the rerun regex; see tests/py/ci_helpers.sh.
source tests/py/ci_helpers.sh
pushd .
cd tests/py/dynamo
python -m pytest -ra -n 8 --junitxml=${RUNNER_TEST_RESULTS_DIR}/l1_dynamo_core_tests_results.xml runtime/test_001_*
python -m pytest -ra -n 8 --junitxml=${RUNNER_TEST_RESULTS_DIR}/l1_dynamo_hlo_tests_results.xml hlo/
trt_pytest -ra -n 8 --junitxml=${RUNNER_TEST_RESULTS_DIR}/l1_dynamo_core_tests_results.xml runtime/test_001_*
trt_pytest -ra -n 8 --junitxml=${RUNNER_TEST_RESULTS_DIR}/l1_dynamo_hlo_tests_results.xml hlo/
popd

L1-dynamo-compile-tests:
name: ${{ matrix.display-name }}
needs: [filter-matrix, build, L0-dynamo-converter-tests, L0-dynamo-core-tests, L0-py-core-tests]
if: ${{ (github.ref_name == 'main' || github.ref_name == 'nightly' || startsWith(github.ref_name, 'release/') || (startsWith(github.ref, 'refs/tags/v') && contains(github.ref_name, '-rc')) || contains(github.event.pull_request.labels.*.name, 'Force All Tests[L0+L1+L2]')) && always() || success() }}
if: "${{ !contains(github.event.pull_request.labels.*.name, 'ci: only-l0') && ((github.ref_name == 'main' || github.ref_name == 'nightly' || startsWith(github.ref_name, 'release/') || (startsWith(github.ref, 'refs/tags/v') && contains(github.ref_name, '-rc')) || contains(github.event.pull_request.labels.*.name, 'Force All Tests[L0+L1+L2]')) && always() || success()) }}"
strategy:
fail-fast: false
matrix:
Expand All @@ -233,15 +245,18 @@
use-rtx: true
script: |
set -euo pipefail
# Pull in trt_pytest (reruns + reproduce hint). One source of truth
# for the rerun regex; see tests/py/ci_helpers.sh.
source tests/py/ci_helpers.sh
pushd .
cd tests/py/dynamo/
python -m pytest -m critical -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/l1_dynamo_compile_tests_results.xml models/
trt_pytest -m critical -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/l1_dynamo_compile_tests_results.xml models/
popd

L1-torch-compile-tests:
name: ${{ matrix.display-name }}
needs: [filter-matrix, build, L0-dynamo-converter-tests, L0-dynamo-core-tests, L0-py-core-tests]
if: ${{ (github.ref_name == 'main' || github.ref_name == 'nightly' || startsWith(github.ref_name, 'release/') || (startsWith(github.ref, 'refs/tags/v') && contains(github.ref_name, '-rc')) || contains(github.event.pull_request.labels.*.name, 'Force All Tests[L0+L1+L2]')) && always() || success() }}
if: "${{ !contains(github.event.pull_request.labels.*.name, 'ci: only-l0') && ((github.ref_name == 'main' || github.ref_name == 'nightly' || startsWith(github.ref_name, 'release/') || (startsWith(github.ref, 'refs/tags/v') && contains(github.ref_name, '-rc')) || contains(github.event.pull_request.labels.*.name, 'Force All Tests[L0+L1+L2]')) && always() || success()) }}"
strategy:
fail-fast: false
matrix:
Expand All @@ -264,18 +279,21 @@
use-rtx: true
script: |
set -euo pipefail
# Pull in trt_pytest (reruns + reproduce hint). One source of truth
# for the rerun regex; see tests/py/ci_helpers.sh.
source tests/py/ci_helpers.sh
pushd .
cd tests/py/dynamo/
python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/l1_torch_compile_be_tests_results.xml backend/
python -m pytest -m critical -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/l1_torch_compile_models_tests_results.xml --ir torch_compile models/test_models.py
python -m pytest -m critical -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/l1_torch_compile_dyn_models_tests_results.xml --ir torch_compile models/test_dyn_models.py
trt_pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/l1_torch_compile_be_tests_results.xml backend/
trt_pytest -m critical -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/l1_torch_compile_models_tests_results.xml --ir torch_compile models/test_models.py
trt_pytest -m critical -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/l1_torch_compile_dyn_models_tests_results.xml --ir torch_compile models/test_dyn_models.py
popd


L2-torch-compile-tests:
name: ${{ matrix.display-name }}
needs: [filter-matrix, build, L1-torch-compile-tests, L1-dynamo-compile-tests, L1-dynamo-core-tests]
if: ${{ (github.ref_name == 'main' || github.ref_name == 'nightly' || startsWith(github.ref_name, 'release/') || (startsWith(github.ref, 'refs/tags/v') && contains(github.ref_name, '-rc')) || contains(github.event.pull_request.labels.*.name, 'Force All Tests[L0+L1+L2]')) && always() || success() }}
# Label gating: `ci: only-l0` and `ci: skip-l2` both suppress this L2 job,
# matching the other L2 jobs in this workflow. Otherwise the job runs
# unconditionally (always()) on main / nightly / release branches, -rc tags
# and PRs labelled `Force All Tests[L0+L1+L2]`, and only after the jobs in
# `needs` succeeded (success()) everywhere else.
if: "${{ !contains(github.event.pull_request.labels.*.name, 'ci: only-l0') && !contains(github.event.pull_request.labels.*.name, 'ci: skip-l2') && ((github.ref_name == 'main' || github.ref_name == 'nightly' || startsWith(github.ref_name, 'release/') || (startsWith(github.ref, 'refs/tags/v') && contains(github.ref_name, '-rc')) || contains(github.event.pull_request.labels.*.name, 'Force All Tests[L0+L1+L2]')) && always() || success()) }}"
strategy:
fail-fast: false
matrix:
Expand Down Expand Up @@ -307,7 +325,7 @@
L2-dynamo-compile-tests:
name: ${{ matrix.display-name }}
needs: [filter-matrix, build, L1-dynamo-compile-tests, L1-dynamo-core-tests, L1-torch-compile-tests]
if: ${{ (github.ref_name == 'main' || github.ref_name == 'nightly' || startsWith(github.ref_name, 'release/') || (startsWith(github.ref, 'refs/tags/v') && contains(github.ref_name, '-rc')) || contains(github.event.pull_request.labels.*.name, 'Force All Tests[L0+L1+L2]')) && always() || success() }}
if: "${{ !contains(github.event.pull_request.labels.*.name, 'ci: only-l0') && !contains(github.event.pull_request.labels.*.name, 'ci: skip-l2') && ((github.ref_name == 'main' || github.ref_name == 'nightly' || startsWith(github.ref_name, 'release/') || (startsWith(github.ref, 'refs/tags/v') && contains(github.ref_name, '-rc')) || contains(github.event.pull_request.labels.*.name, 'Force All Tests[L0+L1+L2]')) && always() || success()) }}"
strategy:
fail-fast: false
matrix:
Expand Down Expand Up @@ -339,7 +357,7 @@
L2-dynamo-core-tests:
name: ${{ matrix.display-name }}
needs: [filter-matrix, build, L1-dynamo-core-tests, L1-dynamo-compile-tests, L1-torch-compile-tests]
if: ${{ (github.ref_name == 'main' || github.ref_name == 'nightly' || startsWith(github.ref_name, 'release/') || (startsWith(github.ref, 'refs/tags/v') && contains(github.ref_name, '-rc')) || contains(github.event.pull_request.labels.*.name, 'Force All Tests[L0+L1+L2]')) && always() || success() }}
if: "${{ !contains(github.event.pull_request.labels.*.name, 'ci: only-l0') && !contains(github.event.pull_request.labels.*.name, 'ci: skip-l2') && ((github.ref_name == 'main' || github.ref_name == 'nightly' || startsWith(github.ref_name, 'release/') || (startsWith(github.ref, 'refs/tags/v') && contains(github.ref_name, '-rc')) || contains(github.event.pull_request.labels.*.name, 'Force All Tests[L0+L1+L2]')) && always() || success()) }}"
strategy:
fail-fast: false
matrix:
Expand Down Expand Up @@ -370,7 +388,7 @@
L2-dynamo-plugin-tests:
name: ${{ matrix.display-name }}
needs: [filter-matrix, build, L1-dynamo-core-tests, L1-dynamo-compile-tests, L1-torch-compile-tests]
if: ${{ (github.ref_name == 'main' || github.ref_name == 'nightly' || startsWith(github.ref_name, 'release/') || (startsWith(github.ref, 'refs/tags/v') && contains(github.ref_name, '-rc')) || contains(github.event.pull_request.labels.*.name, 'Force All Tests[L0+L1+L2]')) && always() || success() }}
if: "${{ !contains(github.event.pull_request.labels.*.name, 'ci: only-l0') && !contains(github.event.pull_request.labels.*.name, 'ci: skip-l2') && ((github.ref_name == 'main' || github.ref_name == 'nightly' || startsWith(github.ref_name, 'release/') || (startsWith(github.ref, 'refs/tags/v') && contains(github.ref_name, '-rc')) || contains(github.event.pull_request.labels.*.name, 'Force All Tests[L0+L1+L2]')) && always() || success()) }}"
strategy:
fail-fast: false
matrix:
Expand Down Expand Up @@ -398,7 +416,84 @@
python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/l2_dynamo_plugins_tests_results.xml automatic_plugin/
popd

# Single rollup status for the RTX matrix; mirror the non-RTX workflow's
# ci-rollup so branch protection can require one check per workflow.
ci-rollup:
  name: CI / Linux x86_64 (RTX)
  # always(): run even when a job in `needs` failed or was skipped, so the
  # rollup can report on it instead of being skipped itself.
  if: ${{ always() }}
  needs:
    - build
    - L0-dynamo-converter-tests
    - L0-dynamo-core-tests
    - L0-py-core-tests
    - L1-dynamo-core-tests
    - L1-dynamo-compile-tests
    - L1-torch-compile-tests
    - L2-torch-compile-tests
    - L2-dynamo-compile-tests
    - L2-dynamo-core-tests
    - L2-dynamo-plugin-tests
  runs-on: ubuntu-latest
  # This job only reads the `needs` context and appends to the step summary;
  # it never uses GITHUB_TOKEN, so drop every token scope.
  permissions: {}
  steps:
    - name: Aggregate job results
      env:
        # JSON object keyed by job id; the script reads each entry's "result".
        RESULTS: ${{ toJSON(needs) }}
        # Surface a label so the markdown summary disambiguates RTX vs standard.
        WORKFLOW_LABEL: "Linux x86_64 (RTX)"
      run: |
        set -euo pipefail
        # Same logic as the non-RTX rollup: stdout for the rollup status,
        # $GITHUB_STEP_SUMMARY for the reviewer-facing markdown table.
        python3 - <<'PY'
        import json, os, sys

        needs = json.loads(os.environ["RESULTS"])
        label = os.environ.get("WORKFLOW_LABEL", "Linux x86_64")

        # Bucket job ids by result. A missing/empty result lands in "unknown",
        # which is counted nowhere below.
        by_result = {"success": [], "failure": [], "skipped": [], "cancelled": []}
        for name, info in needs.items():
            by_result.setdefault(info.get("result") or "unknown", []).append(name)
        failed = sorted(by_result["failure"])
        passed = sorted(by_result["success"])
        skipped = sorted(by_result["skipped"])
        cancelled = sorted(by_result["cancelled"])

        print(f"PASS: {len(passed)}")
        print(f"FAIL: {len(failed)}")
        print(f"SKIPPED: {len(skipped)} (label-gated or never started)")
        print(f"CANCELLED: {len(cancelled)}")
        if failed:
            print()
            print("Failed jobs:")
            for name in failed:
                print(f" - {name}")

        # Reviewer-facing markdown table; only written when the runner
        # provides a summary file.
        summary_path = os.environ.get("GITHUB_STEP_SUMMARY")
        if summary_path:
            icon = {"success": "✅", "failure": "❌", "skipped": "⏭️", "cancelled": "🚫"}
            with open(summary_path, "a", encoding="utf-8") as f:
                f.write(f"# CI / {label} — rollup\n\n")
                f.write(
                    f"**{len(passed)}** passed · "
                    f"**{len(failed)}** failed · "
                    f"**{len(skipped)}** skipped · "
                    f"**{len(cancelled)}** cancelled\n\n"
                )
                f.write("| Result | Job |\n|---|---|\n")
                # Most actionable results first.
                for status in ("failure", "cancelled", "skipped", "success"):
                    for name in sorted(by_result.get(status, [])):
                        f.write(f"| {icon.get(status, '?')} {status} | `{name}` |\n")
                if failed:
                    f.write(
                        "\n> Click into a failed job above to see the "
                        "rendered test table (via `pytest-results-action`) "
                        "and the `::warning::Reproduce locally with: ...` "
                        "hint near the bottom of the log.\n"
                    )

        # Only "failure" results turn the rollup red; skipped and cancelled
        # jobs leave it green.
        # NOTE(review): confirm that a cancelled dependency should not fail
        # the check that branch protection requires.
        if failed:
            sys.exit(1)
        PY

Check warning

Code scanning / CodeQL

Workflow does not contain permissions Medium

Actions job or workflow does not limit the permissions of the GITHUB_TOKEN. Consider setting an explicit permissions block, using the following as a minimal starting point: {}
Comment on lines +422 to +496

concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-tensorrt-rtx-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }}-${{ inputs.job-name }}
Expand Down
12 changes: 12 additions & 0 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,18 @@ We use the PyTorch Slack for communication about core development, integration w

- Document hacks, we can discuss it only if we can find it

### Controlling CI scope via PR labels

A full CI run is ~150 jobs across {Python 3.10–3.13} × {CUDA 13.0, 13.2} × {build, L0, L1, L2}. To keep PR feedback fast we let you shape what runs via labels — apply them in the PR's right sidebar and re-push (or close/reopen) to re-trigger:

| Label | Effect |
|---|---|
| `ci: only-l0` | Skip L1 and L2 jobs. Useful for docs / build-system changes where only the L0 smoke tests matter. |
| `ci: skip-l2` | Run L0 + L1, skip L2 (the slow model-level suites). |
| `Force All Tests[L0+L1+L2]` | Pre-existing — force every tier to run even if an earlier tier failed. Used when investigating cascading failures. |

PRs without any of these labels run the default set: build + L0 + L1 + L2, with L1/L2 gated on the previous tier's success so a fundamental build break doesn't waste 30 min of test capacity.

### Commits and PRs

- Try to keep pull requests focused (multiple pull requests are okay). Typically PRs should focus on a single issue or a small collection of closely related issues.
Expand Down
5 changes: 4 additions & 1 deletion py/torch_tensorrt/dynamo/runtime/_TRTEngine.py
Original file line number Diff line number Diff line change
Expand Up @@ -700,10 +700,13 @@ def device_memory_budget(self) -> Any:
def device_memory_budget(self, budget_bytes: int) -> None:
if budget_bytes < 0:
budget_bytes = self.streamable_device_memory_budget
# TRT 11+ rejects setWeightStreamingBudgetV2 while an IExecutionContext
# is alive (use_count must be 1). Drop the context BEFORE setting the
# budget — matches the C++ runtime's TRTEngine::set_device_memory_budget.
self.invalidate_context()
self.cuda_engine.weight_streaming_budget_v2 = budget_bytes
if self.cuda_engine.weight_streaming_budget_v2 != budget_bytes:
logger.error(f"Failed to set weight streaming budget to {budget_bytes}")
self.invalidate_context()
self.runtime_states.context_changed = True

def reset_captured_graph(self) -> None:
Expand Down
14 changes: 14 additions & 0 deletions py/torch_tensorrt/runtime/_runtime_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,20 @@ def __init__(self, path: str = "") -> None:
self._pending_warm_bytes: Optional[bytes] = None
self._lock = threading.Lock()

def __getstate__(self) -> dict:
# ``threading.Lock`` is not picklable, which breaks ``copy.deepcopy``
# on any GraphModule that has us in its state (the cross-runtime
# export path calls deepcopy on the gm before re-tracing). The lock
# guards in-process mutations only; a freshly-deserialized cache
# always needs a new lock anyway.
state = self.__dict__.copy()
state.pop("_lock", None)
return state

def __setstate__(self, state: dict) -> None:
self.__dict__.update(state)
self._lock = threading.Lock()

def serialize(self) -> torch.Tensor:
with self._lock:
if self._cache is None:
Expand Down
15 changes: 13 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,15 @@ test = [
"parameterized>=0.2.0",
"pytest>=8.2.1",
"pytest-forked>=1.6.0",
# Emits ``::error file=...,line=...::`` annotations on test failure so
# GitHub renders them inline on the PR's Files Changed tab. Inert when
# ``GITHUB_ACTIONS`` is unset (local runs are unaffected).
"pytest-github-actions-annotate-failures>=0.2.0",
# pytest-rerunfailures lets CI retry tests that hit known transient
# CUDA / cudagraphs / engine-deserialization failures (see the
# ``--only-rerun`` patterns in ``trt_pytest``, tests/py/ci_helpers.sh)
# without papering over real bugs in numerical / model-accuracy suites
# where it is omitted.
"pytest-rerunfailures>=14.0",
"pytest-xdist>=3.6.1",
"pyyaml",
"setuptools",
Expand Down Expand Up @@ -110,8 +119,10 @@ quantization = [
]

# Optional runtime deps for the torch_tensorrt.kernels QDP-plugin layer,
# which compiles user-supplied CUDA C++ kernels via NVRTC.
kernels = ["cuda-python"]
# which compiles user-supplied CUDA C++ kernels via NVRTC. The high-level
# launch/compile API (``cuda.core``) lives in cuda-core; cuda-python's
# bindings are still pulled in for the lower-level driver/runtime shims.
kernels = ["cuda-python", "cuda-core"]

[project.urls]
Homepage = "https://pytorch.org/tensorrt"
Expand Down
26 changes: 26 additions & 0 deletions tests/py/ci_helpers.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Shared shell helpers for Torch-TensorRT CI test scripts.
# Sourced from L0/L1 script blocks in .github/workflows/build-test-linux-x86_64*.yml.
#
# Update this file (not the YAMLs) when adjusting the pytest rerun policy or
# the reproduce-locally hint. Tested only via running CI; if you change a
# function signature, audit every ``source tests/py/ci_helpers.sh`` site.

# trt_pytest wraps ``python -m pytest`` with:
#   * --reruns 1 --reruns-delay 5: retry a failing test once, after a 5 s
#     pause, but only when its failure matches one of the --only-rerun
#     patterns below (known transient cudagraphs/TRT-driver flakes).
#     Expand the patterns only with concrete evidence; broad regexes hide
#     real bugs.
#   * an inline ``::warning::`` reproduce hint on failure so reviewers can
#     copy-paste the exact local repro command.
#
# Arguments: forwarded unchanged to pytest, after the rerun flags.
# Returns:   0 when pytest succeeds, 1 on any pytest failure.
#
# Usage (inside an L0/L1 script: | block):
#   source tests/py/ci_helpers.sh
#   cd tests/py/dynamo
#   trt_pytest -ra -n 8 --junitxml="$RUNNER_TEST_RESULTS_DIR/foo.xml" runtime/test_001_*
trt_pytest() {
  # Keep the flags in arrays: a pattern containing spaces must reach pytest
  # as ONE argument. A flat string expanded unquoted is word-split, and
  # quotes stored inside it are passed through literally, so the trailing
  # words would be treated as test paths.
  local -a rerun=(--reruns 1 --reruns-delay 5)
  local -a only_rerun=(
    --only-rerun 'cudaErrorStreamCaptureInvalidated'
    --only-rerun 'Stream capture invalidated'
  )
  if ! python -m pytest "${rerun[@]}" "${only_rerun[@]}" "$@"; then
    echo "::warning::pytest failed. Reproduce locally with: cd $(pwd) && uv run pytest $*"
    return 1
  fi
}
5 changes: 2 additions & 3 deletions tests/py/dynamo/conversion/test_cumsum_aten.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import sys
import unittest

import torch
Expand All @@ -11,8 +10,8 @@


@unittest.skipIf(
torch_tensorrt.ENABLED_FEATURES.tensorrt_rtx and sys.platform == "win32",
"cumsum errors out on TensorRT-RTX on Windows",
torch_tensorrt.ENABLED_FEATURES.tensorrt_rtx,
"cumsum is not supported on TensorRT-RTX (build_serialized_network returns None on Linux as well as Windows)",
)
class TestCumsumConverter(DispatchTestCase):
@parameterized.expand(
Expand Down
Loading
Loading