From a1804cfa802052d70bd5f1504c2119528068d1f5 Mon Sep 17 00:00:00 2001
From: sayakpaul
Date: Thu, 5 Feb 2026 09:47:15 +0530
Subject: [PATCH 1/3] make modules_to_not_convert actually run.

---
 tests/models/testing_utils/quantization.py       | 10 ----------
 .../transformers/test_models_transformer_flux.py | 16 ++++++++++++++++
 2 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/tests/models/testing_utils/quantization.py b/tests/models/testing_utils/quantization.py
index f27e912766e5..9ecbd89b9ce4 100644
--- a/tests/models/testing_utils/quantization.py
+++ b/tests/models/testing_utils/quantization.py
@@ -21,11 +21,8 @@
 from diffusers import BitsAndBytesConfig, GGUFQuantizationConfig, NVIDIAModelOptConfig, QuantoConfig, TorchAoConfig
 from diffusers.utils.import_utils import (
     is_bitsandbytes_available,
-    is_gguf_available,
     is_nvidia_modelopt_available,
     is_optimum_quanto_available,
-    is_torchao_available,
-    is_torchao_version,
 )
 
 from ...testing_utils import (
@@ -59,13 +56,6 @@
 if is_optimum_quanto_available():
     from optimum.quanto import QLinear
 
-if is_gguf_available():
-    pass
-
-if is_torchao_available():
-    if is_torchao_version(">=", "0.9.0"):
-        pass
-
 
 class LoRALayer(torch.nn.Module):
     """Wraps a linear layer with LoRA-like adapter - Used for testing purposes only.
diff --git a/tests/models/transformers/test_models_transformer_flux.py b/tests/models/transformers/test_models_transformer_flux.py
index 2d39dadfcad1..8d51e678550b 100644
--- a/tests/models/transformers/test_models_transformer_flux.py
+++ b/tests/models/transformers/test_models_transformer_flux.py
@@ -318,6 +318,10 @@ def pretrained_model_name_or_path(self):
 class TestFluxTransformerBitsAndBytes(FluxTransformerTesterConfig, BitsAndBytesTesterMixin):
     """BitsAndBytes quantization tests for Flux Transformer."""
 
+    @property
+    def modules_to_not_convert_for_test(self):
+        return ["transformer_blocks.0"]
+
 
 class TestFluxTransformerQuanto(FluxTransformerTesterConfig, QuantoTesterMixin):
     """Quanto quantization tests for Flux Transformer."""
@@ -330,10 +334,18 @@ def pretrained_model_name_or_path(self):
     def pretrained_model_kwargs(self):
         return {}
 
+    @property
+    def modules_to_not_convert_for_test(self):
+        return ["transformer_blocks.0"]
+
 
 class TestFluxTransformerTorchAo(FluxTransformerTesterConfig, TorchAoTesterMixin):
     """TorchAO quantization tests for Flux Transformer."""
 
+    @property
+    def modules_to_not_convert_for_test(self):
+        return ["transformer_blocks.0"]
+
 
 class TestFluxTransformerGGUF(FluxTransformerTesterConfig, GGUFTesterMixin):
     @property
@@ -402,6 +414,10 @@ def get_dummy_inputs(self):
 class TestFluxTransformerModelOpt(FluxTransformerTesterConfig, ModelOptTesterMixin):
     """ModelOpt quantization tests for Flux Transformer."""
 
+    @property
+    def modules_to_not_convert_for_test(self):
+        return ["transformer_blocks.0"]
+
 
 class TestFluxTransformerModelOptCompile(FluxTransformerTesterConfig, ModelOptCompileTesterMixin):
     """ModelOpt + compile tests for Flux Transformer."""

From e117274aa553b81d2d7a64789ae586cde5c16354 Mon Sep 17 00:00:00 2001
From: sayakpaul
Date: Tue, 10 Feb 2026 13:49:05 +0530
Subject: [PATCH 2/3] fix bnb modules_to_convert.

---
 tests/models/testing_utils/quantization.py       | 16 +++++++++-------
 .../transformers/test_models_transformer_flux.py |  8 ++++----
 2 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/tests/models/testing_utils/quantization.py b/tests/models/testing_utils/quantization.py
index 9ecbd89b9ce4..482806826162 100644
--- a/tests/models/testing_utils/quantization.py
+++ b/tests/models/testing_utils/quantization.py
@@ -122,14 +122,14 @@ def _create_quantized_model(self, config_kwargs, **extra_kwargs):
     def _verify_if_layer_quantized(self, name, module, config_kwargs):
         raise NotImplementedError("Subclass must implement _verify_if_layer_quantized")
 
-    def _is_module_quantized(self, module):
+    def _is_module_quantized(self, module, quant_config_kwargs=None):
         """
         Check if a module is quantized. Returns True if quantized, False otherwise. Default implementation tries
         _verify_if_layer_quantized and catches exceptions. Subclasses can override for more efficient checking.
         """
         try:
-            self._verify_if_layer_quantized("", module, {})
+            self._verify_if_layer_quantized("", module, quant_config_kwargs or {})
             return True
         except (AssertionError, AttributeError):
             return False
@@ -269,7 +269,9 @@ def _test_quantized_layers(self, config_kwargs):
             f"Quantized layer count mismatch: expected {expected_quantized_layers}, got {num_quantized_layers} (total linear layers: {num_linear_layers}, FP32 modules: {num_fp32_modules})"
         )
 
-    def _test_quantization_modules_to_not_convert(self, config_kwargs, modules_to_not_convert):
+    def _test_quantization_modules_to_not_convert(
+        self, config_kwargs, modules_to_not_convert, to_not_convert_key="modules_to_not_convert"
+    ):
         """
         Test that modules specified in modules_to_not_convert are not quantized.
 
@@ -279,7 +281,7 @@ def _test_quantization_modules_to_not_convert(self, config_kwargs, modules_to_no
         """
         # Create config with modules_to_not_convert
        config_kwargs_with_exclusion = config_kwargs.copy()
-        config_kwargs_with_exclusion["modules_to_not_convert"] = modules_to_not_convert
+        config_kwargs_with_exclusion[to_not_convert_key] = modules_to_not_convert
 
         model_with_exclusion = self._create_quantized_model(config_kwargs_with_exclusion)
 
@@ -291,7 +293,7 @@ def _test_quantization_modules_to_not_convert(self, config_kwargs, modules_to_no
             if any(excluded in name for excluded in modules_to_not_convert):
                 found_excluded = True
                 # This module should NOT be quantized
-                assert not self._is_module_quantized(module), (
+                assert not self._is_module_quantized(module, config_kwargs_with_exclusion), (
                     f"Module {name} should not be quantized but was found to be quantized"
                 )
 
@@ -303,7 +305,7 @@ def _test_quantization_modules_to_not_convert(self, config_kwargs, modules_to_no
             if isinstance(module, torch.nn.Linear):
                 # Check if this module is NOT in the exclusion list
                 if not any(excluded in name for excluded in modules_to_not_convert):
-                    if self._is_module_quantized(module):
+                    if self._is_module_quantized(module, config_kwargs_with_exclusion):
                         found_quantized = True
                         break
 
@@ -608,7 +610,7 @@ def test_bnb_modules_to_not_convert(self):
             pytest.skip("modules_to_not_convert_for_test not defined for this model")
 
         self._test_quantization_modules_to_not_convert(
-            BitsAndBytesConfigMixin.BNB_CONFIGS["4bit_nf4"], modules_to_exclude
+            BitsAndBytesConfigMixin.BNB_CONFIGS["4bit_nf4"], modules_to_exclude, "llm_int8_skip_modules"
         )
 
     @pytest.mark.parametrize("config_name", ["4bit_nf4", "8bit"], ids=["4bit_nf4", "8bit"])
diff --git a/tests/models/transformers/test_models_transformer_flux.py b/tests/models/transformers/test_models_transformer_flux.py
index 8d51e678550b..4008b66e3bcb 100644
--- a/tests/models/transformers/test_models_transformer_flux.py
+++ b/tests/models/transformers/test_models_transformer_flux.py
@@ -320,7 +320,7 @@ class TestFluxTransformerBitsAndBytes(FluxTransformerTesterConfig, BitsAndBytesT
 
     @property
     def modules_to_not_convert_for_test(self):
-        return ["transformer_blocks.0"]
+        return ["norm_out.linear"]
 
 
 class TestFluxTransformerQuanto(FluxTransformerTesterConfig, QuantoTesterMixin):
@@ -336,7 +336,7 @@ def pretrained_model_kwargs(self):
 
     @property
     def modules_to_not_convert_for_test(self):
-        return ["transformer_blocks.0"]
+        return ["norm_out.linear"]
 
 
 class TestFluxTransformerTorchAo(FluxTransformerTesterConfig, TorchAoTesterMixin):
@@ -344,7 +344,7 @@ class TestFluxTransformerTorchAo(FluxTransformerTesterConfig, TorchAoTesterMixin
 
     @property
     def modules_to_not_convert_for_test(self):
-        return ["transformer_blocks.0"]
+        return ["norm_out.linear"]
 
 
 class TestFluxTransformerGGUF(FluxTransformerTesterConfig, GGUFTesterMixin):
@@ -416,7 +416,7 @@ class TestFluxTransformerModelOpt(FluxTransformerTesterConfig, ModelOptTesterMix
 
     @property
     def modules_to_not_convert_for_test(self):
-        return ["transformer_blocks.0"]
+        return ["norm_out.linear"]
 
 
 class TestFluxTransformerModelOptCompile(FluxTransformerTesterConfig, ModelOptCompileTesterMixin):

From d676b0349005bf25d449a180f62b01bb22d7d596 Mon Sep 17 00:00:00 2001
From: sayakpaul
Date: Tue, 10 Feb 2026 15:32:41 +0530
Subject: [PATCH 3/3] fix torchao/.

---
 tests/models/testing_utils/quantization.py | 43 ++++++++++++++++++++--
 1 file changed, 40 insertions(+), 3 deletions(-)

diff --git a/tests/models/testing_utils/quantization.py b/tests/models/testing_utils/quantization.py
index 482806826162..b20453540030 100644
--- a/tests/models/testing_utils/quantization.py
+++ b/tests/models/testing_utils/quantization.py
@@ -809,7 +809,14 @@ def _create_quantized_model(self, config_kwargs, **extra_kwargs):
         return self.model_class.from_pretrained(self.pretrained_model_name_or_path, **kwargs)
 
     def _verify_if_layer_quantized(self, name, module, config_kwargs):
+        from torchao.dtypes import AffineQuantizedTensor
+        from torchao.quantization.linear_activation_quantized_tensor import LinearActivationQuantizedTensor
+
         assert isinstance(module, torch.nn.Linear), f"Layer {name} is not Linear, got {type(module)}"
+        # Check if the weight is actually quantized
+        weight = module.weight
+        is_quantized = isinstance(weight, (AffineQuantizedTensor, LinearActivationQuantizedTensor))
+        assert is_quantized, f"Layer {name} weight is not quantized, got {type(weight)}"
 
     # int4wo requires CUDA-specific ops (_convert_weight_to_int4pack)
@@ -905,9 +912,39 @@ def test_torchao_modules_to_not_convert(self):
         if modules_to_exclude is None:
             pytest.skip("modules_to_not_convert_for_test not defined for this model")
 
-        self._test_quantization_modules_to_not_convert(
-            TorchAoConfigMixin.TORCHAO_QUANT_TYPES["int8wo"], modules_to_exclude
-        )
+        # Custom implementation for torchao that skips memory footprint check
+        # because get_memory_footprint() doesn't accurately reflect torchao quantization
+        config_kwargs = TorchAoConfigMixin.TORCHAO_QUANT_TYPES["int8wo"]
+        config_kwargs_with_exclusion = config_kwargs.copy()
+        config_kwargs_with_exclusion["modules_to_not_convert"] = modules_to_exclude
+
+        model_with_exclusion = self._create_quantized_model(config_kwargs_with_exclusion)
+
+        # Find a module that should NOT be quantized
+        found_excluded = False
+        for name, module in model_with_exclusion.named_modules():
+            if isinstance(module, torch.nn.Linear):
+                # Check if this module is in the exclusion list
+                if any(excluded in name for excluded in modules_to_exclude):
+                    found_excluded = True
+                    # This module should NOT be quantized
+                    assert not self._is_module_quantized(module, config_kwargs_with_exclusion), (
+                        f"Module {name} should not be quantized but was found to be quantized"
+                    )
+
+        assert found_excluded, f"No linear layers found in excluded modules: {modules_to_exclude}"
+
+        # Find a module that SHOULD be quantized (not in exclusion list)
+        found_quantized = False
+        for name, module in model_with_exclusion.named_modules():
+            if isinstance(module, torch.nn.Linear):
+                # Check if this module is NOT in the exclusion list
+                if not any(excluded in name for excluded in modules_to_exclude):
+                    if self._is_module_quantized(module, config_kwargs_with_exclusion):
+                        found_quantized = True
+                        break
+
+        assert found_quantized, "No quantized layers found outside of excluded modules"
 
     def test_torchao_device_map(self):
         """Test that device_map='auto' works correctly with quantization."""
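
Usage note (illustrative sketch, not part of the patch series): the tests above assert that any module name passed through a quantization config's exclusion list stays a plain torch.nn.Linear while other linears are converted. For bitsandbytes the exclusion key is llm_int8_skip_modules rather than modules_to_not_convert, which is why _test_quantization_modules_to_not_convert now accepts a to_not_convert_key argument. A minimal sketch of the exercised behavior against the public diffusers API follows; the checkpoint id is an assumed example and is not taken from the patches.

import torch

from diffusers import BitsAndBytesConfig, FluxTransformer2DModel

# Keep the final norm projection in full precision, mirroring the "norm_out.linear"
# exclusion used by the Flux tests above.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    llm_int8_skip_modules=["norm_out.linear"],  # bitsandbytes' name for the exclusion list
)

transformer = FluxTransformer2DModel.from_pretrained(
    "black-forest-labs/FLUX.1-dev",  # assumed example checkpoint
    subfolder="transformer",
    quantization_config=quant_config,
    torch_dtype=torch.bfloat16,
)

# The excluded module keeps an ordinary nn.Linear weight; quantized linears hold
# bitsandbytes parameter types instead.
print(type(transformer.norm_out.linear.weight))

The torchao test reaches the same check differently: because torchao quantizes weights in place on nn.Linear, it inspects the weight tensor type (AffineQuantizedTensor or LinearActivationQuantizedTensor) rather than the module class, as added in PATCH 3/3.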