From cc6d1bcce00a959d4ab4cdb6e164719ba4f9756b Mon Sep 17 00:00:00 2001 From: badaoui Date: Fri, 27 Feb 2026 15:05:08 +0100 Subject: [PATCH 1/2] use logging --- bitsandbytes/autograd/_functions.py | 5 ++++- bitsandbytes/nn/modules.py | 17 ++++++++--------- bitsandbytes/utils.py | 5 ++++- 3 files changed, 16 insertions(+), 11 deletions(-) diff --git a/bitsandbytes/autograd/_functions.py b/bitsandbytes/autograd/_functions.py index 614c19051..b34f4a943 100644 --- a/bitsandbytes/autograd/_functions.py +++ b/bitsandbytes/autograd/_functions.py @@ -1,4 +1,5 @@ from dataclasses import dataclass +import logging from math import prod from typing import Optional import warnings @@ -8,6 +9,8 @@ import bitsandbytes.functional as F +logger = logging.getLogger(__name__) + # The inverse transformation for the colTuring and colAmpere format were contributed by Alex Borzunov: # https://github.com/bigscience-workshop/petals/blob/main/src/petals/utils/linear8bitlt_patch.py @@ -123,7 +126,7 @@ def forward( # Cast A to fp16 if A.dtype != torch.float16 and not _is_compiling(): - warnings.warn(f"MatMul8bitLt: inputs will be cast from {A.dtype} to float16 during quantization") + logger.warning("MatMul8bitLt: inputs will be cast from %s to float16 during quantization", A.dtype) if len(A.shape) == 3: A = A.reshape(-1, A.shape[-1]) diff --git a/bitsandbytes/nn/modules.py b/bitsandbytes/nn/modules.py index e3c8b49e9..ff8ce9c3d 100644 --- a/bitsandbytes/nn/modules.py +++ b/bitsandbytes/nn/modules.py @@ -3,8 +3,8 @@ # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. import copy +import logging from typing import Any, Optional, TypeVar, Union, overload -import warnings import torch from torch import Tensor, device, dtype, nn @@ -20,6 +20,8 @@ from bitsandbytes.optim import GlobalOptimManager from bitsandbytes.utils import INVERSE_LINEAR_8BIT_WEIGHTS_FORMAT_MAPPING, OutlierTracer +logger = logging.getLogger(__name__) + T = TypeVar("T", bound="torch.nn.Module") @@ -443,7 +445,7 @@ def fix_4bit_weight_quant_state_from_module(module: Union["Embedding4bit", "Line return if getattr(module, "quant_state", None) is None: - warnings.warn( + logger.warning( "FP4 quantization state not initialized. Please call .cuda() or .to(device) on the LinearFP4 layer first.", ) @@ -536,15 +538,13 @@ def set_compute_type(self, x): if self.compute_dtype in [None, torch.float32] and (x.numel() == x.shape[-1]): # single batch inference with input torch.float16 and compute_dtype float32 -> slow inference when it could be fast # warn the user about this - warnings.warn( + logger.warning( "Input type into Linear4bit is torch.float16, but bnb_4bit_compute_dtype=torch.float32 (default). This will lead to slow inference.", ) - warnings.filterwarnings("ignore", message=".*inference.") if self.compute_dtype in [None, torch.float32] and (x.numel() != x.shape[-1]): - warnings.warn( + logger.warning( "Input type into Linear4bit is torch.float16, but bnb_4bit_compute_dtype=torch.float32 (default). This will lead to slow inference or training speed.", ) - warnings.filterwarnings("ignore", message=".*inference or training") def _save_to_state_dict(self, destination, prefix, keep_vars): """ @@ -877,7 +877,7 @@ def __init__( blocksize = self.weight.blocksize if embedding_dim % blocksize != 0: - warnings.warn( + logger.warning( f"Embedding size {embedding_dim} is not divisible by block size {blocksize}. " "This will lead to slow inference.", ) @@ -1164,9 +1164,8 @@ def forward(self, x): if self.outlier_dim is None: tracer = OutlierTracer.get_instance() if not tracer.is_initialized(): - print("Please use OutlierTracer.initialize(model) before using the OutlierAwareLinear layer") + logger.warning("Please use OutlierTracer.initialize(model) before using the OutlierAwareLinear layer") outlier_idx = tracer.get_outliers(self.weight) - # print(outlier_idx, tracer.get_hvalue(self.weight)) self.outlier_dim = outlier_idx if not self.is_quantized: diff --git a/bitsandbytes/utils.py b/bitsandbytes/utils.py index 98ccd7da6..513baceab 100644 --- a/bitsandbytes/utils.py +++ b/bitsandbytes/utils.py @@ -1,9 +1,12 @@ import json +import logging import shlex import subprocess import torch +logger = logging.getLogger(__name__) + def outlier_hook(module, input): assert isinstance(module, torch.nn.Linear) @@ -65,7 +68,7 @@ def get_hvalue(self, weight): def get_outliers(self, weight): if not self.is_initialized(): - print("Outlier tracer is not initialized...") + logger.warning("Outlier tracer is not initialized...") return None hvalue = self.get_hvalue(weight) if hvalue in self.hvalue2outlier_idx: From 247a34d906045614a4d91ae4853ff8075d5fbdfa Mon Sep 17 00:00:00 2001 From: badaoui Date: Fri, 27 Feb 2026 15:38:58 +0100 Subject: [PATCH 2/2] fix --- tests/test_modules.py | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/tests/test_modules.py b/tests/test_modules.py index e5682e5c8..5185463aa 100644 --- a/tests/test_modules.py +++ b/tests/test_modules.py @@ -1,4 +1,6 @@ +import contextlib import inspect +import logging import pytest import torch @@ -8,6 +10,12 @@ from tests.helpers import get_available_devices, id_formatter, is_supported_on_hpu +@contextlib.contextmanager +def caplog_at_level(caplog, level, logger_name): + with caplog.at_level(level, logger=logger_name): + yield + + class MockArgs: def __init__(self, initial_data): for key in initial_data: @@ -453,46 +461,38 @@ def test_embedding_error(device, embedding_class, input_shape, embedding_dim, qu @pytest.mark.parametrize("device", get_available_devices()) -def test_4bit_linear_warnings(device): +def test_4bit_linear_warnings(device, caplog): dim1 = 64 - with pytest.warns(UserWarning, match=r"inference or training"): - net = nn.Sequential(*[bnb.nn.Linear4bit(dim1, dim1, quant_type="nf4") for i in range(10)]) - net = net.to(device) - inp = torch.rand(10, dim1, device=device, dtype=torch.float16) - net(inp) - with pytest.warns(UserWarning, match=r"inference."): - net = nn.Sequential(*[bnb.nn.Linear4bit(dim1, dim1, quant_type="nf4") for i in range(10)]) - net = net.to(device) - inp = torch.rand(1, dim1, device=device, dtype=torch.float16) - net(inp) - - with pytest.warns(UserWarning) as record: + with caplog_at_level(caplog, logging.WARNING, "bitsandbytes.nn.modules"): net = nn.Sequential(*[bnb.nn.Linear4bit(dim1, dim1, quant_type="nf4") for i in range(10)]) net = net.to(device) inp = torch.rand(10, dim1, device=device, dtype=torch.float16) net(inp) + assert any("inference or training" in msg for msg in caplog.messages) + caplog.clear() + with caplog_at_level(caplog, logging.WARNING, "bitsandbytes.nn.modules"): net = nn.Sequential(*[bnb.nn.Linear4bit(dim1, dim1, quant_type="nf4") for i in range(10)]) net = net.to(device) inp = torch.rand(1, dim1, device=device, dtype=torch.float16) net(inp) - - assert len(record) == 2 + assert any("inference." in msg for msg in caplog.messages) @pytest.mark.parametrize("device", get_available_devices()) -def test_4bit_embedding_warnings(device): +def test_4bit_embedding_warnings(device, caplog): num_embeddings = 128 default_block_size = 64 - with pytest.warns(UserWarning, match=r"inference."): + with caplog_at_level(caplog, logging.WARNING, "bitsandbytes.nn.modules"): net = bnb.nn.Embedding4bit( num_embeddings=num_embeddings, embedding_dim=default_block_size + 1, quant_type="nf4" ) net.to(device) inp = torch.randint(low=0, high=num_embeddings, size=(1,), device=device) net(inp) + assert any("inference" in msg for msg in caplog.messages) def test_4bit_embedding_weight_fsdp_fix(requires_cuda):