diff --git a/haystack/core/super_component/super_component.py b/haystack/core/super_component/super_component.py index 7ab28ef791..3728bfe6b7 100644 --- a/haystack/core/super_component/super_component.py +++ b/haystack/core/super_component/super_component.py @@ -119,7 +119,8 @@ def run(self, **kwargs: Any) -> dict[str, Any]: :returns: Dictionary containing the SuperComponent's output values """ - filtered_inputs = {param: value for param, value in kwargs.items() if value != _delegate_default} + # `is not`, not `!=`: numpy/pandas/torch override `__ne__` element-wise and would crash here. + filtered_inputs = {param: value for param, value in kwargs.items() if value is not _delegate_default} pipeline_inputs = self._map_explicit_inputs(input_mapping=self.input_mapping, inputs=filtered_inputs) include_outputs_from = self._get_include_outputs_from() pipeline_outputs = self.pipeline.run(data=pipeline_inputs, include_outputs_from=include_outputs_from) @@ -147,7 +148,8 @@ async def run_async(self, **kwargs: Any) -> dict[str, Any]: if not isinstance(self.pipeline, AsyncPipeline): raise TypeError("Pipeline is not an AsyncPipeline. run_async is not supported.") - filtered_inputs = {param: value for param, value in kwargs.items() if value != _delegate_default} + # `is not`, not `!=`: numpy/pandas/torch override `__ne__` element-wise and would crash here. 
+ filtered_inputs = {param: value for param, value in kwargs.items() if value is not _delegate_default} pipeline_inputs = self._map_explicit_inputs(input_mapping=self.input_mapping, inputs=filtered_inputs) pipeline_outputs = await self.pipeline.run_async(data=pipeline_inputs) return self._map_explicit_outputs(pipeline_outputs, self.output_mapping) diff --git a/releasenotes/notes/fix-super-component-delegate-default-identity-937ee58b7e3fcb44.yaml b/releasenotes/notes/fix-super-component-delegate-default-identity-937ee58b7e3fcb44.yaml new file mode 100644 index 0000000000..9bc9167fa0 --- /dev/null +++ b/releasenotes/notes/fix-super-component-delegate-default-identity-937ee58b7e3fcb44.yaml @@ -0,0 +1,10 @@ +--- +fixes: + - | + Fixed ``SuperComponent.run`` and ``SuperComponent.run_async`` raising + ``ValueError: The truth value of ... is ambiguous`` whenever a non-scalar + input (numpy arrays, pandas DataFrames/Series, torch tensors, or any + object whose ``__ne__`` returns a non-scalar) was passed. The internal + sentinel filter is now an identity check (``is not _delegate_default``) + instead of a value comparison (``!= _delegate_default``), so it no longer + invokes ``__ne__`` on user-provided values. 
diff --git a/test/core/super_component/test_super_component.py b/test/core/super_component/test_super_component.py index 82dcdafa09..32f40a18f8 100644 --- a/test/core/super_component/test_super_component.py +++ b/test/core/super_component/test_super_component.py @@ -5,6 +5,7 @@ from typing import Any from unittest.mock import patch +import numpy as np import pytest from haystack import AsyncPipeline, Document, Pipeline, SuperComponent, component, super_component @@ -471,3 +472,49 @@ async def run_async(self): result = await deserialized_super_component.run_async() assert result == {"output": "Hello world"} + + +@component +class _CaptureValue: + """Captures whatever value reaches the inner pipeline, for both sync and async runs.""" + + def __init__(self) -> None: + self.captured: dict[str, Any] = {} + + @component.output_types(seen=bool) + def run(self, value: Any = None) -> dict[str, bool]: + self.captured["value"] = value + return {"seen": True} + + @component.output_types(seen=bool) + async def run_async(self, value: Any = None) -> dict[str, bool]: + self.captured["value"] = value + return {"seen": True} + + +class TestSuperComponentDelegateDefaultFiltering: + """ + Regression for the `_delegate_default` filter: must be `is not`, not `!=`, otherwise + numpy / pandas / torch inputs raise `ValueError: The truth value of ... is ambiguous`. 
+ """ + + def test_run_accepts_numpy_ndarray_input(self): + inner = _CaptureValue() + pipe = Pipeline() + pipe.add_component("capture", inner) + + result = SuperComponent(pipe).run(value=np.array([1, 2, 3])) + + assert result == {"seen": True} + assert np.array_equal(inner.captured["value"], np.array([1, 2, 3])) + + @pytest.mark.asyncio + async def test_run_async_accepts_numpy_ndarray_input(self): + inner = _CaptureValue() + pipe = AsyncPipeline() + pipe.add_component("capture", inner) + + result = await SuperComponent(pipe).run_async(value=np.array([4, 5, 6])) + + assert result == {"seen": True} + assert np.array_equal(inner.captured["value"], np.array([4, 5, 6]))