Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cpp/src/arrow/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -535,6 +535,7 @@ set(ARROW_UTIL_SRCS
util/time.cc
util/tracing.cc
util/trie.cc
util/ulp_distance.cc
util/union_util.cc
util/unreachable.cc
util/uri.cc
Expand Down
245 changes: 117 additions & 128 deletions cpp/src/arrow/compare.cc

Large diffs are not rendered by default.

38 changes: 35 additions & 3 deletions cpp/src/arrow/compare.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ class SparseTensor;
struct Scalar;

/// Default absolute tolerance; EqualOptions::atol_ is initialized to this value.
static constexpr double kDefaultAbsoluteTolerance = 1E-5;
/// Default ULP distance; EqualOptions::ulp_distance_ is initialized to this value.
static constexpr uint16_t kDefaultUlpDistance = 4;

/// A container of options for equality comparisons
class EqualOptions {
Expand Down Expand Up @@ -66,6 +67,8 @@ class EqualOptions {
bool use_atol() const { return use_atol_; }

/// Return a new EqualOptions object with the "use_atol" property changed.
/// If both "ulp_distance" and "atol" are specified, the comparison
/// succeeds when either condition is satisfied.
EqualOptions use_atol(bool v) const {
auto res = EqualOptions(*this);
res.use_atol_ = v;
Expand Down Expand Up @@ -115,6 +118,31 @@ class EqualOptions {
return res;
}

/// Report whether the "ulp_distance" property takes part in the comparison.
///
/// Only the Equals methods honor this flag; the ApproxEquals methods
/// are unaffected by it.
/// When both "ulp_distance" and "atol" are specified, the comparison
/// succeeds as soon as either condition is satisfied.
bool use_ulp_distance() const {
  return use_ulp_distance_;
}

/// Build a copy of these options in which the "use_ulp_distance" property
/// is set to \p v. The object this is called on is not modified.
EqualOptions use_ulp_distance(bool v) const {
  EqualOptions updated(*this);
  updated.use_ulp_distance_ = v;
  return updated;
}

/// The ulp distance used for approximate comparisons of floating-point values.
/// This value is ignored while "use_ulp_distance" is false.
uint16_t ulp_distance() const {
  return ulp_distance_;
}

/// Return a new EqualOptions object with the "ulp_distance" property changed.
///
/// The object this is called on is left unmodified, so the method is
/// const-qualified like the other copy-and-modify setters and can be
/// called through a const reference.
EqualOptions ulp_distance(uint16_t v) const {
  auto res = EqualOptions(*this);
  res.ulp_distance_ = v;
  return res;
}
/// The ostream to which a diff will be formatted if arrays disagree.
/// If this is null (the default) no diff will be formatted.
std::ostream* diff_sink() const { return diff_sink_; }
Expand All @@ -132,11 +160,13 @@ class EqualOptions {

protected:
double atol_ = kDefaultAbsoluteTolerance;
uint16_t ulp_distance_ = kDefaultUlpDistance;
bool nans_equal_ = false;
bool signed_zeros_equal_ = true;
bool use_atol_ = false;
bool use_schema_ = true;
bool use_metadata_ = false;
bool use_ulp_distance_ = false;

std::ostream* diff_sink_ = NULLPTR;
};
Expand All @@ -147,8 +177,8 @@ class EqualOptions {
ARROW_EXPORT bool ArrayEquals(const Array& left, const Array& right,
const EqualOptions& = EqualOptions::Defaults());

/// Returns true if the arrays are approximately equal. For non-floating point
/// types, this is equivalent to ArrayEquals(left, right)
/// Returns true if the arrays are approximately equal according to the absolute tolerance
/// method. For non-floating point types, this is equivalent to ArrayEquals(left, right)
///
/// Note that arrow::ArrayStatistics is not included in the comparison.
ARROW_EXPORT bool ArrayApproxEquals(const Array& left, const Array& right,
Expand All @@ -163,6 +193,7 @@ ARROW_EXPORT bool ArrayRangeEquals(const Array& left, const Array& right,
const EqualOptions& = EqualOptions::Defaults());

/// Returns true if the indicated equal-length segments of the arrays are approximately
/// equal according to the absolute tolerance method.
///
/// Note that arrow::ArrayStatistics is not included in the comparison.
ARROW_EXPORT bool ArrayRangeApproxEquals(const Array& left, const Array& right,
Expand Down Expand Up @@ -202,7 +233,8 @@ ARROW_EXPORT bool ArrayStatisticsEquals(
ARROW_EXPORT bool ScalarEquals(const Scalar& left, const Scalar& right,
const EqualOptions& options = EqualOptions::Defaults());

/// Returns true if scalars are approximately equal
/// Returns true if the scalars are approximately equal according to the absolute
/// tolerance method.
/// \param[in] left a Scalar
/// \param[in] right a Scalar
/// \param[in] options comparison options
Expand Down
1 change: 1 addition & 0 deletions cpp/src/arrow/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,7 @@ arrow_util_srcs = [
'util/time.cc',
'util/tracing.cc',
'util/trie.cc',
'util/ulp_distance.cc',
'util/union_util.cc',
'util/unreachable.cc',
'util/uri.cc',
Expand Down
138 changes: 138 additions & 0 deletions cpp/src/arrow/scalar_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -353,6 +353,11 @@ class TestRealScalar : public ::testing::Test {
ASSERT_FALSE(scalar_nan_->Equals(*scalar_val_, options));
ASSERT_TRUE(scalar_nan_->Equals(*scalar_nan_, options));
ASSERT_TRUE(scalar_nan_->Equals(*scalar_other_nan_, options));

options = options.nans_equal(false).use_ulp_distance(true);
ASSERT_FALSE(scalar_nan_->Equals(*scalar_val_, options));
ASSERT_FALSE(scalar_nan_->Equals(*scalar_nan_, options));
ASSERT_FALSE(scalar_nan_->Equals(*scalar_other_nan_, options));
}

void TestSignedZeroEquals() {
Expand All @@ -365,6 +370,11 @@ class TestRealScalar : public ::testing::Test {
ASSERT_FALSE(scalar_zero_->Equals(*scalar_val_, options));
ASSERT_TRUE(scalar_zero_->Equals(*scalar_other_zero_, options));
ASSERT_FALSE(scalar_zero_->Equals(*scalar_neg_zero_, options));

options = options.signed_zeros_equal(false).use_ulp_distance(true);
ASSERT_FALSE(scalar_zero_->Equals(*scalar_val_, options));
ASSERT_TRUE(scalar_zero_->Equals(*scalar_other_zero_, options));
ASSERT_FALSE(scalar_zero_->Equals(*scalar_neg_zero_, options));
}

void TestApproxEquals() {
Expand Down Expand Up @@ -562,6 +572,134 @@ TYPED_TEST(TestRealScalar, ListViewOf) { this->TestListViewOf(); }

TYPED_TEST(TestRealScalar, LargeListViewOf) { this->TestLargeListViewOf(); }

namespace {
// Builds a ScalarType from `value` with std::make_shared and hands back the
// resulting shared pointer.
template <typename ScalarType, typename CType>
std::shared_ptr<ScalarType> CreateScalar(CType value) {
  auto scalar = std::make_shared<ScalarType>(value);
  return scalar;
}

template <typename CType>
bool IsScalarEqual(CType left, CType right, const EqualOptions& options) {
std::shared_ptr<Scalar> scalar_left =
CreateScalar<typename CTypeTraits<CType>::ScalarType>(left);
std::shared_ptr<Scalar> scalar_right =
CreateScalar<typename CTypeTraits<CType>::ScalarType>(right);
return scalar_left->Equals(*scalar_right, options);
}

template <typename CType>
void AssertScalarEquals(CType left, CType right, const EqualOptions& options) {
ASSERT_TRUE(IsScalarEqual(left, right, options));
}

template <typename CType>
void AssertScalarNotEquals(CType left, CType right, const EqualOptions& options) {
ASSERT_FALSE(IsScalarEqual(left, right, options));
}

} // namespace

// Double-precision scalars compared through Scalar::Equals with the
// "ulp_distance" option: values on the same side of 1.0, values straddling
// 1.0, and the largest finite value versus infinity.
TEST(TestRealScalarUlpDistance, Double) {
  auto ulp_options = EqualOptions::Defaults().use_ulp_distance(true);
  auto no_ulp_options = ulp_options.use_ulp_distance(false);

  constexpr double kOne = 1.0;
  constexpr double kAboveOneNear = 1.0000000000000002;
  constexpr double kAboveOneFar = 1.0000000000000007;
  constexpr double kBelowOneNear = 0.9999999999999999;
  constexpr double kBelowOneFar = 0.9999999999999988;
  constexpr double kBase = 123.4567;
  constexpr double kBaseShifted = 123.45670000000015;

  // Operands that differ while staying on the same side of 1.0
  AssertScalarEquals(kOne, kAboveOneNear, ulp_options.ulp_distance(1));
  AssertScalarEquals(kOne, kAboveOneFar, ulp_options.ulp_distance(3));
  AssertScalarNotEquals(kOne, kAboveOneNear, ulp_options.ulp_distance(0));
  AssertScalarNotEquals(kOne, kAboveOneFar, ulp_options.ulp_distance(2));
  AssertScalarNotEquals(kOne, kAboveOneFar, ulp_options.ulp_distance(1));
  AssertScalarEquals(kBase, kBaseShifted, ulp_options.ulp_distance(11));
  AssertScalarNotEquals(kBase, kBaseShifted, no_ulp_options.ulp_distance(11));
  AssertScalarNotEquals(kBase, kBaseShifted, ulp_options.ulp_distance(10));

  // Operands with different exponents that are nevertheless very close
  AssertScalarEquals(kOne, kBelowOneNear, ulp_options.ulp_distance(1));
  AssertScalarEquals(kOne, kBelowOneFar, ulp_options.ulp_distance(11));
  AssertScalarNotEquals(kOne, kBelowOneFar, ulp_options.ulp_distance(10));
  AssertScalarEquals(kAboveOneNear, kBelowOneNear, ulp_options.ulp_distance(2));
  AssertScalarNotEquals(kAboveOneNear, kBelowOneNear, ulp_options.ulp_distance(1));
  AssertScalarEquals(kBelowOneFar, kAboveOneFar, ulp_options.ulp_distance(14));
  AssertScalarNotEquals(kBelowOneFar, kAboveOneFar, no_ulp_options.ulp_distance(14));
  AssertScalarNotEquals(kBelowOneFar, kAboveOneFar, ulp_options.ulp_distance(13));

  // Largest finite value versus the infinities
  const double largest = std::numeric_limits<double>::max();
  const double pos_inf = std::numeric_limits<double>::infinity();
  const double neg_inf = -std::numeric_limits<double>::infinity();
  AssertScalarNotEquals(largest, pos_inf, ulp_options.ulp_distance(0));
  AssertScalarEquals(largest, pos_inf, ulp_options.ulp_distance(1));
  AssertScalarNotEquals(largest, pos_inf, no_ulp_options.ulp_distance(1));
  AssertScalarNotEquals(pos_inf, neg_inf, ulp_options);
}

// Single-precision scalars compared through Scalar::Equals with the
// "ulp_distance" option: values on the same side of 1.0f, values straddling
// 1.0f, and the largest finite value versus infinity.
TEST(TestRealScalarUlpDistance, Float) {
  auto ulp_options = EqualOptions::Defaults().use_ulp_distance(true);
  auto no_ulp_options = ulp_options.use_ulp_distance(false);

  constexpr float kOne = 1.0f;
  constexpr float kAboveOneNear = 1.0000001f;
  constexpr float kAboveOneFar = 1.0000013f;
  constexpr float kBelowOneNear = 0.99999994f;
  constexpr float kBelowOneFar = 0.99999934f;
  constexpr float kBase = 123.456f;
  constexpr float kBaseShifted = 123.456085f;

  // Operands that differ while staying on the same side of 1.0f
  AssertScalarEquals(kOne, kAboveOneNear, ulp_options.ulp_distance(1));
  AssertScalarEquals(kOne, kAboveOneFar, ulp_options.ulp_distance(11));
  AssertScalarNotEquals(kOne, kAboveOneNear, ulp_options.ulp_distance(0));
  AssertScalarNotEquals(kOne, kAboveOneFar, ulp_options.ulp_distance(10));
  AssertScalarEquals(kBase, kBaseShifted, ulp_options.ulp_distance(11));
  AssertScalarNotEquals(kBase, kBaseShifted, ulp_options.ulp_distance(10));

  // Operands with different exponents that are nevertheless very close
  AssertScalarEquals(kOne, kBelowOneNear, ulp_options.ulp_distance(1));
  AssertScalarEquals(kOne, kBelowOneFar, ulp_options.ulp_distance(11));
  AssertScalarNotEquals(kOne, kBelowOneFar, ulp_options.ulp_distance(10));
  AssertScalarEquals(kAboveOneNear, kBelowOneNear, ulp_options.ulp_distance(2));
  AssertScalarNotEquals(kAboveOneNear, kBelowOneNear, ulp_options.ulp_distance(1));
  AssertScalarEquals(kAboveOneFar, kBelowOneFar, ulp_options.ulp_distance(22));
  AssertScalarNotEquals(kAboveOneFar, kBelowOneFar, ulp_options.ulp_distance(21));

  // Largest finite value versus the infinities
  const float largest = std::numeric_limits<float>::max();
  const float pos_inf = std::numeric_limits<float>::infinity();
  const float neg_inf = -std::numeric_limits<float>::infinity();
  AssertScalarNotEquals(largest, pos_inf, ulp_options.ulp_distance(0));
  AssertScalarEquals(largest, pos_inf, ulp_options.ulp_distance(1));
  AssertScalarNotEquals(largest, pos_inf, no_ulp_options.ulp_distance(1));
  AssertScalarNotEquals(pos_inf, neg_inf, ulp_options);
}

// Half-precision scalars compared through Scalar::Equals with the
// "ulp_distance" option. Mirrors the Double and Float tests: values on the
// same side of 1.0, values straddling 1.0, and the largest finite value versus
// infinity.
TEST(TestRealScalarUlpDistance, HalfFloat) {
  auto options = EqualOptions::Defaults().use_ulp_distance(true);

  // Check for different value
  AssertScalarEquals(Float16(1.0f), Float16(1.00097656f), options.ulp_distance(1));
  AssertScalarEquals(Float16(1.0f), Float16(1.01074219f), options.ulp_distance(11));
  AssertScalarNotEquals(Float16(1.0f), Float16(1.00097656f), options.ulp_distance(0));
  AssertScalarNotEquals(Float16(1.0f), Float16(1.01074219f), options.ulp_distance(10));
  // With a half-precision spacing of 1/16 in [64, 128), these two operands are
  // expected to round to 123.4375 and 124.125, i.e. 11 ulps apart, so both sides
  // of the threshold are checked as in the Double and Float tests.
  AssertScalarEquals(Float16(123.456f), Float16(124.143501f), options.ulp_distance(11));
  AssertScalarNotEquals(Float16(123.456f), Float16(124.143501f),
                        options.ulp_distance(10));

  // Left and right have a different exponent but are still very close
  AssertScalarEquals(Float16(1.0f), Float16(0.999511719f), options.ulp_distance(1));
  AssertScalarEquals(Float16(1.0f), Float16(0.994628906f), options.ulp_distance(11));
  AssertScalarNotEquals(Float16(1.0f), Float16(0.994628906f), options.ulp_distance(10));
  AssertScalarEquals(Float16(1.00097656f), Float16(0.999511719f),
                     options.ulp_distance(2));
  AssertScalarNotEquals(Float16(1.00097656f), Float16(0.999511719f),
                        options.ulp_distance(1));
  AssertScalarEquals(Float16(1.01074219f), Float16(0.994628906f),
                     options.ulp_distance(22));
  AssertScalarNotEquals(Float16(1.01074219f), Float16(0.994628906f),
                        options.ulp_distance(21));

  // Check for infinity
  Float16 max = std::numeric_limits<Float16>::max();
  Float16 positive_infinity = std::numeric_limits<Float16>::infinity();
  Float16 negative_infinity = -std::numeric_limits<Float16>::infinity();
  AssertScalarNotEquals(max, positive_infinity, options.ulp_distance(0));
  AssertScalarEquals(max, positive_infinity, options.ulp_distance(1));
  AssertScalarNotEquals(max, positive_infinity,
                        options.use_ulp_distance(false).ulp_distance(1));
  AssertScalarNotEquals(positive_infinity, negative_infinity, options);
}

template <typename T>
class TestDecimalScalar : public ::testing::Test {
public:
Expand Down
73 changes: 10 additions & 63 deletions cpp/src/arrow/testing/math.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,64 +25,13 @@
#include <gtest/gtest.h>

#include "arrow/util/float16.h"
#include "arrow/util/logging_internal.h"
#include "arrow/util/ubsan.h"
#include "arrow/util/ulp_distance.h"

namespace arrow {
namespace {

// Trait mapping a floating-point type to the unsigned integer type exposed as
// `Type`. Only the specializations below are defined; the primary template is
// declared but left without a definition.
template <typename Float>
struct FloatToUInt;

// double -> uint64_t
template <>
struct FloatToUInt<double> {
using Type = uint64_t;
};

// float -> uint32_t
template <>
struct FloatToUInt<float> {
using Type = uint32_t;
};

// util::Float16 -> uint16_t
template <>
struct FloatToUInt<util::Float16> {
using Type = uint16_t;
};

// Helper that measures how far apart two values of the same floating-point
// type are by comparing their bit patterns as unsigned integers.
template <typename Float>
struct UlpDistanceUtil {
 public:
  using UIntType = typename FloatToUInt<Float>::Type;
  static constexpr UIntType kNumberOfBits = sizeof(Float) * 8;
  static constexpr UIntType kSignMask = static_cast<UIntType>(1) << (kNumberOfBits - 1);

  // Returns the absolute difference between the biased integer encodings of
  // `left` and `right`.
  //
  // This implementation is inspired by:
  // https://randomascii.wordpress.com/2012/02/25/comparing-floating-point-numbers-2012-edition/
  static UIntType UlpDistance(Float left, Float right) {
    const UIntType lhs = ConvertSignAndMagnitudeToBiased(util::SafeCopy<UIntType>(left));
    const UIntType rhs = ConvertSignAndMagnitudeToBiased(util::SafeCopy<UIntType>(right));
    // Subtract the smaller encoding from the larger one.
    return lhs > rhs ? lhs - rhs : rhs - lhs;
  }

 private:
  // Re-encodes a raw bit pattern: when the top bit is set the value is negated
  // (two's complement), otherwise the top bit is switched on.
  //
  // Source reference (GoogleTest):
  // https://github.com/google/googletest/blob/1b96fa13f549387b7549cc89e1a785cf143a1a50/googletest/include/gtest/internal/gtest-internal.h#L345-L368
  static UIntType ConvertSignAndMagnitudeToBiased(UIntType value) {
    const bool top_bit_set = (value & kSignMask) != 0;
    return top_bit_set ? static_cast<UIntType>(~value + 1)
                       : static_cast<UIntType>(value | kSignMask);
  }
};

template <typename Float>
bool WithinUlpGeneric(Float left, Float right, int n_ulps) {
bool WithinUlpGeneric(Float left, Float right, uint16_t n_ulps) {
if constexpr (std::is_same_v<Float, util::Float16>) {
if (left.is_nan() || right.is_nan()) {
return left.is_nan() == right.is_nan();
Expand All @@ -102,41 +51,39 @@ bool WithinUlpGeneric(Float left, Float right, int n_ulps) {
return left == right;
}

DCHECK_GE(n_ulps, 1);
return UlpDistanceUtil<Float>::UlpDistance(left, right) <=
static_cast<uint64_t>(n_ulps);
return UlpDistance(left, right, n_ulps);
}

template <typename Float>
void AssertWithinUlpGeneric(Float left, Float right, int n_ulps) {
void AssertWithinUlpGeneric(Float left, Float right, uint16_t n_ulps) {
if (!WithinUlpGeneric(left, right, n_ulps)) {
FAIL() << left << " and " << right << " are not within " << n_ulps << " ulps";
}
}

} // namespace

bool WithinUlp(util::Float16 left, util::Float16 right, int n_ulps) {
bool WithinUlp(util::Float16 left, util::Float16 right, uint16_t n_ulps) {
return WithinUlpGeneric(left, right, n_ulps);
}

bool WithinUlp(float left, float right, int n_ulps) {
bool WithinUlp(float left, float right, uint16_t n_ulps) {
return WithinUlpGeneric(left, right, n_ulps);
}

bool WithinUlp(double left, double right, int n_ulps) {
bool WithinUlp(double left, double right, uint16_t n_ulps) {
return WithinUlpGeneric(left, right, n_ulps);
}

void AssertWithinUlp(util::Float16 left, util::Float16 right, int n_ulps) {
void AssertWithinUlp(util::Float16 left, util::Float16 right, uint16_t n_ulps) {
AssertWithinUlpGeneric(left, right, n_ulps);
}

void AssertWithinUlp(float left, float right, int n_ulps) {
void AssertWithinUlp(float left, float right, uint16_t n_ulps) {
AssertWithinUlpGeneric(left, right, n_ulps);
}

void AssertWithinUlp(double left, double right, int n_ulps) {
void AssertWithinUlp(double left, double right, uint16_t n_ulps) {
AssertWithinUlpGeneric(left, right, n_ulps);
}

Expand Down
Loading
Loading