From 1b3df323ee978c2447bbcdbd4d27ca274d826da3 Mon Sep 17 00:00:00 2001 From: Laramie Leavitt Date: Sat, 21 Feb 2026 14:26:13 -0800 Subject: [PATCH] Update FuzzingBitGen to take two streams, a control stream and a data stream. The control stream indicates whether the distribution functions will return boundary values (min, max, mean) or a value derived from the data stream. The data stream provides the actual byte data for generating random values. The basic flow of each variate generation is: 1. Read a byte from the control stream (in fuzzing_bit_gen). 2. Depending on the byte, return a min/max/mean/variate, etc. The actual C++ distribution functions are now called in most cases, so that outputs are more aligned with actual distribution behavior. The old approach was in many ways similar in that it would pull a byte from the head of the stream to determine whether to return a min or a max, and then it would attempt to pull variate data from the tail of the stream, and neither was reused. Adds a test to demonstrate that std::shuffle() is properly manipulated by the fuzzing framework. Also adds additional tests to FuzzingBitGen for the distribution functions. NOTE: This will change the variates generated by FuzzingBitGen from prior versions. 
PiperOrigin-RevId: 873431571 --- domain_tests/bitgen_ref_domain_test.cc | 37 +- e2e_tests/functional_test.cc | 7 + e2e_tests/testdata/BUILD | 2 + .../fuzz_tests_for_microbenchmarking.cc | 17 +- fuzztest/BUILD | 12 + fuzztest/fuzzing_bit_gen.cc | 126 +++++- fuzztest/fuzzing_bit_gen.h | 95 ++-- fuzztest/fuzzing_bit_gen_test.cc | 110 +++++ fuzztest/internal/BUILD | 13 +- fuzztest/internal/domains/arbitrary_impl.h | 25 +- fuzztest/internal/domains/bit_gen_ref.h | 152 +++++-- fuzztest/internal/domains/container_of_impl.h | 2 +- fuzztest/internal/register_fuzzing_mocks.cc | 412 ++++++++++-------- fuzztest/internal/register_fuzzing_mocks.h | 13 +- .../internal/register_fuzzing_mocks_test.cc | 173 ++++++++ 15 files changed, 870 insertions(+), 326 deletions(-) create mode 100644 fuzztest/fuzzing_bit_gen_test.cc create mode 100644 fuzztest/internal/register_fuzzing_mocks_test.cc diff --git a/domain_tests/bitgen_ref_domain_test.cc b/domain_tests/bitgen_ref_domain_test.cc index 869bc0f8b..5c03b38e0 100644 --- a/domain_tests/bitgen_ref_domain_test.cc +++ b/domain_tests/bitgen_ref_domain_test.cc @@ -12,8 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include #include +#include "gmock/gmock.h" #include "gtest/gtest.h" #include "absl/random/bit_gen_ref.h" #include "absl/random/random.h" @@ -23,38 +25,39 @@ namespace fuzztest { namespace { -TEST(BitGenRefDomainTest, DistinctVariatesGeneratedByCallOperator) { +TEST(BitGenRefDomainTest, DefaultInitializationIsRepeatable) { absl::BitGen bitgen_for_seeding; Domain domain = Arbitrary(); Value v0(domain, bitgen_for_seeding); - Value v1(domain, bitgen_for_seeding); - // Discard the first value, which may be from the data stream. - // If the implementation of BitGenRefDomain changes this may break. 
- v0.user_value(); - v1.user_value(); - - std::vector a, b; + std::vector variates; for (int i = 0; i < 10; ++i) { - a.push_back(v0.user_value()); - b.push_back(v1.user_value()); + variates.push_back(v0.user_value()); } - EXPECT_NE(a, b); + // The default domain does not require either data or control streams. + // So the initial sequences may be the same, but we generally don't expect + // them all to be the same. + EXPECT_THAT(variates, testing::Not(testing::Each(testing::Eq(variates[0])))); } -TEST(BitGenRefDomainTest, AbseilUniformReturnsLowerBoundWhenExhausted) { +TEST(BitGenRefDomainTest, AbseilUniformIsFunctionalWhenExhausted) { absl::BitGen bitgen_for_seeding; Domain domain = Arbitrary(); Value v0(domain, bitgen_for_seeding); - // Discard the first value, which may be from the data stream. - // If the implementation of BitGenRefDomain changes this may break. - v0.user_value(); + // When the same domain is used to generate multiple values, the generated + // data sequence should be identical. 
+ std::vector values; + for (int i = 0; i < 20; ++i) { + values.push_back(absl::Uniform(v0.user_value, 0, 100)); + } - for (int i = 0; i < 10; ++i) { - EXPECT_EQ(absl::Uniform(v0.user_value, 0, 100), 0); + // Verify repeatability + Value v1(v0, domain); + for (int i = 0; i < 20; ++i) { + EXPECT_EQ(absl::Uniform(v1.user_value, 0, 100), values[i]); } } diff --git a/e2e_tests/functional_test.cc b/e2e_tests/functional_test.cc index bd54ea750..99497b892 100644 --- a/e2e_tests/functional_test.cc +++ b/e2e_tests/functional_test.cc @@ -1707,6 +1707,13 @@ TEST_P(FuzzingModeCrashFindingTest, BitGenRefTestFindsAbortInFuzzingMode) { ExpectTargetAbort(status, std_err); } +TEST_P(FuzzingModeCrashFindingTest, + BitGenRefShuffleTestFindsAbortInFuzzingMode) { + auto [status, std_out, std_err] = Run("MySuite.BitGenRefShuffle"); + EXPECT_THAT_LOG(std_err, HasSubstr("argument 0: absl::BitGenRefShuffle{}")); + ExpectTargetAbort(status, std_err); +} + TEST_P(FuzzingModeCrashFindingTest, FixedSizeVectorValueTestFindsAbortInFuzzingMode) { auto [status, std_out, std_err] = Run("MySuite.FixedSizeVectorValue"); diff --git a/e2e_tests/testdata/BUILD b/e2e_tests/testdata/BUILD index 8ee022ae0..6307e6ebe 100644 --- a/e2e_tests/testdata/BUILD +++ b/e2e_tests/testdata/BUILD @@ -38,6 +38,7 @@ cc_binary( "@abseil-cpp//absl/strings:str_format", "@abseil-cpp//absl/strings:string_view", "@com_google_fuzztest//fuzztest", + "@com_google_fuzztest//fuzztest:fuzzing_bit_gen", "@com_google_fuzztest//fuzztest:fuzztest_gtest_main", "@com_google_fuzztest//fuzztest/internal:test_protobuf_cc_proto", ], @@ -95,6 +96,7 @@ cc_binary( "@com_google_fuzztest//common:logging", "@com_google_fuzztest//fuzztest", "@com_google_fuzztest//fuzztest:flatbuffers", + "@com_google_fuzztest//fuzztest:fuzzing_bit_gen", "@com_google_fuzztest//fuzztest:fuzztest_gtest_main", "@com_google_fuzztest//fuzztest:googletest_fixture_adapter", "@com_google_fuzztest//fuzztest/internal:test_flatbuffers_cc_fbs", diff --git 
a/e2e_tests/testdata/fuzz_tests_for_microbenchmarking.cc b/e2e_tests/testdata/fuzz_tests_for_microbenchmarking.cc index f1c7d636a..dddfea3a7 100644 --- a/e2e_tests/testdata/fuzz_tests_for_microbenchmarking.cc +++ b/e2e_tests/testdata/fuzz_tests_for_microbenchmarking.cc @@ -24,6 +24,7 @@ // i.e., to check that the fuzzer behaves as expected and outputs the expected // results. E.g., the fuzzer finds the abort() or bug. +#include #include #include #include @@ -33,6 +34,7 @@ #include #include #include +#include #include #include @@ -240,15 +242,26 @@ FUZZ_TEST(MySuite, FixedSizeVectorValue) .WithDomains(fuzztest::VectorOf(fuzztest::Arbitrary()).WithSize(4)); __attribute__((optnone)) void BitGenRef(absl::BitGenRef bitgen) { + // This uses FuzzingBitGen's mocking support for absl::Uniform(). if (absl::Uniform(bitgen, 0, 256) == 'F' && absl::Uniform(bitgen, 0, 256) == 'U' && - absl::Uniform(bitgen, 0, 256) == 'Z' && - absl::Uniform(bitgen, 0, 256) == 'Z') { + absl::Uniform(bitgen, 32, 128) == 'Z' && + absl::Uniform(bitgen, 32, 128) == 'Z') { std::abort(); // Bug! } } FUZZ_TEST(MySuite, BitGenRef); +__attribute__((optnone)) void BitGenRefShuffle(absl::BitGenRef bitgen) { + // This uses FuzzingBitGen's operator(). + std::vector v = {4, 1, 3, 2, 5}; + std::shuffle(v.begin(), v.end(), bitgen); + if (std::is_sorted(v.begin(), v.end())) { + std::abort(); // Bug! + } +} +FUZZ_TEST(MySuite, BitGenRefShuffle); + __attribute__((optnone)) void WithDomainClass(uint8_t a, double d) { // This will only crash with a=10, to make it easier to check the results. // d can have any value. 
diff --git a/fuzztest/BUILD b/fuzztest/BUILD index c3bd2261f..ea5f21c75 100644 --- a/fuzztest/BUILD +++ b/fuzztest/BUILD @@ -218,11 +218,23 @@ cc_library( ], ) +cc_test( + name = "fuzzing_bit_gen_test", + srcs = ["fuzzing_bit_gen_test.cc"], + deps = [ + ":fuzzing_bit_gen", + "@abseil-cpp//absl/random", + "@abseil-cpp//absl/random:bit_gen_ref", + "@googletest//:gtest_main", + ], +) + cc_library( name = "fuzzing_bit_gen", srcs = ["fuzzing_bit_gen.cc"], hdrs = ["fuzzing_bit_gen.h"], deps = [ + "@abseil-cpp//absl/base:core_headers", "@abseil-cpp//absl/base:fast_type_id", "@abseil-cpp//absl/base:no_destructor", "@abseil-cpp//absl/container:flat_hash_map", diff --git a/fuzztest/fuzzing_bit_gen.cc b/fuzztest/fuzzing_bit_gen.cc index 689a8f7cd..3c370c8b3 100644 --- a/fuzztest/fuzzing_bit_gen.cc +++ b/fuzztest/fuzzing_bit_gen.cc @@ -18,42 +18,111 @@ #include #include #include +#include +#include #include "absl/base/fast_type_id.h" #include "absl/base/no_destructor.h" #include "absl/container/flat_hash_map.h" +#include "absl/numeric/bits.h" +#include "absl/numeric/int128.h" #include "absl/types/span.h" #include "./fuzztest/internal/register_fuzzing_mocks.h" namespace fuzztest { +namespace { -FuzzingBitGen::FuzzingBitGen(absl::Span data_stream) - : data_stream_(data_stream) { - // Seed the internal URBG with the first 8 bytes of the data stream. - uint64_t stream_seed = 0x6C7FD535EDC7A62D; - if (!data_stream_.empty()) { - size_t num_bytes = std::min(sizeof(stream_seed), data_stream_.size()); - std::memcpy(&stream_seed, data_stream_.data(), num_bytes); - data_stream_.remove_prefix(num_bytes); - } - seed(stream_seed); +// Minimal implementation of a PCG64 engine equivalent to xsl_rr_128_64. 
+inline constexpr absl::uint128 multiplier() { + return absl::MakeUint128(0x2360ed051fc65da4, 0x4385df649fccf645); +} +inline constexpr absl::uint128 increment() { + return absl::MakeUint128(0x5851f42d4c957f2d, 0x14057b7ef767814f); +} +inline absl::uint128 lcg(absl::uint128 s) { + return s * multiplier() + increment(); +} +inline uint64_t mix(absl::uint128 state) { + uint64_t h = absl::Uint128High64(state); + uint64_t rotate = h >> 58u; + uint64_t s = absl::Uint128Low64(state) ^ h; + return absl::rotr(s, rotate); +} + +enum class Instruction : uint8_t { + kDataStreamVariate = 0, + kLCGVariate = 1, + kMin = 2, + kMax = 3, + kMean = 4, + kAlternateVariate = 5, +}; + +Instruction byte_to_instruction(uint8_t byte) { + return static_cast(byte % 6); +} + +} // namespace + +FuzzingBitGen::FuzzingBitGen(absl::Span data_stream, + absl::Span control_stream, + uint64_t seed_value) + : control_stream_(control_stream), data_stream_(data_stream) { + seed(seed_value); } -FuzzingBitGen::result_type FuzzingBitGen::operator()() { - // The non-mockable calls will consume the next 8 bytes from the data - // stream until it is exhausted, then they will return a value from the - // internal URBG. - if (!data_stream_.empty()) { - result_type x = 0; - size_t num_bytes = std::min(sizeof(x), data_stream_.size()); - std::memcpy(&x, data_stream_.data(), num_bytes); - data_stream_.remove_prefix(num_bytes); - return x; +void FuzzingBitGen::DataStreamFn(bool use_lcg, void* result, + size_t result_size) { + if (!use_lcg && !data_stream_.empty()) { + // Consume up to result_size bytes from the data stream. + size_t n = + result_size < data_stream_.size() ? result_size : data_stream_.size(); + memcpy(result, data_stream_.data(), n); + data_stream_.remove_prefix(n); + return; } - // Fallback to the internal URBG. + // The stream is expired. Generate up to 16 bytes from the LCG. state_ = lcg(state_); - return mix(state_); + uint64_t x = mix(state_); + memcpy(result, &x, result_size > sizeof(x) ? 
sizeof(x) : result_size); + if (result_size > sizeof(x)) { + state_ = lcg(state_); + uint64_t x = mix(state_); + memcpy(static_cast(result) + sizeof(x), &x, + result_size - sizeof(x) > sizeof(x) ? sizeof(x) + : result_size - sizeof(x)); + } +} + +uint64_t FuzzingBitGen::operator()() { + // Use the control stream to determine the return value. + if (c_ >= control_stream_.size()) { + c_ = 0; + } + Instruction instruction = + control_stream_.empty() + ? (data_stream_.empty() ? Instruction::kLCGVariate + : Instruction::kDataStreamVariate) + : byte_to_instruction(control_stream_[c_++]); + switch (instruction) { + case Instruction::kMin: + return 0; // min + case Instruction::kMax: + return (std::numeric_limits::max)(); // max + case Instruction::kMean: + return (std::numeric_limits::max)() / 2; // mean + default: + break; + } + uint64_t x = 0; + DataStreamFn(instruction == Instruction::kLCGVariate, &x, sizeof(x)); + return x; +} + +void FuzzingBitGen::seed(result_type seed_value) { + absl::uint128 tmp = seed_value; + state_ = lcg(tmp + increment()); } bool FuzzingBitGen::InvokeMock(absl::FastTypeIdType key_id, void* args_tuple, @@ -73,7 +142,18 @@ bool FuzzingBitGen::InvokeMock(absl::FastTypeIdType key_id, void* args_tuple, if (it == fuzzing_map->end()) { return false; } - it->second(data_stream_, args_tuple, result); + + if (c_ >= control_stream_.size()) { + c_ = 0; + } + uint8_t control_byte = control_stream_.empty() ? 
0 : control_stream_[c_++]; + const bool use_lcg = + byte_to_instruction(control_byte) == Instruction::kLCGVariate; + auto data_stream_fn = [this, use_lcg](void* result, size_t n) { + this->DataStreamFn(use_lcg, result, n); + }; + + it->second(data_stream_fn, control_byte, args_tuple, result); return true; } diff --git a/fuzztest/fuzzing_bit_gen.h b/fuzztest/fuzzing_bit_gen.h index f613892e7..95d1f6b0a 100644 --- a/fuzztest/fuzzing_bit_gen.h +++ b/fuzztest/fuzzing_bit_gen.h @@ -15,11 +15,13 @@ #ifndef FUZZTEST_FUZZTEST_FUZZING_BIT_GEN_H_ #define FUZZTEST_FUZZTEST_FUZZING_BIT_GEN_H_ +#include #include +#include #include +#include "absl/base/attributes.h" #include "absl/base/fast_type_id.h" -#include "absl/numeric/bits.h" #include "absl/numeric/int128.h" #include "absl/random/bit_gen_ref.h" #include "absl/types/span.h" @@ -27,39 +29,48 @@ namespace fuzztest { /// FuzzingBitGen is a BitGen instance which uses the Abseil mock mechanisms -/// to return distribution specific variates based on the fuzz data stream. +/// to return distribution-specific variates based on the underlying control +/// and data streams. The specific sequence generated by a FuzzingBitGen may +/// vary due to the underlying code paths and whether implementation details +/// change, such as adding support for new distributions, etc. /// -/// It is perhaps useful to think of the data stream as a sequence of structured -/// variates with semantic meaning, rather than just values. Recombinations of, -/// and modifications to, the sequence are useful in exploring the behavior of -/// the code under test in ways where a mere random-number generator sequence -/// would not, as changing the seed mutates the entire sequence. +/// The `control stream` is a sequence of control bytes which modifies the +/// behavior of the mocked distribution functions, such as returning min, +/// max, mean, or alternate values. When the control stream is exhausted, it is +/// reused (wrapped around). 
/// -/// NOTE: The first 8 bytes of the fuzzed data stream may be used to seed an -/// internal pnrg which is used to generate random variates for calls which -/// are not captured through mockable Abseil random distribution methods -/// (for example, calls to std::shuffle(...)). Otherwise the data stream is -/// treated as a stream where the next value in the sequence maps to the output -/// of the next distribution method. Note that the specific sequence generated -/// by a FuzzingBitGen may vary due to the underlying code paths and whether -/// implementation details change, such as adding support for new distributions, -/// etc. +/// The `data_stream` is used to provide the random values for the mocked +/// distribution functions, as well as an internal URBG used for non-mocked +/// functions, such as std::shuffle(...). When the data stream is exhausted, +/// it is *not* reused (wrapped around), instead the internal URBG is used as +/// the source of random variates. /// -/// When the data stream is exhausted, absl::MockingBitGen mockable calls will -/// continue to return an arbitrary legal value, typically the minimum or mean -/// value of the distribution. +/// The `seed_value` is used to seed the internal URBG. +/// +/// Recombinations of, and modifications to, the sequence are useful in +/// exploring the behavior of the code under test in ways where a mere +/// random-number generator sequence would not, as changing the seed mutates the +/// entire sequence. +/// +/// NOTE: It is possible to construct a FuzzingBitGen instance which fails +/// into an infinite loop when using a distribution which relies on rejection +/// sampling, or when user code does not make progress on certain outputs. There +/// is no built-in protection against this. /// /// This type is thread-compatible, but not thread-safe. class FuzzingBitGen { public: - // Create a FuzzingBitGen from an unowned fuzzed `data` source, which must - // outlive the FuzzingBitGen instance. 
- // - // The first 8 bytes of the data stream are used to seed an internal URBG used - // for calls which are not mockable. - explicit FuzzingBitGen(absl::Span data_stream); - - // Disallow copy, assign, and move. + // Create a FuzzingBitGen from an unowned fuzzed `data_stream` source, an + // optional `control_stream` source, and an optional `seed_value`. + // Both streams must outlive the FuzzingBitGen. + explicit FuzzingBitGen( + absl::Span data_stream ABSL_ATTRIBUTE_LIFETIME_BOUND, + absl::Span control_stream ABSL_ATTRIBUTE_LIFETIME_BOUND, + uint64_t seed_value = 0); + explicit FuzzingBitGen( + absl::Span data_stream ABSL_ATTRIBUTE_LIFETIME_BOUND) + : FuzzingBitGen(data_stream, {}) {} + FuzzingBitGen(const FuzzingBitGen&) = delete; FuzzingBitGen& operator=(const FuzzingBitGen&) = delete; FuzzingBitGen(FuzzingBitGen&&) = default; @@ -75,38 +86,24 @@ class FuzzingBitGen { return (std::numeric_limits::max)(); } - void seed(result_type seed_value = 0) { - absl::uint128 tmp = seed_value; - state_ = lcg(tmp + increment()); - } + void seed(result_type seed_value = 0); result_type operator()(); private: - // Minimal implementation of a PCG64 engine equivalent to xsl_rr_128_64. - static inline constexpr absl::uint128 multiplier() { - return absl::MakeUint128(0x2360ed051fc65da4, 0x4385df649fccf645); - } - static inline constexpr absl::uint128 increment() { - return absl::MakeUint128(0x5851f42d4c957f2d, 0x14057b7ef767814f); - } - inline absl::uint128 lcg(absl::uint128 s) { - return s * multiplier() + increment(); - } - inline result_type mix(absl::uint128 state) { - uint64_t h = absl::Uint128High64(state); - uint64_t rotate = h >> 58u; - uint64_t s = absl::Uint128Low64(state) ^ h; - return absl::rotr(s, rotate); - } + // Consumes up to x bytes from the data stream, or if there are no remaining + // bytes, returns up to 8 bytes from the internal LCG PRNG. 
+ void DataStreamFn(bool use_lcg, void* result, size_t result_size); // InvokeMock meets the requirements of absl::BitGenRef::InvokeMock. // This method detects whether the key has been registered as supported, // and, if so, returns a value derived from `data_stream_`. bool InvokeMock(absl::FastTypeIdType key_id, void* args_tuple, void* result); - absl::Span data_stream_; // Mock data stream. - absl::uint128 state_ = 0; // Internal URBG state. + absl::uint128 state_ = 0; // Internal URBG state. + absl::Span control_stream_; + size_t c_ = 0; // Offset into the control stream. + absl::Span data_stream_; template friend struct ::absl::random_internal::DistributionCaller; // for InvokeMock diff --git a/fuzztest/fuzzing_bit_gen_test.cc b/fuzztest/fuzzing_bit_gen_test.cc new file mode 100644 index 000000000..e4c2c261c --- /dev/null +++ b/fuzztest/fuzzing_bit_gen_test.cc @@ -0,0 +1,110 @@ +// Copyright 2026 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "./fuzztest/fuzzing_bit_gen.h" + +#include +#include + +#include "gtest/gtest.h" +#include "absl/random/bit_gen_ref.h" +#include "absl/random/random.h" + +namespace fuzztest { +namespace { +const uint8_t kDataStream[18] = { + 1, 2, 3, 4, 5, 6, 7, 8, // + 9, 10, // +}; +const uint64_t kSeedValue = 0x0807060504030201; + +TEST(FuzzingBitGenTest, OperatorReturnsBytesFromStream) { + const uint8_t kControlStream[1] = {0}; + // {0} -> uses data stream, reads up to 8 bytes. 
+ FuzzingBitGen bitgen(kDataStream, kControlStream, kSeedValue); + EXPECT_EQ(bitgen(), 0x0807060504030201); + EXPECT_EQ(bitgen(), 0x0A09); + EXPECT_NE(bitgen(), 0x0807060504030201); // Data stream is exhausted. +} + +TEST(FuzzingBitGenTest, OperatorUsesPcgForEmptyStreams) { + FuzzingBitGen bitgen({}, {}, kSeedValue); + uint64_t v1 = bitgen(); + EXPECT_NE(v1, 0); + uint64_t v2 = bitgen(); + EXPECT_NE(v2, 0); + + FuzzingBitGen bitgen2({}, {}, kSeedValue); + EXPECT_EQ(bitgen2(), v1); + EXPECT_EQ(bitgen2(), v2); +} + +TEST(FuzzingBitGenTest, OperatorUsesPcgForEmptyStreamsUnseeded) { + FuzzingBitGen bitgen({}); + uint64_t v1 = bitgen(); + EXPECT_NE(v1, 0); + uint64_t v2 = bitgen(); + EXPECT_NE(v2, 0); + + FuzzingBitGen bitgen2({}); + EXPECT_EQ(bitgen2(), v1); + EXPECT_EQ(bitgen2(), v2); +} + +TEST(FuzzingBitGenTest, OperatorUsesInstructionStream) { + const uint8_t kControlStream[6] = { + 0, // data stream variate + 1, // lcg variate + 2, // min + 3, // max + 4, // mean + 5, // alternate variate + }; + FuzzingBitGen bitgen(kDataStream, kControlStream); + EXPECT_EQ(bitgen(), 0x0807060504030201); + EXPECT_NE(bitgen(), 0); + EXPECT_EQ(bitgen(), 0); // min + EXPECT_EQ(bitgen(), std::numeric_limits::max()); // max + EXPECT_EQ(bitgen(), std::numeric_limits::max() / 2); // mean +} + +TEST(FuzzingBitGenTest, ControlStreamWrapsAround) { + const uint8_t kControlStream[3] = {2, 0, 2}; + FuzzingBitGen bitgen(kDataStream, kControlStream, kSeedValue); + EXPECT_EQ(bitgen(), 0); // min + EXPECT_EQ(bitgen(), 0x0807060504030201); // data + EXPECT_EQ(bitgen(), 0); // min + EXPECT_EQ(bitgen(), 0); // min + EXPECT_EQ(bitgen(), 0x0a09); // data, exhausted + EXPECT_EQ(bitgen(), 0); // min +} + +TEST(FuzzingBitGenTest, MockingIsRepeatable) { + FuzzingBitGen bg1(kDataStream, {}, kSeedValue); + absl::BitGenRef ref1(bg1); + int v_a = absl::Uniform(ref1, 0, 100); + int v_b = absl::Uniform(ref1, 0, 100); + int v_c = absl::Uniform(ref1, 0, 100); + uint64_t v_d = ref1(); + + FuzzingBitGen 
bg2(kDataStream, {}, kSeedValue); + absl::BitGenRef ref2(bg2); + EXPECT_EQ(absl::Uniform(ref2, 0, 100), v_a); + EXPECT_EQ(absl::Uniform(ref2, 0, 100), v_b); + EXPECT_EQ(absl::Uniform(ref2, 0, 100), v_c); + EXPECT_EQ(ref2(), v_d); +} + +} // namespace +} // namespace fuzztest diff --git a/fuzztest/internal/BUILD b/fuzztest/internal/BUILD index 0ea46b829..c895bac51 100644 --- a/fuzztest/internal/BUILD +++ b/fuzztest/internal/BUILD @@ -314,8 +314,17 @@ cc_library( "@abseil-cpp//absl/base:fast_type_id", "@abseil-cpp//absl/functional:function_ref", "@abseil-cpp//absl/random:distributions", - "@abseil-cpp//absl/types:span", - "@com_google_fuzztest//common:logging", + ], +) + +cc_test( + name = "register_fuzzing_mocks_test", + srcs = ["register_fuzzing_mocks_test.cc"], + deps = [ + "@abseil-cpp//absl/random", + "@abseil-cpp//absl/random:bit_gen_ref", + "@com_google_fuzztest//fuzztest:fuzzing_bit_gen", + "@googletest//:gtest_main", ], ) diff --git a/fuzztest/internal/domains/arbitrary_impl.h b/fuzztest/internal/domains/arbitrary_impl.h index f6b170258..44cc84d60 100644 --- a/fuzztest/internal/domains/arbitrary_impl.h +++ b/fuzztest/internal/domains/arbitrary_impl.h @@ -449,8 +449,8 @@ AggregateOfImpl...> // improve if possible. template ()> decltype(DetectAggregateOfImpl2( - BindAggregate(std::declval(), std::integral_constant{}))) -DetectAggregateOfImpl(); + BindAggregate(std::declval(), + std::integral_constant{}))) DetectAggregateOfImpl(); template class ArbitraryImpl< @@ -584,13 +584,24 @@ class ArbitraryImpl // Arbitrary for absl::BitGenRef. 
template <> class ArbitraryImpl - : public BitGenRefDomain, - ArbitraryImpl>> { - using InnerContainer = + : public BitGenRefDomain< + /*DataSequence=*/ + SequenceContainerOfImpl, ArbitraryImpl>, + /*ControlSequence=*/ + SequenceContainerOfImpl, ElementOfImpl>, + /*SeedValue=*/ArbitraryImpl> { + using DataSequence = SequenceContainerOfImpl, ArbitraryImpl>; - + using ControlSequence = + SequenceContainerOfImpl, ElementOfImpl>; + // The control stream determines the behavior of the FuzzingBitGen; + // See the implementation for valid value, which may change. public: - ArbitraryImpl() : BitGenRefDomain(InnerContainer{}.WithMinSize(8)) {} + ArbitraryImpl() + : BitGenRefDomain( + DataSequence{}, + ControlSequence{ElementOfImpl({0, 1, 2, 3, 4, 5})}, + ArbitraryImpl()) {} }; } // namespace fuzztest::internal diff --git a/fuzztest/internal/domains/bit_gen_ref.h b/fuzztest/internal/domains/bit_gen_ref.h index 824d35e88..96007e351 100644 --- a/fuzztest/internal/domains/bit_gen_ref.h +++ b/fuzztest/internal/domains/bit_gen_ref.h @@ -15,6 +15,7 @@ #ifndef FUZZTEST_FUZZTEST_INTERNAL_DOMAINS_BIT_GEN_REF_H_ #define FUZZTEST_FUZZTEST_INTERNAL_DOMAINS_BIT_GEN_REF_H_ +#include #include #include #include @@ -39,35 +40,50 @@ namespace fuzztest::internal { // destroyed when CleanupBitGen is called. class BitGenCorpusValue { public: - using InitializerData = std::vector; using URBG = FuzzingBitGen; - explicit BitGenCorpusValue(InitializerData data) - : initializer_data_(std::move(data)) {} + explicit BitGenCorpusValue(std::vector data, + std::vector control, uint64_t seed) + : data_(std::move(data)), control_(std::move(control)), seed_(seed) {} ~BitGenCorpusValue() { CleanupBitGen(); } // Copy and move do not initialize the internal URBG instance. 
BitGenCorpusValue(const BitGenCorpusValue& o) - : initializer_data_(o.initializer_data_), bitgen_(std::nullopt) {} + : data_(o.data_), + control_(o.control_), + seed_(o.seed_), + bitgen_(std::nullopt) {} BitGenCorpusValue& operator=(const BitGenCorpusValue& o) { // The internal URBG should be unused. FUZZTEST_CHECK(!bitgen_.has_value()); - initializer_data_ = o.initializer_data_; + data_ = o.data_; + control_ = o.control_; + seed_ = o.seed_; return *this; } BitGenCorpusValue(BitGenCorpusValue&& o) - : initializer_data_(std::move(o.initializer_data_)), + : data_(std::move(o.data_)), + control_(std::move(o.control_)), + seed_(std::move(o.seed_)), bitgen_(std::nullopt) {} BitGenCorpusValue& operator=(BitGenCorpusValue&& o) { // The internal URBG should be unused. FUZZTEST_CHECK(!o.bitgen_.has_value()); FUZZTEST_CHECK(!bitgen_.has_value()); - initializer_data_ = std::move(o.initializer_data_); + data_ = std::move(o.data_); + control_ = std::move(o.control_); + seed_ = o.seed_; return *this; } - InitializerData& initializer_data() { return initializer_data_; } - const InitializerData& initializer_data() const { return initializer_data_; } + std::vector& data() { return data_; } + const std::vector& data() const { return data_; } + + std::vector& control() { return control_; } + const std::vector& control() const { return control_; } + + uint64_t& seed() { return seed_; } + const uint64_t& seed() const { return seed_; } // Cleanup the internal URBG instance. void CleanupBitGen() { bitgen_.reset(); } @@ -77,16 +93,16 @@ class BitGenCorpusValue { // NOTE: The returned reference is valid until the next call to CleanupBitGen. URBG& GetBitGen() const { if (!bitgen_.has_value()) { - bitgen_.emplace(initializer_data_); + bitgen_.emplace(data_, control_, seed_); } return *bitgen_; } private: - // Underlying fuzzed data stream; the input to the URBG constructor. 
- // When using util_random::FuzzingBitGen, this is a vector of uint8_t which - // defines the sequence of random variates. - std::vector initializer_data_; + // Inputs to the FuzzingBitGen constructor which must outlive it. + std::vector data_; // fuzztest generated data stream. + std::vector control_; // fuzztest generated control stream. + uint64_t seed_; // fuzztest generated seed. mutable std::optional bitgen_; }; @@ -99,21 +115,25 @@ class BitGenCorpusValue { // // The domain accepts an input "data stream" corpus which is used to initialize // a FuzzingBitGen instance. This internal FuzzingBitGen instance is bound to an -// absl::BitGenRef when GetValue is called. +// absl::BitGenRef when GetValue is called. The control stream is reused +// (wrapped around) when exhausted. The data stream falls back to an LCG PRNG +// when exhausted. // // BitGenRefDomain does not support seeded domains. // BitGenRefDomain does not support GetRandomValue. -template +template class BitGenRefDomain - : public domain_implementor::DomainBase, - /*value_type=*/absl::BitGenRef, - /*corpus_type=*/BitGenCorpusValue> { + : public domain_implementor::DomainBase< + BitGenRefDomain, + /*value_type=*/absl::BitGenRef, BitGenCorpusValue> { public: using typename BitGenRefDomain::DomainBase::corpus_type; using typename BitGenRefDomain::DomainBase::value_type; - explicit BitGenRefDomain(const Inner& inner) : inner_(inner) {} - explicit BitGenRefDomain(Inner&& inner) : inner_(std::move(inner)) {} + explicit BitGenRefDomain(const DataSequence& data, + const ControlSequence& control, + const SeedValue& seed_value) + : data_(data), control_(control), seed_value_(seed_value) {} BitGenRefDomain(const BitGenRefDomain&) = default; BitGenRefDomain(BitGenRefDomain&&) = default; @@ -121,17 +141,35 @@ class BitGenRefDomain BitGenRefDomain& operator=(BitGenRefDomain&&) = default; corpus_type Init(absl::BitGenRef prng) { - return corpus_type{inner_.Init(prng)}; + auto data = data_.Init(prng); + auto 
control = control_.Init(prng); + auto seed_value = seed_value_.Init(prng); + return corpus_type{data_.GetValue(data), control_.GetValue(control), + seed_value_.GetValue(seed_value)}; } void Mutate(corpus_type& corpus_value, absl::BitGenRef prng, const domain_implementor::MutationMetadata& metadata, bool only_shrink) { corpus_value.CleanupBitGen(); - inner_.Mutate(corpus_value.initializer_data(), prng, metadata, only_shrink); + auto data_corpus = data_.FromValue(corpus_value.data()); + if (data_corpus.has_value()) { + data_.Mutate(*data_corpus, prng, metadata, only_shrink); + corpus_value.data() = data_.GetValue(*data_corpus); + } + auto control_corpus = control_.FromValue(corpus_value.control()); + if (control_corpus.has_value()) { + control_.Mutate(*control_corpus, prng, metadata, only_shrink); + corpus_value.control() = control_.GetValue(*control_corpus); + } + auto seed_corpus = seed_value_.FromValue(corpus_value.seed()); + if (seed_corpus.has_value()) { + seed_value_.Mutate(*seed_corpus, prng, metadata, only_shrink); + corpus_value.seed() = seed_value_.GetValue(*seed_corpus); + } } - absl::BitGenRef GetValue(const corpus_type& corpus_value) const { - return absl::BitGenRef(corpus_value.GetBitGen()); + value_type GetValue(const corpus_type& corpus_value) const { + return corpus_value.GetBitGen(); } value_type GetRandomValue(absl::BitGenRef prng) { @@ -143,24 +181,70 @@ class BitGenRefDomain // No conversion from absl::BitGenRef back to corpus. 
return std::nullopt; } + absl::Status ValidateCorpusValue(const corpus_type& corpus_value) const { - return inner_.ValidateCorpusValue(corpus_value.initializer_data()); + absl::Status status; + auto data_corpus = data_.FromValue(corpus_value.data()); + if (!data_corpus.has_value()) { + return absl::InvalidArgumentError("Invalid data stream"); + } + auto control_corpus = control_.FromValue(corpus_value.control()); + if (!control_corpus.has_value()) { + return absl::InvalidArgumentError("Invalid control stream"); + } + auto seed_corpus = seed_value_.FromValue(corpus_value.seed()); + if (!seed_corpus.has_value()) { + return absl::InvalidArgumentError("Invalid seed value"); + } + status.Update(data_.ValidateCorpusValue(*data_corpus)); + status.Update(control_.ValidateCorpusValue(*control_corpus)); + status.Update(seed_value_.ValidateCorpusValue(*seed_corpus)); + return status; } + void UpdateMemoryDictionary( const corpus_type& corpus_value, domain_implementor::ConstCmpTablesPtr cmp_tables) { - return inner_.UpdateMemoryDictionary(corpus_value.initializer_data(), - cmp_tables); + auto data_corpus = data_.FromValue(corpus_value.data()); + auto control_corpus = control_.FromValue(corpus_value.control()); + auto seed_corpus = seed_value_.FromValue(corpus_value.seed()); + assert(data_corpus.has_value()); + assert(control_corpus.has_value()); + assert(seed_corpus.has_value()); + data_.UpdateMemoryDictionary(*data_corpus, cmp_tables); + control_.UpdateMemoryDictionary(*control_corpus, cmp_tables); + seed_value_.UpdateMemoryDictionary(*seed_corpus, cmp_tables); } + std::optional ParseCorpus(const internal::IRObject& obj) const { - auto x = inner_.ParseCorpus(obj); - if (x.has_value()) { - return corpus_type(*std::move(x)); + auto container = obj.Subs(); + if (container && container->size() == 3) { + auto x = data_.ParseCorpus((*container)[0]); + auto y = control_.ParseCorpus((*container)[1]); + auto z = seed_value_.ParseCorpus((*container)[2]); + if (x.has_value() && 
y.has_value() && z.has_value()) { + return corpus_type(data_.GetValue(*x), control_.GetValue(*y), + seed_value_.GetValue(*z)); + } } return std::nullopt; } + internal::IRObject SerializeCorpus(const corpus_type& corpus_value) const { - return inner_.SerializeCorpus(corpus_value.initializer_data()); + auto data_corpus = data_.FromValue(corpus_value.data()); + auto control_corpus = control_.FromValue(corpus_value.control()); + auto seed_corpus = seed_value_.FromValue(corpus_value.seed()); + assert(data_corpus.has_value()); + assert(control_corpus.has_value()); + assert(seed_corpus.has_value()); + + internal::IRObject obj; + auto& v = obj.MutableSubs(); + v.reserve(3); + v.emplace_back(data_.SerializeCorpus(*data_corpus)); + v.emplace_back(control_.SerializeCorpus(*control_corpus)); + v.emplace_back(seed_value_.SerializeCorpus(*seed_corpus)); + return obj; } auto GetPrinter() const { return Printer{}; } @@ -174,7 +258,9 @@ class BitGenRefDomain } }; - Inner inner_; + DataSequence data_; + ControlSequence control_; + SeedValue seed_value_; }; } // namespace fuzztest::internal diff --git a/fuzztest/internal/domains/container_of_impl.h b/fuzztest/internal/domains/container_of_impl.h index 0bb9d7993..7a8cd4568 100644 --- a/fuzztest/internal/domains/container_of_impl.h +++ b/fuzztest/internal/domains/container_of_impl.h @@ -99,7 +99,7 @@ class ContainerOfImplBase is_vector_or_string; // If `!container_has_memory_dict`, dict_type is a bool and dict - // is not used. This conditional_t may be neccessary because some + // is not used. This conditional_t may be necessary because some // value_type may not have copy constructors(for example, proto). // Making it a safe type(bool) to not break some targets. 
using dict_type = std::conditional_t -#include #include -#include #include -#include #include #include #include @@ -34,148 +30,80 @@ #include "absl/random/log_uniform_int_distribution.h" #include "absl/random/poisson_distribution.h" #include "absl/random/zipf_distribution.h" -#include "absl/types/span.h" -#include "./common/logging.h" namespace fuzztest::internal { namespace { -// Reference type to consume bytes from a data stream; these are used by -// the fuzzing bitgen distribution implementations. -struct DataStreamConsumer { - // This is a reference to the fuzzing data stream since the mutations - // (src.remove_prefix(...), etc.) are applied to the source stream. - absl::Span& src; - - // Consumes up to num_bytes from the head of the data stream. - size_t ConsumeHead(void* destination, size_t num_bytes) { - num_bytes = std::min(num_bytes, src.size()); - std::memcpy(destination, src.data(), num_bytes); - src.remove_prefix(num_bytes); - return num_bytes; - } +enum class Instruction : uint8_t { + kDataStreamVariate = 0, + kLCGVariate = 1, + kMin = 2, + kMax = 3, + kMean = 4, + kAlternateVariate = 5, +}; + +class ImplURBG { + public: + DataStreamFn data_stream_fn_; + uint8_t control_byte_; - // Consumes up to num_bytes from the tail of the data stream. - size_t ConsumeTail(void* destination, size_t num_bytes) { - num_bytes = std::min(num_bytes, src.size()); - std::memcpy(destination, src.data() + src.size() - num_bytes, num_bytes); - src.remove_suffix(num_bytes); - return num_bytes; + Instruction instruction() { + return static_cast(control_byte_ % 6); } - // Consumes a T from the head of the data stream. template - T ConsumeHead() { - std::conditional_t, uint8_t, T> x{}; - ConsumeHead(&x, sizeof(x)); - if constexpr (std::is_same_v) { - return static_cast(x & 1); - } else { - return x; - } + T get_int_value() { + T x = 0; + data_stream_fn_(&x, sizeof(x)); + return x; } - // Consumes a T from the tail of the data stream. 
template - T ConsumeTail() { - std::conditional_t, uint8_t, T> x{}; - ConsumeTail(&x, sizeof(x)); - if constexpr (std::is_same_v) { - return static_cast(x & 1); + T get_int_value_in_range(uint64_t range) { + // Consume fewer bytes of the data_stream when dealing with a power + // of 2 range. + if (range == 0) { + return 0; + } + uint64_t x = 0; + if (range <= (std::numeric_limits::max)()) { + x = get_int_value(); + } else if (range <= (std::numeric_limits::max)()) { + x = get_int_value(); + } else if (range <= (std::numeric_limits::max)()) { + x = get_int_value(); } else { - return x; + x = get_int_value(); + } + if ((range & (range + 1)) == 0) { + return static_cast(x & range); // power of 2 range + } else { + return static_cast(x % (range + 1)); } } - // Returns a real value in the range [0.0, 1.0]. - template - T ConsumeProbability() { - static_assert(std::is_floating_point_v && sizeof(T) <= sizeof(uint64_t), - "A floating point type is required."); - using IntegralType = - typename std::conditional_t<(sizeof(T) <= sizeof(uint32_t)), uint32_t, - uint64_t>; - auto int_value = ConsumeTail(); - return static_cast(int_value) / - static_cast(std::numeric_limits::max()); - } - - // Returns a value in the closed-closed range [min, max]. - template - T ConsumeValueInRange(T min, T max) { - FUZZTEST_CHECK_LE(min, max); - - if (min == max) return min; - - // Return the min or max value more frequently. - uint8_t byte = ConsumeHead(); - if (byte == 0) { - return min; - } else if (byte == 1) { - return max; - } - byte >>= 1; + // URBG interface. 
+ using result_type = uint64_t; - return ConsumeValueInRangeImpl(min, max, byte); + static constexpr result_type(min)() { + return (std::numeric_limits::min)(); } - - private: - // Returns a real value in the range [min, max] - template - std::enable_if_t, T> // - ConsumeValueInRangeImpl(T min, T max, uint8_t byte) { - static_assert(sizeof(T) <= sizeof(uint64_t), "Unsupported float type."); - // Returns a floating point value in the given range by consuming bytes - // from the input data. If there's no input data left, returns |min|. Note - // that |min| must be less than or equal to |max|. - T range = .0; - T result = min; - constexpr T zero(.0); - if (max > zero && min < zero && max > min + std::numeric_limits::max()) { - // The diff |max - min| would overflow the given floating point type. - // Use the half of the diff as the range and consume a bool to decide - // whether the result is in the first of the second part of the diff. - range = (max / 2.0) - (min / 2.0); - if (byte & 1) { - result += range; - } - } else { - range = max - min; - } - return result + range * ConsumeProbability(); + static constexpr result_type(max)() { + return (std::numeric_limits::max)(); } - // Returns an integral value in the range [min, max] - template - std::enable_if_t, T> // - ConsumeValueInRangeImpl(T min, T max, uint8_t) { - static_assert(sizeof(T) <= sizeof(uint64_t), "Unsupported integral type."); - - // Use the biggest type possible to hold the range and the result. - uint64_t range = static_cast(max) - static_cast(min); - uint64_t result = 0; - size_t offset = 0; - while (offset < sizeof(T) * CHAR_BIT && (range >> offset) > 0 && - !src.empty()) { - uint8_t byte = src.back(); - src.remove_suffix(1); - result = (result << CHAR_BIT) | byte; - offset += CHAR_BIT; - } - - // Avoid division by 0, in case |range + 1| results in overflow. 
- if (range != std::numeric_limits::max()) { - result = result % (range + 1); - } + void reset() {} - return static_cast(static_cast(min) + result); - } + uint64_t operator()() { return get_int_value(); } }; // ----------------------------------------------------------------------------- // Bernoulli -struct ImplBernoulli : public DataStreamConsumer { +struct ImplBernoulli { + ImplURBG urbg; + using DistrT = absl::bernoulli_distribution; using ArgTupleT = std::tuple; using ResultT = bool; @@ -183,150 +111,260 @@ struct ImplBernoulli : public DataStreamConsumer { ResultT operator()(double p) { // Just generate a boolean; mostly ignoring p. // The 0/1 cases are special cased to avoid returning false on constants. - if (p == 0.0) { + if (p <= 0.0) { return false; - } else if (p == 1.0) { + } else if (p >= 1.0) { return true; - } else { - return ConsumeHead(); } + switch (urbg.instruction()) { + case Instruction::kMin: + return false; + case Instruction::kMax: + return true; + case Instruction::kMean: + return p >= 0.5; + default: + break; + } + return urbg.get_int_value() & 1; } }; // Beta template -struct ImplBeta : public DataStreamConsumer { +struct ImplBeta { + ImplURBG urbg; + using DistrT = absl::beta_distribution; using ArgTupleT = std::tuple; using ResultT = RealType; ResultT operator()(RealType a, RealType b) { - if (!src.empty()) { - auto x = ConsumeTail(); - if (std::isfinite(x)) { - return x; - } + switch (urbg.instruction()) { + case Instruction::kMin: + return 0.0; + case Instruction::kMax: + return 1.0; + case Instruction::kMean: + return a / (a + b); // mean + default: + break; } - return a / (a + b); // mean + return DistrT(a, b)(urbg); } }; // Exponential template -struct ImplExponential : public DataStreamConsumer { +struct ImplExponential { + ImplURBG urbg; + using DistrT = absl::exponential_distribution; using ArgTupleT = std::tuple; using ResultT = RealType; ResultT operator()(RealType lambda) { - if (!src.empty()) { - auto x = ConsumeTail(); - 
if (std::isfinite(x)) { - return x; - } + switch (urbg.instruction()) { + case Instruction::kMin: + return 0; + case Instruction::kMean: + return RealType{1} / lambda; // mean + case Instruction::kMax: + return (std::numeric_limits::max)(); + case Instruction::kAlternateVariate: + return absl::uniform_real_distribution( + 0, (std::numeric_limits::max)())(urbg); + default: + break; } - return RealType{1} / lambda; // mean + return DistrT(lambda)(urbg); } }; // Gaussian template -struct ImplGaussian : public DataStreamConsumer { +struct ImplGaussian { + ImplURBG urbg; + using DistrT = absl::gaussian_distribution; using ArgTupleT = std::tuple; using ResultT = RealType; ResultT operator()(RealType mean, RealType sigma) { - if (src.empty()) return mean; const auto ten_sigma = sigma * 10; - RealType min = mean - ten_sigma; - RealType max = mean + ten_sigma; - return ConsumeValueInRange(min, max); + switch (urbg.instruction()) { + // Technically the min/max are -inf/+inf. + case Instruction::kMin: + return -(std::numeric_limits::max)(); + case Instruction::kMax: + return (std::numeric_limits::max)(); + case Instruction::kMean: + return mean; + case Instruction::kAlternateVariate: + // this makes unlikely values much more likely. 
+ return absl::uniform_real_distribution( + mean - ten_sigma, mean + ten_sigma)(urbg); + default: + break; + } + return DistrT(mean, sigma)(urbg); } }; // LogUniform template -struct ImplLogUniform : public DataStreamConsumer { +struct ImplLogUniform { + ImplURBG urbg; + using DistrT = absl::log_uniform_int_distribution; using ArgTupleT = std::tuple; using ResultT = IntType; - ResultT operator()(IntType a, IntType b, IntType) { - if (src.empty()) return a; - return ConsumeValueInRange(a, b); + ResultT operator()(IntType a, IntType b, IntType base) { + switch (urbg.instruction()) { + case Instruction::kMin: + return a; + case Instruction::kMax: + return b; + case Instruction::kMean: + if (a > 0 && b > a) { + double log_b_over_a = std::log(static_cast(b) / a); + return static_cast(static_cast(b - a) / + log_b_over_a); + } + break; + case Instruction::kAlternateVariate: + return urbg.get_int_value_in_range(b - a) + a; + default: + break; + } + return DistrT(a, b, base)(urbg); } }; // Poisson template -struct ImplPoisson : public DataStreamConsumer { +struct ImplPoisson { + ImplURBG urbg; + using DistrT = absl::poisson_distribution; using ArgTupleT = std::tuple; using ResultT = IntType; - ResultT operator()(double) { - if (src.empty()) return 0; - return ConsumeValueInRange(0, std::numeric_limits::max()); + ResultT operator()(double lambda) { + switch (urbg.instruction()) { + case Instruction::kMin: + return 0; + case Instruction::kMax: + return (std::numeric_limits::max)(); + case Instruction::kMean: + return static_cast(lambda); + case Instruction::kAlternateVariate: + return urbg.get_int_value_in_range( + (std::numeric_limits::max)()); + default: + break; + } + return DistrT(lambda)(urbg); } }; // Zipf template -struct ImplZipf : public DataStreamConsumer { +struct ImplZipf { + ImplURBG urbg; + using DistrT = absl::zipf_distribution; using ArgTupleT = std::tuple; using ResultT = IntType; - ResultT operator()(IntType a, double, double) { - if (src.empty()) return 0; 
- return ConsumeValueInRange(0, a); + ResultT operator()(IntType k, double q, double v) { + switch (urbg.instruction()) { + case Instruction::kMin: + return 0; + case Instruction::kMax: + return k; + case Instruction::kAlternateVariate: + return urbg.get_int_value_in_range(k); + default: + break; + } + return DistrT(k, q, v)(urbg); } }; // Uniform template -struct ImplUniform : public DataStreamConsumer { +struct ImplUniform { + ImplURBG urbg; using DistrT = absl::random_internal::UniformDistributionWrapper; using ResultT = R; ResultT operator()(absl::IntervalClosedClosedTag, R min, R max) { - if (src.empty()) return min; - return ConsumeValueInRange(min, max); + if constexpr (std::is_floating_point_v) { + return operator()(absl::IntervalClosedOpen, min, + std::nexttoward(max, (std::numeric_limits::max)())); + } + // Only int-typed calls should reach here. + if constexpr (std::is_integral_v) { + switch (urbg.instruction()) { + case Instruction::kMin: + return min; + case Instruction::kMax: + return max; + case Instruction::kMean: + return min + ((max - min) / 2); + default: + break; + } + if constexpr (sizeof(R) <= sizeof(uint8_t)) { + return min + urbg.get_int_value_in_range(static_cast(max) - + static_cast(min)); + } + // Fallback to absl::uniform_int_distribution. + return absl::uniform_int_distribution(min, max)(urbg); + } else { + return 0; + } } ResultT operator()(absl::IntervalClosedOpenTag, R min, R max) { - if (src.empty()) return min; + if constexpr (std::is_integral_v) { + return operator()(absl::IntervalClosedClosed, min, max - 1); + } + // Only real-typed calls should reach here. 
if constexpr (std::is_floating_point_v) { - max = std::nexttoward(max, min); - return ConsumeValueInRange(min, max); + switch (urbg.instruction()) { + case Instruction::kMin: + return min; + case Instruction::kMax: + return std::nexttoward(max, std::numeric_limits::min()); + case Instruction::kMean: + return min + ((max - min) / 2); + default: + break; + } + return absl::uniform_real_distribution(min, max)(urbg); } else { - max--; - return ConsumeValueInRange(min, max); + return 0; } } ResultT operator()(absl::IntervalOpenOpenTag, R min, R max) { - if (src.empty()) return min; if constexpr (std::is_floating_point_v) { - min = std::nexttoward(min, max); - max = std::nexttoward(max, min); - return ConsumeValueInRange(min, max); + return operator()(absl::IntervalClosedOpen, std::nexttoward(min, max), + max); } else { - min++; - max--; - return ConsumeValueInRange(min, max); + return operator()(absl::IntervalClosedOpen, min + 1, max); } } ResultT operator()(absl::IntervalOpenClosedTag, R min, R max) { - if (src.empty()) return min; if constexpr (std::is_floating_point_v) { - min = std::nexttoward(min, max); - return ConsumeValueInRange(min, max); + return operator()(absl::IntervalClosedClosed, std::nexttoward(min, max), + max); } else { - min++; - return ConsumeValueInRange(min, max); + return operator()(absl::IntervalClosedClosed, min + 1, max); } } @@ -336,23 +374,23 @@ struct ImplUniform : public DataStreamConsumer { ResultT operator()() { static_assert(std::is_unsigned_v); - if (src.empty()) return 0; - return ConsumeTail(); + return operator()(absl::IntervalClosedClosed, 0, + (std::numeric_limits::max)()); } }; // ----------------------------------------------------------------------------- -// InvokeFuzzFunction is a type-erased function pointer which is responsible for -// casting the args_tuple and result parameters to the correct types and then -// invoking the implementation functor. 
It is important that the ArgsTupleT and -// ResultT types match the types of the distribution and the implementation -// functions, so the HandleFuzzedFunction overloads are used to determine the -// correct types. +// InvokeFuzzFunction is a type-erased function pointer which is responsible +// for casting the args_tuple and result parameters to the correct types and +// then invoking the implementation functor. It is important that the +// ArgsTupleT and ResultT types match the types of the distribution and the +// implementation functions, so the HandleFuzzedFunction overloads are used to +// determine the correct types. template -void InvokeFuzzFunction(absl::Span& src, void* args_tuple, - void* result) { - FuzzFunctionT fn{src}; +void InvokeFuzzFunction(DataStreamFn data_stream_fn, uint8_t control_byte, + void* args_tuple, void* result) { + FuzzFunctionT fn{ImplURBG{data_stream_fn, control_byte}}; *static_cast(result) = absl::apply(fn, *static_cast(args_tuple)); } diff --git a/fuzztest/internal/register_fuzzing_mocks.h b/fuzztest/internal/register_fuzzing_mocks.h index 7726c2256..d02d50b51 100644 --- a/fuzztest/internal/register_fuzzing_mocks.h +++ b/fuzztest/internal/register_fuzzing_mocks.h @@ -15,18 +15,21 @@ #ifndef FUZZTEST_FUZZTEST_INTERNAL_REGISTER_FUZZING_MOCKS_H_ #define FUZZTEST_FUZZTEST_INTERNAL_REGISTER_FUZZING_MOCKS_H_ +#include #include #include "absl/base/fast_type_id.h" #include "absl/functional/function_ref.h" -#include "absl/types/span.h" namespace fuzztest::internal { -// TypeErasedFuzzFunctionT(datastream, args_tuple, result) is a type erased -// function pointer for use with absl::MockingBitGen and fuzztest mocking. -using TypeErasedFuzzFunctionT = void (*)(absl::Span&, void*, - void*); +// DataStreamFn copies up to n bytes from the data stream to the buffer pointer. 
+using DataStreamFn = absl::FunctionRef; + +// TypeErasedFuzzFunctionT(data_stream_fn, control_byte, args_tuple, result) +// is a type erased function pointer for use with absl::MockingBitGen and +// fuzztest mocking. +using TypeErasedFuzzFunctionT = void (*)(DataStreamFn, uint8_t, void*, void*); // Registers the fuzzing functions for Abseil distributions. void RegisterAbslRandomFuzzingMocks( diff --git a/fuzztest/internal/register_fuzzing_mocks_test.cc b/fuzztest/internal/register_fuzzing_mocks_test.cc new file mode 100644 index 000000000..5a33fbfc3 --- /dev/null +++ b/fuzztest/internal/register_fuzzing_mocks_test.cc @@ -0,0 +1,173 @@ +// Copyright 2026 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +#include "gtest/gtest.h" +#include "absl/random/bit_gen_ref.h" +#include "absl/random/random.h" +#include "./fuzztest/fuzzing_bit_gen.h" + +namespace fuzztest { +namespace { + +const uint8_t kControlStream[6] = { + 0, // data stream variate + 1, // lcg variate + 2, // min + 3, // max + 4, // mean + 5, // alternate variate +}; + +const uint8_t kDataStream[40] = { + 1, 2, 3, 4, 5, 6, 7, 8, // + 42, 42, 42, 42, 42, 42, 42, 42, // + 50, 60, 70, 80, 10, 20, 30, 40, // + 0x7f, 0x0f, 0x6f, 0x0f, 0x5f, 0x0f, +}; +const uint64_t kSeedValue = 0x0807060504030201; + +// Tests for the absl/random distribution functions which use the +// fuzztest::internal::RegisterAbslRandomFuzzingMocks() function. 
+ +TEST(FuzzingBitGenTest, BernoulliDistributionUsesMock) { + FuzzingBitGen bitgen(kDataStream, kControlStream, kSeedValue); + absl::BitGenRef ref(bitgen); + + EXPECT_TRUE(absl::Bernoulli(ref, 0.5)); + EXPECT_TRUE(absl::Bernoulli(ref, 0.5)); // lcg + EXPECT_FALSE(absl::Bernoulli(ref, 0.5)); // min + EXPECT_TRUE(absl::Bernoulli(ref, 0.5)); // max + EXPECT_TRUE(absl::Bernoulli(ref, 0.6)); // mean +} + +TEST(FuzzingBitGenTest, BetaDistributionUsesMock) { + FuzzingBitGen bitgen(kDataStream, kControlStream, kSeedValue); + absl::BitGenRef ref(bitgen); + + EXPECT_DOUBLE_EQ(absl::Beta(ref, 2.0, 2.0), 0.081234075853663129); + EXPECT_DOUBLE_EQ(absl::Beta(ref, 2.0, 2.0), + 0.65593732986573283); // lcg + EXPECT_DOUBLE_EQ(absl::Beta(ref, 2.0, 2.0), 0.0); // min + EXPECT_DOUBLE_EQ(absl::Beta(ref, 2.0, 2.0), 1.0); // max + EXPECT_DOUBLE_EQ(absl::Beta(ref, 2.0, 2.0), 0.5); // mean +} + +TEST(FuzzingBitGenTest, ExponentialDistributionUsesMock) { + FuzzingBitGen bitgen(kDataStream, kControlStream, kSeedValue); + absl::BitGenRef ref(bitgen); + + EXPECT_DOUBLE_EQ(absl::Exponential(ref, 2.0), 0.015929665930210696); + EXPECT_DOUBLE_EQ(absl::Exponential(ref, 2.0), // lcg + 0.62503166008171429); + EXPECT_DOUBLE_EQ(absl::Exponential(ref, 2.0), 0.0); // min + EXPECT_DOUBLE_EQ(absl::Exponential(ref, 2.0), // max + std::numeric_limits::max()); + EXPECT_DOUBLE_EQ(absl::Exponential(ref, 2.0), 0.5); // mean + EXPECT_DOUBLE_EQ(absl::Exponential(ref, 2.0), // alt + 2.9609063397732257e+307); +} + +TEST(FuzzingBitGenTest, GaussianDistributionUsesMock) { + FuzzingBitGen bitgen(kDataStream, kControlStream, kSeedValue); + absl::BitGenRef ref(bitgen); + + EXPECT_DOUBLE_EQ(absl::Gaussian(ref, 10.0, 1.0), 10.215901634330736); + EXPECT_DOUBLE_EQ(absl::Gaussian(ref, 10.0, 1.0), + 9.3160235046777462); // lcg + EXPECT_DOUBLE_EQ(absl::Gaussian(ref, 10.0, 1.0), // min + -std::numeric_limits::max()); + EXPECT_DOUBLE_EQ(absl::Gaussian(ref, 10.0, 1.0), // max + std::numeric_limits::max()); + 
EXPECT_DOUBLE_EQ(absl::Gaussian(ref, 10.0, 1.0), 10.0); // mean + EXPECT_DOUBLE_EQ(absl::Gaussian(ref, 10.0, 1.0), // alt + 3.2941176470588234); +} + +TEST(FuzzingBitGenTest, LogUniformDistributionUsesMock) { + FuzzingBitGen bitgen(kDataStream, kControlStream, kSeedValue); + absl::BitGenRef ref(bitgen); + + EXPECT_EQ(absl::LogUniform(ref, 10, 1000), 10); + EXPECT_EQ(absl::LogUniform(ref, 10, 1000), 11); // lcg + EXPECT_EQ(absl::LogUniform(ref, 10, 1000), 10); // min + EXPECT_EQ(absl::LogUniform(ref, 10, 1000), 1000); // max + EXPECT_EQ(absl::LogUniform(ref, 10, 1000), 214); // mean (approx) + EXPECT_EQ(absl::LogUniform(ref, 10, 1000), 894); // alt +} + +TEST(FuzzingBitGenTest, PoissonDistributionUsesMock) { + FuzzingBitGen bitgen(kDataStream, kControlStream, kSeedValue); + absl::BitGenRef ref(bitgen); + + EXPECT_EQ(absl::Poisson(ref, 10.0), 2); + EXPECT_EQ(absl::Poisson(ref, 10.0), 9); // lcg + EXPECT_EQ(absl::Poisson(ref, 10.0), 0); // min + EXPECT_EQ(absl::Poisson(ref, 10.0), 2147483647); // max + EXPECT_EQ(absl::Poisson(ref, 10.0), 10); // mean + EXPECT_EQ(absl::Poisson(ref, 10.0), 0); // alt +} + +TEST(FuzzingBitGenTest, ZipfDistributionUsesMock) { + FuzzingBitGen bitgen(kDataStream, kControlStream, kSeedValue); + absl::BitGenRef ref(bitgen); + + EXPECT_EQ(absl::Zipf(ref, 100, 2.0, 1.0), 15); + EXPECT_EQ(absl::Zipf(ref, 100, 2.0, 1.0), 0); // lcg + EXPECT_EQ(absl::Zipf(ref, 100, 2.0, 1.0), 0); // min + EXPECT_EQ(absl::Zipf(ref, 100, 2.0, 1.0), 100); // max + EXPECT_NE(absl::Zipf(ref, 100, 2.0, 1.0), -1); // (unused) + EXPECT_EQ(absl::Zipf(ref, 100, 2.0, 1.0), 50); // alt +} + +TEST(FuzzingBitGenTest, UniformDistributionUInt) { + FuzzingBitGen bitgen(kDataStream, kControlStream, kSeedValue); + absl::BitGenRef ref(bitgen); + + EXPECT_EQ(absl::Uniform(ref), 0x0201); + EXPECT_EQ(absl::Uniform(ref), 34107); // lcg + EXPECT_EQ(absl::Uniform(ref), 0); // min + EXPECT_EQ(absl::Uniform(ref), + std::numeric_limits::max()); // max + EXPECT_EQ(absl::Uniform(ref), + 
std::numeric_limits::max() / 2); // mean +} + +TEST(FuzzingBitGenTest, UniformDistributionInt) { + FuzzingBitGen bitgen(kDataStream, kControlStream, kSeedValue); + absl::BitGenRef ref(bitgen); + + EXPECT_EQ(absl::Uniform(ref, 0, 100), 3); + EXPECT_EQ(absl::Uniform(ref, 0, 100), 71); // lcg + EXPECT_EQ(absl::Uniform(ref, 0, 100), 0); // min + EXPECT_EQ(absl::Uniform(ref, 0, 100), 99); // max + EXPECT_EQ(absl::Uniform(ref, 0, 100), 49); // mean +} + +TEST(FuzzingBitGenTest, UniformDistributionReal) { + FuzzingBitGen bitgen(kDataStream, kControlStream, kSeedValue); + absl::BitGenRef ref(bitgen); + + EXPECT_DOUBLE_EQ(absl::Uniform(ref, 0.0, 100.0), 3.1357170319108034); + EXPECT_DOUBLE_EQ(absl::Uniform(ref, 0.0, 100.0), // lcg + 71.351334409602003); + EXPECT_DOUBLE_EQ(absl::Uniform(ref, 0.0, 100.0), 0.0); // min + EXPECT_LT(absl::Uniform(ref, 0.0, 100.0), 100.0); // max + EXPECT_DOUBLE_EQ(absl::Uniform(ref, 0.0, 100.0), 50.0); // mean +} + +} // namespace +} // namespace fuzztest