Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 42 additions & 12 deletions rust/flatbuffers/src/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ use core::marker::PhantomData;
use core::ops::{Add, AddAssign, Deref, DerefMut, Index, IndexMut, Sub, SubAssign};
use core::ptr::write_bytes;

#[cfg(feature = "std")]
use std::collections::HashMap;

use crate::endian_scalar::emplace_scalar;
use crate::primitives::*;
use crate::push::{Push, PushAlignment};
Expand Down Expand Up @@ -139,6 +142,9 @@ pub struct FlatBufferBuilder<'fbb, A: Allocator = DefaultAllocator> {

min_align: usize,
force_defaults: bool,
#[cfg(feature = "std")]
strings_pool: HashMap<String, WIPOffset<&'fbb str>>,
#[cfg(not(feature = "std"))]
strings_pool: Vec<WIPOffset<&'fbb str>>,

_phantom: PhantomData<&'fbb ()>,
Expand Down Expand Up @@ -197,6 +203,9 @@ impl<'fbb, A: Allocator> FlatBufferBuilder<'fbb, A> {

min_align: 0,
force_defaults: false,
#[cfg(feature = "std")]
strings_pool: HashMap::new(),
#[cfg(not(feature = "std"))]
strings_pool: Vec::new(),

_phantom: PhantomData,
Expand Down Expand Up @@ -343,6 +352,31 @@ impl<'fbb, A: Allocator> FlatBufferBuilder<'fbb, A> {
WIPOffset::new(o.value())
}

/// Write `s` into the buffer as a utf8 string, or hand back the offset of an
/// identical string that was already written through this method.
///
/// Previously written strings are remembered in a `HashMap` keyed by their
/// contents, so a repeated string costs a single hash lookup and adds no new
/// bytes to the buffer. The key is only cloned when the string is new.
///
/// The builder asserts that no table or vector is under construction when
/// this is called.
#[cfg(feature = "std")]
#[inline]
pub fn create_shared_string<'a: 'b, 'b>(&'a mut self, s: &'b str) -> WIPOffset<&'fbb str> {
    self.assert_not_nested(
        "create_shared_string can not be called when a table or vector is under construction",
    );

    // Copy the cached offset out first so the pool is no longer borrowed
    // when the miss path needs `&mut self` to write the string.
    let cached = self.strings_pool.get(s).copied();
    match cached {
        Some(existing) => existing,
        None => {
            let written = self.create_byte_string(s.as_bytes());
            let fresh = WIPOffset::new(written.value());
            self.strings_pool.insert(s.to_owned(), fresh);
            fresh
        }
    }
}

/// Create a utf8 string, and de-duplicate if already created.
///
/// Uses a sorted Vec with binary search to track previously written
/// strings when in `no_std` mode.
#[cfg(not(feature = "std"))]
#[inline]
pub fn create_shared_string<'a: 'b, 'b>(&'a mut self, s: &'b str) -> WIPOffset<&'fbb str> {
self.assert_not_nested(
Expand All @@ -355,19 +389,15 @@ impl<'fbb, A: Allocator> FlatBufferBuilder<'fbb, A> {

let found = self.strings_pool.binary_search_by(|offset| {
let ptr = offset.value() as usize;
// Gets The pointer to the size of the string
let str_memory = &buf[buf.len() - ptr..];
// Gets the size of the written string from buffer
let size =
u32::from_le_bytes([str_memory[0], str_memory[1], str_memory[2], str_memory[3]])
as usize;
// Size of the string size
let string_size: usize = 4;
// Fetches actual string bytes from index of string after string size
// to the size of string plus string size
let iter = str_memory[string_size..size + string_size].iter();
// Compares bytes of fetched string and current writable string
iter.cloned().cmp(s.bytes())
let size = u32::from_le_bytes([
str_memory[0],
str_memory[1],
str_memory[2],
str_memory[3],
]) as usize;
let stored = &str_memory[4..4 + size];
stored.cmp(s.as_bytes())
});

match found {
Expand Down
73 changes: 73 additions & 0 deletions tests/rust_usage_test/tests/integration_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3224,4 +3224,77 @@ fn test_shared_strings() {
assert_eq!(string_vector.get(1), "foo");
}

#[test]
fn test_shared_strings_pool_deduplication() {
    // Exercises create_shared_string end to end:
    //   1. distinct strings receive distinct offsets,
    //   2. repeated strings receive the offset of their first occurrence,
    //   3. reset() forgets strings written before the reset,
    //   4. a buffer built from shared strings reads back correctly.
    let mut builder = flatbuffers::FlatBufferBuilder::with_capacity(1024);

    // 1. Insert multiple unique strings and verify each gets a distinct offset.
    let animals = ["cat", "dog", "bird", "fish", "snake"];
    let offsets: Vec<_> = animals
        .iter()
        .map(|s| builder.create_shared_string(s))
        .collect();
    for (i, first) in offsets.iter().enumerate() {
        for (j, second) in offsets.iter().enumerate().skip(i + 1) {
            assert_ne!(
                first.value(),
                second.value(),
                "unique strings '{}' and '{}' must have different offsets",
                animals[i],
                animals[j],
            );
        }
    }

    // 2. Re-insert the same strings and verify they return the original offsets.
    for (s, original) in animals.iter().zip(offsets.iter()) {
        let offset = builder.create_shared_string(s);
        assert_eq!(
            offset.value(),
            original.value(),
            "duplicate string '{}' must return the same offset",
            s,
        );
    }

    // 3. Verify that reset clears the pool. The last animal was written after
    // four other strings, so its pre-reset offset differs from the offset it
    // gets as the very first string of an empty buffer (this assumes offsets
    // reflect how many bytes have been written so far). If reset left the
    // pool intact, the stale pre-reset offset would be returned instead.
    // The first animal would not work here: it is the first string written
    // both before and after the reset, so both offsets would coincide.
    let last = animals.len() - 1;
    let stale = offsets[last].value();
    builder.reset();
    let fresh = builder.create_shared_string(animals[last]);
    assert_ne!(
        fresh.value(),
        stale,
        "reset must clear the shared string pool, but '{}' was served from a stale entry",
        animals[last],
    );
    let fresh_dup = builder.create_shared_string(animals[last]);
    assert_eq!(
        fresh.value(),
        fresh_dup.value(),
        "same string after reset must still deduplicate"
    );

    // 4. Verify that shared strings produce a valid, readable buffer.
    builder.reset();
    let shared_name = builder.create_shared_string("goblin");
    let shared_name_dup = builder.create_shared_string("goblin");
    assert_eq!(shared_name.value(), shared_name_dup.value());

    let enemy = my_game::example::Monster::create(
        &mut builder,
        &my_game::example::MonsterArgs {
            name: Some(shared_name),
            ..Default::default()
        },
    );
    // Deduplication must keep working after a table has been written.
    let main_name = builder.create_shared_string("goblin");
    assert_eq!(main_name.value(), shared_name.value());

    let monster = my_game::example::Monster::create(
        &mut builder,
        &my_game::example::MonsterArgs {
            name: Some(main_name),
            enemy: Some(enemy),
            ..Default::default()
        },
    );
    builder.finish(monster, None);

    // Both tables reference the single shared string and must read it back.
    let m = my_game::example::root_as_monster(builder.finished_data()).unwrap();
    assert_eq!(m.name(), "goblin");
    assert_eq!(m.enemy().unwrap().name(), "goblin");
}

}
Loading