Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 23 additions & 15 deletions tree/dataframe/src/RNTupleDS.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -58,17 +58,20 @@ namespace ROOT::Internal::RDF {
/// TODO(jblomer): consider providing a general set of useful virtual fields as part of RNTuple.
class RRDFCardinalityField final : public ROOT::RFieldBase {
protected:
std::unique_ptr<ROOT::RFieldBase> CloneImpl(std::string_view /* newName */) const final
std::unique_ptr<ROOT::RFieldBase> CloneImpl(std::string_view newName) const final
{
return std::make_unique<RRDFCardinalityField>();
return std::make_unique<RRDFCardinalityField>(newName);
}
void ConstructValue(void *where) const final { *static_cast<std::size_t *>(where) = 0; }

// We construct these fields and know that they match the page source
void ReconcileOnDiskField(const RNTupleDescriptor &) final {}

public:
RRDFCardinalityField() : ROOT::RFieldBase("", "std::size_t", ROOT::ENTupleStructure::kPlain, false /* isSimple */) {}
RRDFCardinalityField(std::string_view name)
: ROOT::RFieldBase(name, "std::size_t", ROOT::ENTupleStructure::kPlain, false /* isSimple */)
{
}
RRDFCardinalityField(RRDFCardinalityField &&other) = default;
RRDFCardinalityField &operator=(RRDFCardinalityField &&other) = default;
~RRDFCardinalityField() override = default;
Expand Down Expand Up @@ -121,9 +124,9 @@ class RArraySizeField final : public ROOT::RFieldBase {
private:
std::size_t fArrayLength;

std::unique_ptr<ROOT::RFieldBase> CloneImpl(std::string_view) const final
std::unique_ptr<ROOT::RFieldBase> CloneImpl(std::string_view newName) const final
{
return std::make_unique<RArraySizeField>(fArrayLength);
return std::make_unique<RArraySizeField>(newName, fArrayLength);
}
void GenerateColumns() final { throw RException(R__FAIL("RArraySizeField fields must only be used for reading")); }
void GenerateColumns(const ROOT::RNTupleDescriptor &) final {}
Expand All @@ -140,8 +143,8 @@ class RArraySizeField final : public ROOT::RFieldBase {
void ReconcileOnDiskField(const RNTupleDescriptor &) final {}

public:
RArraySizeField(std::size_t arrayLength)
: ROOT::RFieldBase("", "std::size_t", ROOT::ENTupleStructure::kPlain, false /* isSimple */),
RArraySizeField(std::string_view name, std::size_t arrayLength)
: ROOT::RFieldBase(name, "std::size_t", ROOT::ENTupleStructure::kPlain, false /* isSimple */),
fArrayLength(arrayLength)
{
}
Expand Down Expand Up @@ -330,28 +333,32 @@ void ROOT::RDF::RNTupleDS::AddField(const ROOT::RNTupleDescriptor &desc, std::st
// Collections get the additional "number of" RDF column (e.g. "R_rdf_sizeof_tracks")
if (!fieldInfos.empty()) {
const auto &info = fieldInfos.back();
const std::string name = "R_rdf_sizeof_" + desc.GetFieldDescriptor(info.fFieldId).GetFieldName();
if (info.fNRepetitions > 0) {
cardinalityField = std::make_unique<ROOT::Internal::RDF::RArraySizeField>(info.fNRepetitions);
cardinalityField = std::make_unique<ROOT::Internal::RDF::RArraySizeField>(name, info.fNRepetitions);
} else {
cardinalityField = std::make_unique<ROOT::Internal::RDF::RRDFCardinalityField>();
cardinalityField = std::make_unique<ROOT::Internal::RDF::RRDFCardinalityField>(name);
}
cardinalityField->SetOnDiskId(info.fFieldId);
}

for (auto i = fieldInfos.rbegin(); i != fieldInfos.rend(); ++i) {
const auto &fieldInfo = *i;

const auto valueFieldName = valueField->GetFieldName();

if (fieldInfo.fNRepetitions > 0) {
// Fixed-size array, read it as ROOT::RVec in memory
valueField = std::make_unique<ROOT::RArrayAsRVecField>("", std::move(valueField), fieldInfo.fNRepetitions);
valueField =
std::make_unique<ROOT::RArrayAsRVecField>(valueFieldName, valueField->Clone("_0"), fieldInfo.fNRepetitions);
} else {
// Actual collection. A std::vector or ROOT::RVec gets added as a ROOT::RVec. All other collection types keep
// their original type.
if (convertToRVec) {
valueField = std::make_unique<ROOT::RRVecField>("", std::move(valueField));
valueField = std::make_unique<ROOT::RRVecField>(valueFieldName, valueField->Clone("_0"));
} else {
auto outerFieldType = desc.GetFieldDescriptor(fieldInfo.fFieldId).GetTypeName();
valueField = ROOT::RFieldBase::Create("", outerFieldType).Unwrap();
valueField = ROOT::RFieldBase::Create(valueFieldName, outerFieldType).Unwrap();
}
}

Expand All @@ -360,13 +367,14 @@ void ROOT::RDF::RNTupleDS::AddField(const ROOT::RNTupleDescriptor &desc, std::st
// Skip the inner-most collection level to construct the cardinality column
// It's taken care of by the `if (!fieldInfos.empty())` scope above
if (i != fieldInfos.rbegin()) {
const auto cardinalityFieldName = cardinalityField->GetFieldName();
if (fieldInfo.fNRepetitions > 0) {
// This collection level refers to a fixed-size array
cardinalityField =
std::make_unique<ROOT::RArrayAsRVecField>("", std::move(cardinalityField), fieldInfo.fNRepetitions);
cardinalityField = std::make_unique<ROOT::RArrayAsRVecField>(
cardinalityFieldName, cardinalityField->Clone("_0"), fieldInfo.fNRepetitions);
} else {
// This collection level refers to an RVec
cardinalityField = std::make_unique<ROOT::RRVecField>("", std::move(cardinalityField));
cardinalityField = std::make_unique<ROOT::RRVecField>(cardinalityFieldName, cardinalityField->Clone("_0"));
}

cardinalityField->SetOnDiskId(fieldInfo.fFieldId);
Expand Down
6 changes: 5 additions & 1 deletion tree/ntuple/inc/ROOT/RField.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -69,14 +69,18 @@ class RFieldZero final : public RFieldBase {
/// This flag is reset on Clone().
bool fAllowFieldSubstitutions = false;

std::unordered_set<std::string> fSubFieldNames; ///< Efficient detection of duplicate field names

protected:
std::unique_ptr<RFieldBase> CloneImpl(std::string_view newName) const final;
void ConstructValue(void *) const final {}

public:
RFieldZero() : RFieldBase("", "", ROOT::ENTupleStructure::kRecord, false /* isSimple */) {}

using RFieldBase::Attach;
/// A public version of the Attach method that allows piece-wise construction of the zero field.
/// Will throw on duplicate subfield names.
void Attach(std::unique_ptr<RFieldBase> child);
Comment thread
hahnjo marked this conversation as resolved.
size_t GetValueSize() const final { return 0; }
size_t GetAlignment() const final { return 0; }

Expand Down
6 changes: 3 additions & 3 deletions tree/ntuple/inc/ROOT/RField/RFieldRecord.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -87,17 +87,17 @@ protected:
/// that ensure that the resulting memory layout matches std::pair or std::tuple, resp.
RRecordField(std::string_view fieldName, std::string_view typeName);

void AttachItemFields(std::vector<std::unique_ptr<RFieldBase>> itemFields);
void AttachItemFields(std::vector<std::unique_ptr<RFieldBase>> itemFields, bool useNumberedFields);

template <std::size_t N>
void AttachItemFields(std::array<std::unique_ptr<RFieldBase>, N> itemFields)
void AttachItemFields(std::array<std::unique_ptr<RFieldBase>, N> itemFields, bool useNumberedFields)
{
fTraits |= kTraitTrivialType;
for (unsigned i = 0; i < N; ++i) {
fMaxAlignment = std::max(fMaxAlignment, itemFields[i]->GetAlignment());
fSize += GetItemPadding(fSize, itemFields[i]->GetAlignment()) + itemFields[i]->GetValueSize();
fTraits &= itemFields[i]->GetTraits();
Attach(std::move(itemFields[i]));
Attach(std::move(itemFields[i]), useNumberedFields ? ("_" + std::to_string(i)) : "");
}
// Trailing padding: although this is implementation-dependent, most add enough padding to comply with the
// requirements of the type with strictest alignment
Expand Down
5 changes: 3 additions & 2 deletions tree/ntuple/inc/ROOT/RFieldBase.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -501,8 +501,9 @@ protected:
// on the data that's written, e.g. for polymorphic types in the streamer field.
virtual ROOT::RExtraTypeInfoDescriptor GetExtraTypeInfo() const { return ROOT::RExtraTypeInfoDescriptor(); }

/// Add a new subfield to the list of nested fields
void Attach(std::unique_ptr<RFieldBase> child);
/// Add a new subfield to the list of nested fields. Throws an exception if expectedChildName is non-empty and the
/// passed field has a different name.
void Attach(std::unique_ptr<RFieldBase> child, std::string_view expectedChildName = "");

/// Called by ConnectPageSource() before connecting; derived classes may override this as appropriate, e.g.
/// for the application of I/O rules. In the process, the field at hand or its subfields may be marked as
Expand Down
35 changes: 26 additions & 9 deletions tree/ntuple/src/RField.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,22 @@ void ROOT::Internal::SetAllowFieldSubstitutions(RFieldZero &fieldZero, bool val)
fieldZero.fAllowFieldSubstitutions = val;
}

void ROOT::RFieldZero::Attach(std::unique_ptr<RFieldBase> child)
{
const std::string childName = child->GetFieldName();
Comment thread
hahnjo marked this conversation as resolved.
if (fSubFieldNames.count(childName) > 0)
throw RException(R__FAIL("duplicate field name: " + childName));
RFieldBase::Attach(std::move(child), "");
fSubFieldNames.insert(childName);
Comment thread
hahnjo marked this conversation as resolved.
}

std::unique_ptr<ROOT::RFieldBase> ROOT::RFieldZero::CloneImpl(std::string_view /*newName*/) const
{
auto result = std::make_unique<RFieldZero>();
for (auto &f : fSubfields)
for (auto &f : fSubfields) {
result->Attach(f->Clone(f->GetFieldName()));
result->fSubFieldNames.insert(f->GetFieldName());
}
return result;
}

Expand Down Expand Up @@ -565,14 +576,15 @@ ROOT::RRecordField::RRecordField(std::string_view fieldName, std::string_view ty
{
}

void ROOT::RRecordField::AttachItemFields(std::vector<std::unique_ptr<RFieldBase>> itemFields)
void ROOT::RRecordField::AttachItemFields(std::vector<std::unique_ptr<RFieldBase>> itemFields, bool useNumberedFields)
{
fTraits |= kTraitTrivialType;
for (auto &item : itemFields) {
fMaxAlignment = std::max(fMaxAlignment, item->GetAlignment());
fSize += GetItemPadding(fSize, item->GetAlignment()) + item->GetValueSize();
fTraits &= item->GetTraits();
Attach(std::move(item));
const auto N = itemFields.size();
for (std::size_t i = 0; i < N; ++i) {
fMaxAlignment = std::max(fMaxAlignment, itemFields[i]->GetAlignment());
fSize += GetItemPadding(fSize, itemFields[i]->GetAlignment()) + itemFields[i]->GetValueSize();
fTraits &= itemFields[i]->GetTraits();
Attach(std::move(itemFields[i]), useNumberedFields ? ("_" + std::to_string(i)) : "");
}
// Trailing padding: although this is implementation-dependent, most add enough padding to comply with the
// requirements of the type with strictest alignment
Expand Down Expand Up @@ -602,7 +614,12 @@ ROOT::RRecordField::RRecordField(std::string_view fieldName, std::vector<std::un
{
fTraits |= kTraitTrivialType;
fOffsets.reserve(itemFields.size());
std::unordered_set<std::string_view> fieldNames;
for (auto &item : itemFields) {
const auto &itemName = item->GetFieldName();
if (!fieldNames.insert(itemName).second) {
throw RException(R__FAIL("duplicate field name: " + itemName));
}
fSize += GetItemPadding(fSize, item->GetAlignment());
fOffsets.push_back(fSize);
fMaxAlignment = std::max(fMaxAlignment, item->GetAlignment());
Expand Down Expand Up @@ -845,7 +862,7 @@ ROOT::RNullableField::RNullableField(std::string_view fieldName, const std::stri
if (!itemField->GetTypeAlias().empty())
fTypeAlias = typePrefix + "<" + itemField->GetTypeAlias() + ">";

Attach(std::move(itemField));
Attach(std::move(itemField), "_0");
}

const ROOT::RFieldBase::RColumnRepresentations &ROOT::RNullableField::GetColumnRepresentations() const
Expand Down Expand Up @@ -1190,7 +1207,7 @@ ROOT::RAtomicField::RAtomicField(std::string_view fieldName, std::unique_ptr<RFi
if (!itemField->GetTypeAlias().empty())
fTypeAlias = "std::atomic<" + itemField->GetTypeAlias() + ">";

Attach(std::move(itemField));
Attach(std::move(itemField), "_0");
}

std::unique_ptr<ROOT::RFieldBase> ROOT::RAtomicField::CloneImpl(std::string_view newName) const
Expand Down
8 changes: 7 additions & 1 deletion tree/ntuple/src/RFieldBase.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -648,12 +648,18 @@ std::vector<ROOT::RFieldBase::RValue> ROOT::RFieldBase::SplitValue(const RValue
return std::vector<RValue>();
}

void ROOT::RFieldBase::Attach(std::unique_ptr<ROOT::RFieldBase> child)
void ROOT::RFieldBase::Attach(std::unique_ptr<ROOT::RFieldBase> child, std::string_view expectedChildName)
{
// Note that during a model update, new fields will be attached to the zero field. The zero field, however,
// does not change its initial state because only its sub fields get connected by RPageSink::UpdateSchema.
if (fState != EState::kUnconnected)
throw RException(R__FAIL("invalid attempt to attach subfield to already connected field"));

if (!expectedChildName.empty() && child->GetFieldName() != expectedChildName) {
throw RException(R__FAIL(std::string("invalid subfield name: ") + child->GetFieldName() +
" expected: " + std::string(expectedChildName)));
}

child->fParent = this;
fSubfields.emplace_back(std::move(child));
}
Expand Down
17 changes: 8 additions & 9 deletions tree/ntuple/src/RFieldMeta.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -709,7 +709,7 @@ ROOT::RPairField::RPairField(std::string_view fieldName, std::array<std::unique_
if (typeAlias != GetTypeName())
fTypeAlias = typeAlias;

AttachItemFields(std::move(itemFields));
AttachItemFields(std::move(itemFields), true /* useNumberedFields */);
fOffsets.push_back(offsets[0]);
fOffsets.push_back(offsets[1]);
}
Expand All @@ -721,7 +721,7 @@ ROOT::RPairField::RPairField(std::string_view fieldName, std::array<std::unique_
if (typeAlias != GetTypeName())
fTypeAlias = typeAlias;

AttachItemFields(std::move(itemFields));
AttachItemFields(std::move(itemFields), true /* useNumberedFields */);

// ISO C++ does not guarantee any specific layout for `std::pair`; query TClass for the member offsets
auto *c = TClass::GetClass(GetTypeName().c_str());
Expand Down Expand Up @@ -852,11 +852,10 @@ ROOT::RProxiedCollectionField::RProxiedCollectionField(std::string_view fieldNam

std::unique_ptr<ROOT::RFieldBase> ROOT::RProxiedCollectionField::CloneImpl(std::string_view newName) const
{
auto newItemField = fSubfields[0]->Clone(fSubfields[0]->GetFieldName());
auto clone =
std::unique_ptr<RProxiedCollectionField>(new RProxiedCollectionField(newName, fProxy->GetCollectionClass()));
clone->fItemSize = fItemSize;
clone->Attach(std::move(newItemField));
clone->Attach(fSubfields[0]->Clone(fSubfields[0]->GetFieldName()));
return clone;
}

Expand Down Expand Up @@ -976,7 +975,7 @@ ROOT::RMapField::RMapField(std::string_view fieldName, EMapType mapType, std::un
auto *itemClass = fProxy->GetValueClass();
fItemSize = itemClass->GetClassSize();

Attach(std::move(itemField));
Attach(std::move(itemField), "_0");
}

std::unique_ptr<ROOT::RFieldBase> ROOT::RMapField::CloneImpl(std::string_view newName) const
Expand Down Expand Up @@ -1011,7 +1010,7 @@ ROOT::RSetField::RSetField(std::string_view fieldName, ESetType setType, std::un

fItemSize = itemField->GetValueSize();

Attach(std::move(itemField));
Attach(std::move(itemField), "_0");
}

std::unique_ptr<ROOT::RFieldBase> ROOT::RSetField::CloneImpl(std::string_view newName) const
Expand Down Expand Up @@ -1323,7 +1322,7 @@ ROOT::RTupleField::RTupleField(std::string_view fieldName, std::vector<std::uniq
if (typeAlias != GetTypeName())
fTypeAlias = typeAlias;

AttachItemFields(std::move(itemFields));
AttachItemFields(std::move(itemFields), true /* useNumberedFields */);
fOffsets = offsets;
}

Expand All @@ -1334,7 +1333,7 @@ ROOT::RTupleField::RTupleField(std::string_view fieldName, std::vector<std::uniq
if (typeAlias != GetTypeName())
fTypeAlias = typeAlias;

AttachItemFields(std::move(itemFields));
AttachItemFields(std::move(itemFields), true /* useNumberedFields */);

auto *c = TClass::GetClass(GetTypeName().c_str());
if (!c)
Expand Down Expand Up @@ -1438,7 +1437,7 @@ ROOT::RVariantField::RVariantField(std::string_view fieldName, std::vector<std::
fMaxItemSize = std::max(fMaxItemSize, itemFields[i]->GetValueSize());
fMaxAlignment = std::max(fMaxAlignment, itemFields[i]->GetAlignment());
fTraits &= itemFields[i]->GetTraits();
Attach(std::move(itemFields[i]));
Attach(std::move(itemFields[i]), "_" + std::to_string(i));
}

// With certain template parameters, the union of members of an std::variant starts at an offset > 0.
Expand Down
10 changes: 5 additions & 5 deletions tree/ntuple/src/RFieldSequenceContainer.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ ROOT::RArrayField::RArrayField(std::string_view fieldName, std::unique_ptr<RFiel
fTypeAlias = "std::array<" + itemField->GetTypeAlias() + "," +
Internal::GetNormalizedInteger(static_cast<unsigned long long>(arrayLength)) + ">";
}
Attach(std::move(itemField));
Attach(std::move(itemField), "_0");
}

std::unique_ptr<ROOT::RFieldBase> ROOT::RArrayField::CloneImpl(std::string_view newName) const
Expand Down Expand Up @@ -251,7 +251,7 @@ ROOT::RRVecField::RRVecField(std::string_view fieldName, std::unique_ptr<RFieldB
fItemDeleter = GetDeleterOf(*itemField);
if (!itemField->GetTypeAlias().empty())
fTypeAlias = "ROOT::VecOps::RVec<" + itemField->GetTypeAlias() + ">";
Attach(std::move(itemField));
Attach(std::move(itemField), "_0");
fValueSize = EvalRVecValueSize(fSubfields[0]->GetAlignment(), fSubfields[0]->GetValueSize(), GetAlignment());

// Determine if we can optimize bulk reading
Expand Down Expand Up @@ -560,7 +560,7 @@ ROOT::RVectorField::RVectorField(std::string_view fieldName, std::unique_ptr<RFi

if (!(itemField->GetTraits() & kTraitTriviallyDestructible))
fItemDeleter = GetDeleterOf(*itemField);
Attach(std::move(itemField));
Attach(std::move(itemField), "_0");
}

ROOT::RVectorField::RVectorField(std::string_view fieldName, std::unique_ptr<RFieldBase> itemField)
Expand Down Expand Up @@ -870,7 +870,7 @@ ROOT::RArrayAsRVecField::RArrayAsRVecField(std::string_view fieldName, std::uniq
{
if (!itemField->GetTypeAlias().empty())
fTypeAlias = "ROOT::VecOps::RVec<" + itemField->GetTypeAlias() + ">";
Attach(std::move(itemField));
Attach(std::move(itemField), "_0");
fValueSize = EvalRVecValueSize(fSubfields[0]->GetAlignment(), fSubfields[0]->GetValueSize(), GetAlignment());
if (!(fSubfields[0]->GetTraits() & kTraitTriviallyDestructible))
fItemDeleter = GetDeleterOf(*fSubfields[0]);
Expand Down Expand Up @@ -974,7 +974,7 @@ ROOT::RArrayAsVectorField::RArrayAsVectorField(std::string_view fieldName, std::
{
if (!itemField->GetTypeAlias().empty())
fTypeAlias = "std::vector<" + itemField->GetTypeAlias() + ">";
Attach(std::move(itemField));
Attach(std::move(itemField), "_0");
if (!(fSubfields[0]->GetTraits() & kTraitTriviallyDestructible))
fItemDeleter = GetDeleterOf(*fSubfields[0]);
}
Expand Down
2 changes: 1 addition & 1 deletion tree/ntuple/src/RNTupleProcessor.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -401,7 +401,7 @@ class RAuxiliaryProcessorField final : public ROOT::RRecordField {
for (auto &item : itemFields) {
fOffsets.push_back(GetItemPadding(fSize, item->GetAlignment()));
}
AttachItemFields(std::move(itemFields));
AttachItemFields(std::move(itemFields), false /* useNumberedFields */);
}
};
} // namespace ROOT::Experimental::Internal
Expand Down
1 change: 1 addition & 0 deletions tree/ntuple/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ if(NOT MSVC)
ROOT_ADD_GTEST(ntuple_evolution_shape ntuple_evolution_shape.cxx LIBRARIES ROOTNTuple)
ROOT_ADD_GTEST(ntuple_emulated ntuple_emulated.cxx LIBRARIES ROOTNTuple)
endif()
ROOT_ADD_GTEST(ntuple_field_name ntuple_field_name.cxx LIBRARIES ROOTNTuple)
ROOT_ADD_GTEST(ntuple_join_table ntuple_join_table.cxx LIBRARIES ROOTNTuple)
ROOT_ADD_GTEST(ntuple_merger ntuple_merger.cxx LIBRARIES ROOTNTuple CustomStruct ZLIB::ZLIB Tree INCLUDE_DIRS ${CMAKE_SOURCE_DIR}/tree/tree/inc)
ROOT_ADD_GTEST(ntuple_metrics ntuple_metrics.cxx LIBRARIES ROOTNTuple)
Expand Down
Loading
Loading