Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ arrow-cast = "58"
arrow-data = "58"
arrow-ipc = "58"
arrow-ord = "58"
arrow-schema = "58"
arrow-schema = { version = "58", features = ["canonical_extension_types"] }
arrow-select = "58"
arrow-string = "58"
async-fs = "2.2.0"
Expand Down
8 changes: 8 additions & 0 deletions vortex-array/public-api.lock
Original file line number Diff line number Diff line change
Expand Up @@ -14858,6 +14858,14 @@ pub mod vortex_array::extension::uuid

pub struct vortex_array::extension::uuid::Uuid

impl vortex_array::extension::uuid::Uuid

pub fn vortex_array::extension::uuid::Uuid::default(nullability: vortex_array::dtype::Nullability) -> vortex_array::dtype::extension::ExtDType<Self>

pub fn vortex_array::extension::uuid::Uuid::new(metadata: vortex_array::extension::uuid::UuidMetadata, nullability: vortex_array::dtype::Nullability) -> vortex_array::dtype::extension::ExtDType<Self>

pub fn vortex_array::extension::uuid::Uuid::storage_dtype(nullability: vortex_array::dtype::Nullability) -> vortex_array::dtype::DType

impl core::clone::Clone for vortex_array::extension::uuid::Uuid

pub fn vortex_array::extension::uuid::Uuid::clone(&self) -> vortex_array::extension::uuid::Uuid
Expand Down
116 changes: 116 additions & 0 deletions vortex-array/src/arrow/executor/fixed_size_binary.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

use std::sync::Arc;

use arrow_array::ArrayRef as ArrowArrayRef;
use arrow_array::FixedSizeBinaryArray;
use vortex_error::VortexResult;
use vortex_error::vortex_bail;

use crate::ArrayRef;
use crate::ExecutionCtx;
use crate::arrays::ExtensionArray;
use crate::arrays::FixedSizeListArray;
use crate::arrays::PrimitiveArray;
use crate::arrow::executor::validity::to_arrow_null_buffer;
use crate::dtype::DType;
use crate::dtype::PType;
use crate::vtable::ValidityHelper;

/// Export a Vortex array as an Arrow [`FixedSizeBinaryArray`] whose values are `size` bytes wide.
///
/// The input may be an extension array (e.g. UUID), in which case its storage array is used,
/// or a plain `FixedSizeList(Primitive(U8), size)`.
///
/// # Errors
///
/// Fails if executing the array (or any of its children) fails, or if the executed
/// fixed-size list is not a list of `U8` whose list size equals `size`.
pub(super) fn to_arrow_fixed_size_binary(
    array: ArrayRef,
    size: i32,
    ctx: &mut ExecutionCtx,
) -> VortexResult<ArrowArrayRef> {
    // Extension arrays keep their bytes in a storage array; anything else is used as-is.
    let storage = if array.dtype().is_extension() {
        let extension = array.execute::<ExtensionArray>(ctx)?;
        extension.storage_array().clone()
    } else {
        array
    };

    let fsl = storage.execute::<FixedSizeListArray>(ctx)?;

    // Only a fixed-size list of `U8` with exactly `size` elements per row maps onto
    // Arrow's FixedSizeBinary(size).
    let other = fsl.dtype();
    let is_byte_list_of_size = match other {
        DType::FixedSizeList(elem, list_size, _) => {
            *list_size == size as u32 && matches!(elem.as_ref(), DType::Primitive(PType::U8, _))
        }
        _ => false,
    };
    if !is_byte_list_of_size {
        vortex_bail!("FixedSizeBinary({size}) requires FixedSizeList(U8, {size}), got {other}");
    }

    // The flattened element bytes become the Arrow values buffer; the list-level validity
    // becomes the Arrow null buffer.
    let values = fsl
        .elements()
        .clone()
        .execute::<PrimitiveArray>(ctx)?
        .into_buffer::<u8>()
        .into_arrow_buffer();
    let null_buffer = to_arrow_null_buffer(fsl.validity(), fsl.len(), ctx)?;

    let arrow_array = FixedSizeBinaryArray::new(size, values, null_buffer);
    Ok(Arc::new(arrow_array))
}

#[cfg(test)]
mod tests {
    use arrow_array::FixedSizeBinaryArray;
    use arrow_schema::DataType;
    use vortex_buffer::BitBuffer;
    use vortex_buffer::Buffer;

    use crate::IntoArray;
    use crate::LEGACY_SESSION;
    use crate::VortexSessionExecute;
    use crate::arrays::ExtensionArray;
    use crate::arrays::FixedSizeListArray;
    use crate::arrays::PrimitiveArray;
    use crate::arrow::ArrowArrayExecutor;
    use crate::dtype::Nullability;
    use crate::extension::uuid::Uuid;
    use crate::extension::uuid::vtable::UUID_BYTE_LEN;
    use crate::validity::Validity;

    /// A nullable UUID extension array (valid, null, valid) must export to an Arrow
    /// `FixedSizeBinary` array holding the same bytes and the same null row.
    #[expect(
        clippy::cast_possible_truncation,
        reason = "UUID_BYTE_LEN always fits u32/i32"
    )]
    #[test]
    fn test_uuid_to_fixed_size_binary() {
        let first = uuid::Uuid::parse_str("550e8400-e29b-41d4-a716-446655440000").unwrap();
        let second = uuid::Uuid::parse_str("f47ac10b-58cc-4372-a567-0e02b2c3d479").unwrap();

        // Three 16-byte slots back to back: first UUID, zeroed filler for the null row,
        // second UUID.
        let mut bytes: Vec<u8> = Vec::new();
        bytes.extend_from_slice(first.as_bytes());
        bytes.extend_from_slice(&[0u8; 16]);
        bytes.extend_from_slice(second.as_bytes());

        let elements = PrimitiveArray::new(Buffer::from(bytes), Validity::NonNullable).into_array();
        let validity = Validity::from(BitBuffer::from_iter([true, false, true]));
        let storage = FixedSizeListArray::try_new(elements, UUID_BYTE_LEN as u32, validity, 3)
            .unwrap()
            .into_array();
        let uuid_array =
            ExtensionArray::new(Uuid::default(Nullability::Nullable).erased(), storage);

        let target = DataType::FixedSizeBinary(UUID_BYTE_LEN as i32);
        let mut ctx = LEGACY_SESSION.create_execution_ctx();
        let arrow = uuid_array
            .into_array()
            .execute_arrow(Some(&target), &mut ctx)
            .unwrap();

        let rows = [
            Some(first.as_bytes().as_slice()),
            None,
            Some(second.as_bytes().as_slice()),
        ];
        let expected = FixedSizeBinaryArray::try_from_sparse_iter_with_size(
            rows.into_iter(),
            UUID_BYTE_LEN as i32,
        )
        .unwrap();
        assert_eq!(arrow.as_ref(), &expected as &dyn arrow_array::Array);
    }
}
6 changes: 4 additions & 2 deletions vortex-array/src/arrow/executor/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ mod byte;
pub mod byte_view;
mod decimal;
mod dictionary;
mod fixed_size_binary;
mod fixed_size_list;
mod list;
mod list_view;
Expand Down Expand Up @@ -38,6 +39,7 @@ use crate::arrow::executor::byte::to_arrow_byte_array;
use crate::arrow::executor::byte_view::to_arrow_byte_view;
use crate::arrow::executor::decimal::to_arrow_decimal;
use crate::arrow::executor::dictionary::to_arrow_dictionary;
use crate::arrow::executor::fixed_size_binary::to_arrow_fixed_size_binary;
use crate::arrow::executor::fixed_size_list::to_arrow_fixed_list;
use crate::arrow::executor::list::to_arrow_list;
use crate::arrow::executor::list_view::to_arrow_list_view;
Expand Down Expand Up @@ -156,8 +158,8 @@ impl ArrowArrayExecutor for ArrayRef {
DataType::RunEndEncoded(ends_type, values_type) => {
to_arrow_run_end(self, ends_type.data_type(), values_type, ctx)
}
DataType::FixedSizeBinary(_)
| DataType::Map(..)
DataType::FixedSizeBinary(size) => to_arrow_fixed_size_binary(self, *size, ctx),
DataType::Map(..)
| DataType::Duration(_)
| DataType::Interval(_)
| DataType::Union(..) => {
Expand Down
53 changes: 49 additions & 4 deletions vortex-array/src/dtype/arrow.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ use arrow_schema::Schema;
use arrow_schema::SchemaBuilder;
use arrow_schema::SchemaRef;
use arrow_schema::TimeUnit as ArrowTimeUnit;
use arrow_schema::extension::ExtensionType as _;
use vortex_error::VortexError;
use vortex_error::VortexExpect;
use vortex_error::VortexResult;
Expand All @@ -42,6 +43,8 @@ use crate::extension::datetime::TemporalMetadata;
use crate::extension::datetime::Time;
use crate::extension::datetime::TimeUnit;
use crate::extension::datetime::Timestamp;
use crate::extension::uuid::Uuid;
use crate::extension::uuid::vtable::UUID_BYTE_LEN;

/// Trait for converting Arrow types to Vortex types.
pub trait FromArrowType<T>: Sized {
Expand Down Expand Up @@ -210,15 +213,22 @@ impl FromArrowType<(&DataType, Nullability)> for DType {

impl FromArrowType<&Field> for DType {
fn from_arrow(field: &Field) -> Self {
let nullability = Nullability::from(field.is_nullable());

if field
.metadata()
.get("ARROW:extension:name")
.map(|s| s.as_str())
== Some("arrow.parquet.variant")
{
return DType::Variant(field.is_nullable().into());
return DType::Variant(nullability);
}

if field.extension_type_name() == Some(arrow_schema::extension::Uuid::NAME) {
return DType::Extension(Uuid::default(nullability).erased());
}
Self::from_arrow((field.data_type(), field.is_nullable().into()))

Self::from_arrow((field.data_type(), nullability))
}
}

Expand All @@ -245,11 +255,17 @@ impl DType {
.into(),
)
} else {
Field::new(
let mut field = Field::new(
field_name.as_ref(),
field_dtype.to_arrow_dtype()?,
field_dtype.is_nullable(),
)
);
if let DType::Extension(ext) = field_dtype
&& ext.is::<Uuid>()
{
field = field.with_extension_type(arrow_schema::extension::Uuid);
}
field
};
builder.push(field);
}
Expand Down Expand Up @@ -349,6 +365,14 @@ impl DType {
});
};

if ext_dtype.is::<Uuid>() {
#[expect(
clippy::cast_possible_truncation,
reason = "UUID_BYTE_LEN always fits i32"
)]
return Ok(DataType::FixedSizeBinary(UUID_BYTE_LEN as i32));
}

vortex_bail!("Unsupported extension type \"{}\"", ext_dtype.id())
}
})
Expand Down Expand Up @@ -561,4 +585,25 @@ mod test {

assert_eq!(original_dtype, roundtripped_dtype);
}

/// A struct dtype with a nullable UUID field must survive Vortex -> Arrow schema -> Vortex.
#[test]
fn test_uuid_schema_roundtrip() {
    let uuid_dtype = DType::Extension(Uuid::default(Nullability::Nullable).erased());
    let original = DType::struct_([("id", uuid_dtype)], Nullability::NonNullable);

    // Vortex -> Arrow: the field is 16-byte fixed-size binary tagged with the UUID
    // extension name.
    let schema = original.to_arrow_schema().unwrap();
    let id_field = schema.field(0);
    assert_eq!(id_field.data_type(), &DataType::FixedSizeBinary(16));
    assert_eq!(
        id_field.extension_type_name(),
        Some(arrow_schema::extension::Uuid::NAME)
    );

    // Arrow -> Vortex: the extension tag restores the UUID extension dtype.
    let roundtripped = DType::from_arrow(&schema);
    assert_eq!(roundtripped, original);
}
}
36 changes: 36 additions & 0 deletions vortex-array/src/extension/uuid/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,42 @@ pub use metadata::UuidMetadata;

pub(crate) mod vtable;

use std::sync::Arc;

use vortex_error::VortexExpect;

use crate::dtype::DType;
use crate::dtype::Nullability;
use crate::dtype::PType;
use crate::dtype::extension::ExtDType;

/// The VTable for the UUID extension type.
///
/// This is a field-less unit struct. Note that the derived [`Default`] produces the vtable
/// value itself (`Uuid`), whereas the inherent `Uuid::default(nullability)` associated
/// function returns an `ExtDType<Uuid>` built with default metadata.
#[derive(Clone, Debug, Default, PartialEq, Eq, Hash)]
pub struct Uuid;

#[expect(
    clippy::cast_possible_truncation,
    reason = "UUID_BYTE_LEN always fits u32"
)]
// The inherent `default` below shares its name with `Default::default`, which `Uuid` derives.
#[allow(clippy::same_name_method)]
impl Uuid {
    /// Builds the canonical UUID storage dtype:
    /// `FixedSizeList(Primitive(U8, NonNullable), 16)`.
    ///
    /// The elements are always non-nullable bytes; `nullability` applies to the list itself.
    pub fn storage_dtype(nullability: Nullability) -> DType {
        let byte_dtype = DType::Primitive(PType::U8, Nullability::NonNullable);
        let byte_len = vtable::UUID_BYTE_LEN as u32;
        DType::FixedSizeList(Arc::new(byte_dtype), byte_len, nullability)
    }

    /// Builds a UUID extension dtype from `metadata`, stored as
    /// [`Self::storage_dtype`] with the given `nullability`.
    ///
    /// A failure from `ExtDType::try_new` is treated as an internal invariant violation
    /// via `vortex_expect`, since the storage dtype is constructed here.
    pub fn new(metadata: UuidMetadata, nullability: Nullability) -> ExtDType<Self> {
        let storage = Self::storage_dtype(nullability);
        ExtDType::try_new(metadata, storage).vortex_expect("valid UUID storage dtype")
    }

    /// Builds a UUID extension dtype using `UuidMetadata::default()`.
    pub fn default(nullability: Nullability) -> ExtDType<Self> {
        let metadata = UuidMetadata::default();
        Self::new(metadata, nullability)
    }
}
41 changes: 13 additions & 28 deletions vortex-array/src/extension/uuid/vtable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -182,9 +182,11 @@ mod tests {
#[case::non_nullable(Nullability::NonNullable)]
#[case::nullable(Nullability::Nullable)]
fn validate_correct_storage_dtype(#[case] nullability: Nullability) -> VortexResult<()> {
let metadata = UuidMetadata::default();
let storage_dtype = uuid_storage_dtype(nullability);
ExtDType::try_with_vtable(Uuid, metadata, storage_dtype)?;
ExtDType::try_with_vtable(
Uuid,
UuidMetadata::default(),
Uuid::storage_dtype(nullability),
)?;
Ok(())
}

Expand Down Expand Up @@ -229,10 +231,7 @@ mod tests {
let expected = uuid::Uuid::parse_str("550e8400-e29b-41d4-a716-446655440000")
.map_err(|e| vortex_error::vortex_err!("{e}"))?;

let ext_dtype = ExtDType::try_new(
UuidMetadata::default(),
uuid_storage_dtype(Nullability::NonNullable),
)?;
let ext_dtype = Uuid::default(Nullability::NonNullable);
let children: Vec<Scalar> = expected
.as_bytes()
.iter()
Expand Down Expand Up @@ -261,13 +260,12 @@ mod tests {
assert_eq!(v4_uuid.get_version(), Some(Version::Random));

// Metadata says v7, but the UUID is v4.
let ext_dtype = ExtDType::try_with_vtable(
Uuid,
let ext_dtype = Uuid::new(
UuidMetadata {
version: Some(Version::SortRand),
},
uuid_storage_dtype(Nullability::NonNullable),
)?;
Nullability::NonNullable,
);
let children: Vec<Scalar> = v4_uuid
.as_bytes()
.iter()
Expand Down Expand Up @@ -307,13 +305,12 @@ mod tests {
let v4_uuid = uuid::Uuid::parse_str("550e8400-e29b-41d4-a716-446655440000")
.map_err(|e| vortex_error::vortex_err!("{e}"))?;

let ext_dtype = ExtDType::try_new(
let ext_dtype = Uuid::new(
UuidMetadata {
version: Some(Version::Random),
},
uuid_storage_dtype(Nullability::NonNullable),
)
.unwrap();
Nullability::NonNullable,
);
let storage_value = uuid_storage_scalar(&v4_uuid);

let result = Uuid::unpack_native(&ext_dtype, &storage_value)?;
Expand All @@ -327,23 +324,11 @@ mod tests {
let v4_uuid = uuid::Uuid::parse_str("550e8400-e29b-41d4-a716-446655440000")
.map_err(|e| vortex_error::vortex_err!("{e}"))?;

let ext_dtype = ExtDType::try_new(
UuidMetadata::default(),
uuid_storage_dtype(Nullability::NonNullable),
)
.unwrap();
let ext_dtype = Uuid::default(Nullability::NonNullable);
let storage_value = uuid_storage_scalar(&v4_uuid);

let result = Uuid::unpack_native(&ext_dtype, &storage_value)?;
assert_eq!(result, v4_uuid);
Ok(())
}

fn uuid_storage_dtype(nullability: Nullability) -> DType {
DType::FixedSizeList(
Arc::new(DType::Primitive(PType::U8, Nullability::NonNullable)),
UUID_BYTE_LEN as u32,
nullability,
)
}
}
Loading