From c6af87fab07e47a266bc9a543b761d94b6605067 Mon Sep 17 00:00:00 2001 From: Clement Rey Date: Fri, 14 Apr 2023 10:53:23 +0200 Subject: [PATCH 1/4] update to arrow 0.17 --- arrow2_convert/Cargo.toml | 2 +- arrow2_convert/src/deserialize.rs | 3 ++- arrow2_convert/tests/test_enum.rs | 2 +- arrow2_convert/tests/test_struct.rs | 4 ++-- arrow2_convert_derive/src/derive_enum.rs | 2 +- examples/simple/Cargo.toml | 2 +- 6 files changed, 8 insertions(+), 7 deletions(-) diff --git a/arrow2_convert/Cargo.toml b/arrow2_convert/Cargo.toml index 87ac7cf..de329fd 100644 --- a/arrow2_convert/Cargo.toml +++ b/arrow2_convert/Cargo.toml @@ -12,7 +12,7 @@ repository = "https://github.com/DataEngineeringLabs/arrow2-convert" description = "Convert between nested rust types and Arrow with arrow2" [dependencies] -arrow2 = "0.16" +arrow2 = "0.17" arrow2_convert_derive = { version = "0.4.2", path = "../arrow2_convert_derive", optional = true } chrono = { version = "0.4", default_features = false, features = ["std"] } err-derive = "0.3" diff --git a/arrow2_convert/src/deserialize.rs b/arrow2_convert/src/deserialize.rs index 0f46d2c..87f50e5 100644 --- a/arrow2_convert/src/deserialize.rs +++ b/arrow2_convert/src/deserialize.rs @@ -195,7 +195,8 @@ impl<'a> Iterator for BufferBinaryArrayIter<'a> { } let (start, end) = self.array.offsets().start_end(self.index); self.index += 1; - Some(Some(self.array.values().clone().slice(start, end - start))) + + Some(Some(self.array.values().clone().sliced(start, end - start))) } } } diff --git a/arrow2_convert/tests/test_enum.rs b/arrow2_convert/tests/test_enum.rs index b39cbed..9d78586 100644 --- a/arrow2_convert/tests/test_enum.rs +++ b/arrow2_convert/tests/test_enum.rs @@ -125,7 +125,7 @@ fn test_slice() { let b: Box = enums.try_into_arrow().unwrap(); for i in 0..enums.len() { - let arrow_slice = b.slice(i, enums.len() - i); + let arrow_slice = b.sliced(i, enums.len() - i); let original_slice = &enums[i..enums.len()]; let round_trip: Vec = arrow_slice.try_into_collection().unwrap(); assert_eq!(round_trip, original_slice); diff --git a/arrow2_convert/tests/test_struct.rs b/arrow2_convert/tests/test_struct.rs index d5ea3bb..b9cd3c1 100644 --- a/arrow2_convert/tests/test_struct.rs +++ b/arrow2_convert/tests/test_struct.rs @@ -46,7 +46,7 @@ fn test_slice() { let b: Box = original.try_into_arrow().unwrap(); for i in 0..original.len() { - let arrow_slice = b.slice(i, original.len() - i); + let arrow_slice = b.sliced(i, original.len() - i); let original_slice = &original[i..original.len()]; let round_trip: Vec = arrow_slice.try_into_collection().unwrap(); assert_eq!(round_trip, original_slice); @@ -84,7 +84,7 @@ fn test_nested_slice() { let b: Box = original.try_into_arrow().unwrap(); for i in 0..original.len() { - let arrow_slice = b.slice(i, original.len() - i); + let arrow_slice = b.sliced(i, original.len() - i); let original_slice = &original[i..original.len()]; let round_trip: Vec = arrow_slice.try_into_collection().unwrap(); assert_eq!(round_trip, original_slice); diff --git a/arrow2_convert_derive/src/derive_enum.rs b/arrow2_convert_derive/src/derive_enum.rs index cfe0a88..f549ff6 100644 --- a/arrow2_convert_derive/src/derive_enum.rs +++ b/arrow2_convert_derive/src/derive_enum.rs @@ -502,7 +502,7 @@ pub fn expand_deserialize(input: DeriveEnum) -> TokenStream { return None; }; let (type_idx, offset) = self.arr.index(next_index); - let slice = self.arr.fields()[type_idx].slice(offset, 1); + let slice = self.arr.fields()[type_idx].sliced(offset, 1); match type_idx { #iter_next_match_block _ => panic!("Invalid type for {}", #original_name_str) diff --git a/examples/simple/Cargo.toml b/examples/simple/Cargo.toml index 80b5caf..6b843c9 100644 --- a/examples/simple/Cargo.toml +++ b/examples/simple/Cargo.toml @@ -6,5 +6,5 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -arrow2 = "0.16" +arrow2 = "0.17" arrow2_convert = { version = "0.4.2", path = "../../arrow2_convert" } From c4bab34f1b7605d4ec082b38aceefc7b5daee9e7 Mon Sep 17 00:00:00 2001 From: Clement Rey Date: Fri, 14 Apr 2023 10:58:42 +0200 Subject: [PATCH 2/4] bump to 0.5.0 --- arrow2_convert/Cargo.toml | 6 +++--- arrow2_convert_derive/Cargo.toml | 2 +- examples/simple/Cargo.toml | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arrow2_convert/Cargo.toml b/arrow2_convert/Cargo.toml index de329fd..6fc9ccc 100644 --- a/arrow2_convert/Cargo.toml +++ b/arrow2_convert/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "arrow2_convert" -version = "0.4.2" +version = "0.5.0" authors = [ "Jorge Leitao ", "Chandra Penke ", @@ -13,12 +13,12 @@ description = "Convert between nested rust types and Arrow with arrow2" [dependencies] arrow2 = "0.17" -arrow2_convert_derive = { version = "0.4.2", path = "../arrow2_convert_derive", optional = true } +arrow2_convert_derive = { version = "0.5.0", path = "../arrow2_convert_derive", optional = true } chrono = { version = "0.4", default_features = false, features = ["std"] } err-derive = "0.3" [dev-dependencies] -arrow2_convert_derive = { version = "0.4.2", path = "../arrow2_convert_derive" } +arrow2_convert_derive = { version = "0.5.0", path = "../arrow2_convert_derive" } criterion = "0.4" trybuild = "1.0" diff --git a/arrow2_convert_derive/Cargo.toml b/arrow2_convert_derive/Cargo.toml index 6e2f47a..d5a51d8 100644 --- a/arrow2_convert_derive/Cargo.toml +++ b/arrow2_convert_derive/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "arrow2_convert_derive" -version = "0.4.2" +version = "0.5.0" authors = [ "Jorge Leitao ", "Chandra Penke " diff --git a/examples/simple/Cargo.toml b/examples/simple/Cargo.toml index 6b843c9..a628b0e 100644 --- a/examples/simple/Cargo.toml +++ b/examples/simple/Cargo.toml @@ -7,4 +7,4 @@ edition = "2021" [dependencies] arrow2 = "0.17" -arrow2_convert = { version = "0.4.2", path = "../../arrow2_convert" } +arrow2_convert = { version = "0.5.0", path = "../../arrow2_convert" } From 55f4afd287306ee2f3b909195883f9faf4393515 Mon Sep 17 00:00:00 2001 From: Clement Rey Date: Fri, 14 Apr 2023 11:13:50 +0200 Subject: [PATCH 3/4] update to new datatypes --- Cargo.toml | 3 ++ arrow2_convert/src/field.rs | 10 +++-- arrow2_convert/tests/complex_example.rs | 3 +- arrow2_convert/tests/test_round_trip.rs | 10 ++--- arrow2_convert/tests/test_schema.rs | 44 +++++++++++----------- arrow2_convert/tests/test_serialize.rs | 2 +- arrow2_convert_derive/src/derive_enum.rs | 4 +- arrow2_convert_derive/src/derive_struct.rs | 12 +++--- 8 files changed, 49 insertions(+), 39 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index e840284..f919922 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,3 +4,6 @@ members = [ "arrow2_convert_derive", "examples/simple" ] + +[patch.crates-io] +arrow2 = { git = "https://github.com/rerun-io/arrow2", branch = "cmc/arc_datatype" } diff --git a/arrow2_convert/src/field.rs b/arrow2_convert/src/field.rs index c182233..d8813af 100644 --- a/arrow2_convert/src/field.rs +++ b/arrow2_convert/src/field.rs @@ -1,5 +1,7 @@ //! Implementation and traits for mapping rust types to Arrow types +use std::sync::Arc; + use arrow2::{ buffer::Buffer, datatypes::{DataType, Field}, @@ -225,7 +227,7 @@ where #[inline] fn data_type() -> DataType { - DataType::List(Box::new(::field("item"))) + DataType::List(Arc::new(::field("item"))) } } @@ -238,7 +240,7 @@ where #[inline] fn data_type() -> arrow2::datatypes::DataType { - arrow2::datatypes::DataType::List(Box::new(::field("item"))) + arrow2::datatypes::DataType::List(Arc::new(::field("item"))) } } @@ -255,7 +257,7 @@ where #[inline] fn data_type() -> arrow2::datatypes::DataType { - arrow2::datatypes::DataType::LargeList(Box::new(::field("item"))) + arrow2::datatypes::DataType::LargeList(Arc::new(::field("item"))) } } @@ -272,7 +274,7 @@ where #[inline] fn data_type() -> arrow2::datatypes::DataType { - arrow2::datatypes::DataType::FixedSizeList(Box::new(::field("item")), SIZE) + arrow2::datatypes::DataType::FixedSizeList(Arc::new(::field("item")), SIZE) } } diff --git a/arrow2_convert/tests/complex_example.rs b/arrow2_convert/tests/complex_example.rs index b572c49..aad5061 100644 --- a/arrow2_convert/tests/complex_example.rs +++ b/arrow2_convert/tests/complex_example.rs @@ -7,6 +7,7 @@ use arrow2_convert::serialize::TryIntoArrow; /// - Custom types use arrow2_convert::{ArrowDeserialize, ArrowField, ArrowSerialize}; use std::borrow::Borrow; +use std::sync::Arc; #[derive(Debug, Clone, PartialEq, ArrowField, ArrowSerialize, ArrowDeserialize)] pub struct Root { @@ -85,7 +86,7 @@ impl arrow2_convert::field::ArrowField for CustomType { fn data_type() -> arrow2::datatypes::DataType { arrow2::datatypes::DataType::Extension( "custom".to_string(), - Box::new(arrow2::datatypes::DataType::UInt64), + Arc::new(arrow2::datatypes::DataType::UInt64), None, ) } diff --git a/arrow2_convert/tests/test_round_trip.rs b/arrow2_convert/tests/test_round_trip.rs index d9a6ed3..198eb1f 100644 --- a/arrow2_convert/tests/test_round_trip.rs +++ b/arrow2_convert/tests/test_round_trip.rs @@ -60,7 +60,7 @@ fn test_large_string_nested() { let b: Box = strs.try_into_arrow_as_type::>().unwrap(); assert_eq!( b.data_type(), - &DataType::List(Box::new(Field::new("item", DataType::LargeUtf8, false))) + &DataType::List(Arc::new(Field::new("item", DataType::LargeUtf8, false))) ); let round_trip: Vec> = b.try_into_collection_as_type::>().unwrap(); assert_eq!(round_trip, strs); @@ -81,7 +81,7 @@ fn test_large_binary_nested() { let b: Box = strs.try_into_arrow_as_type::>().unwrap(); assert_eq!( b.data_type(), - &DataType::List(Box::new(Field::new("item", DataType::LargeBinary, false))) + &DataType::List(Arc::new(Field::new("item", DataType::LargeBinary, false))) ); let round_trip: Vec>> = b.try_into_collection_as_type::>().unwrap(); @@ -105,7 +105,7 @@ fn test_large_vec() { let b: Box = ints.try_into_arrow_as_type::>().unwrap(); assert_eq!( b.data_type(), - &DataType::LargeList(Box::new(Field::new("item", DataType::Int32, false))) + &DataType::LargeList(Arc::new(Field::new("item", DataType::Int32, false))) ); let round_trip: Vec> = b.try_into_collection_as_type::>().unwrap(); assert_eq!(round_trip, ints); @@ -119,7 +119,7 @@ fn test_large_vec_nested() { .unwrap(); assert_eq!( b.data_type(), - &DataType::LargeList(Box::new(Field::new("item", DataType::LargeBinary, false))) + &DataType::LargeList(Arc::new(Field::new("item", DataType::LargeBinary, false))) ); let round_trip: Vec>> = b .try_into_collection_as_type::>() @@ -135,7 +135,7 @@ fn test_fixed_size_vec() { .unwrap(); assert_eq!( b.data_type(), - &DataType::FixedSizeList(Box::new(Field::new("item", DataType::Int32, false)), 3) + &DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int32, false)), 3) ); let round_trip: Vec> = b .try_into_collection_as_type::>() diff --git a/arrow2_convert/tests/test_schema.rs b/arrow2_convert/tests/test_schema.rs index 1e465df..924e423 100644 --- a/arrow2_convert/tests/test_schema.rs +++ b/arrow2_convert/tests/test_schema.rs @@ -1,3 +1,5 @@ +use std::sync::Arc; + use arrow2::datatypes::*; use arrow2_convert::ArrowField; @@ -84,7 +86,7 @@ fn test_schema_types() { fn data_type() -> arrow2::datatypes::DataType { arrow2::datatypes::DataType::Extension( "custom".to_string(), - Box::new(arrow2::datatypes::DataType::UInt64), + Arc::new(arrow2::datatypes::DataType::UInt64), None, ) } @@ -118,7 +120,7 @@ fn test_schema_types() { assert_eq!( ::data_type(), - DataType::Struct(vec![ + DataType::Struct(Arc::new(vec![ Field::new("name", DataType::Utf8, true), Field::new("is_deleted", DataType::Boolean, false), Field::new("a1", DataType::Float64, true), @@ -130,7 +132,7 @@ fn test_schema_types() { Field::new("a7", DataType::Decimal(32, 32), false), Field::new( "date_time_list", - DataType::List(Box::new(Field::new( + DataType::List(Arc::new(Field::new( "item", DataType::Timestamp(TimeUnit::Nanosecond, None), false @@ -139,47 +141,47 @@ fn test_schema_types() { ), Field::new( "nullable_list", - DataType::List(Box::new(Field::new("item", DataType::Utf8, true))), + DataType::List(Arc::new(Field::new("item", DataType::Utf8, true))), true ), Field::new( "required_list", - DataType::List(Box::new(Field::new("item", DataType::Utf8, true))), + DataType::List(Arc::new(Field::new("item", DataType::Utf8, true))), false ), Field::new( "custom", - DataType::Extension("custom".to_string(), Box::new(DataType::UInt64), None), + DataType::Extension("custom".to_string(), Arc::new(DataType::UInt64), None), false ), Field::new( "nullable_custom", - DataType::Extension("custom".to_string(), Box::new(DataType::UInt64), None), + DataType::Extension("custom".to_string(), Arc::new(DataType::UInt64), None), true ), Field::new( "custom_list", - DataType::List(Box::new(Field::new( + DataType::List(Arc::new(Field::new( "item", - DataType::Extension("custom".to_string(), Box::new(DataType::UInt64), None), + DataType::Extension("custom".to_string(), Arc::new(DataType::UInt64), None), false ))), false ), Field::new( "child", - DataType::Struct(vec![ + DataType::Struct(Arc::new(vec![ Field::new("a1", DataType::Int64, false), Field::new("a2", DataType::Utf8, false), Field::new( "child_array", - DataType::List(Box::new(Field::new( + DataType::List(Arc::new(Field::new( "item", - DataType::Struct(vec![ + DataType::Struct(Arc::new(vec![ Field::new("a1", DataType::Int32, false), Field::new( "bool_array", - DataType::List(Box::new(Field::new( + DataType::List(Arc::new(Field::new( "item", DataType::Boolean, false @@ -188,24 +190,24 @@ fn test_schema_types() { ), Field::new( "int64_array", - DataType::List(Box::new(Field::new( + DataType::List(Arc::new(Field::new( "item", DataType::Int64, false ))), false ), - ]), + ])), false ))), false ) - ]), + ])), false ), Field::new( "int32_array", - DataType::List(Box::new(Field::new("item", DataType::Int32, false))), + DataType::List(Arc::new(Field::new("item", DataType::Int32, false))), false ), Field::new("large_binary", DataType::LargeBinary, false), @@ -213,15 +215,15 @@ fn test_schema_types() { Field::new("large_string", DataType::LargeUtf8, false), Field::new( "large_vec", - DataType::LargeList(Box::new(Field::new("item", DataType::Int64, false))), + DataType::LargeList(Arc::new(Field::new("item", DataType::Int64, false))), false ), Field::new( "fixed_size_vec", - DataType::FixedSizeList(Box::new(Field::new("item", DataType::Int64, false)), 3), + DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int64, false)), 3), false ), - ]) + ])) ); } @@ -238,6 +240,6 @@ fn test_large_string_schema() { assert_eq!( as arrow2_convert::field::ArrowField>::data_type(), - DataType::List(Box::new(Field::new("item", DataType::LargeUtf8, false))) + DataType::List(Arc::new(Field::new("item", DataType::LargeUtf8, false))) ); } diff --git a/arrow2_convert/tests/test_serialize.rs b/arrow2_convert/tests/test_serialize.rs index 70ecd9e..305d7c8 100644 --- a/arrow2_convert/tests/test_serialize.rs +++ b/arrow2_convert/tests/test_serialize.rs @@ -98,7 +98,7 @@ fn test_field_serialize_error() { fn data_type() -> arrow2::datatypes::DataType { arrow2::datatypes::DataType::Extension( "custom".to_string(), - Box::new(arrow2::datatypes::DataType::UInt64), + Arc::new(arrow2::datatypes::DataType::UInt64), None, ) } diff --git a/arrow2_convert_derive/src/derive_enum.rs b/arrow2_convert_derive/src/derive_enum.rs index f549ff6..7aabae9 100644 --- a/arrow2_convert_derive/src/derive_enum.rs +++ b/arrow2_convert_derive/src/derive_enum.rs @@ -93,11 +93,11 @@ pub fn expand_field(input: DeriveEnum) -> TokenStream { fn data_type() -> arrow2::datatypes::DataType { arrow2::datatypes::DataType::Union( - vec![ + std::sync::Arc::new(vec![ #( <#variant_types as arrow2_convert::field::ArrowField>::field(#variant_names_str), )* - ], + ]), None, #union_type, ) diff --git a/arrow2_convert_derive/src/derive_struct.rs b/arrow2_convert_derive/src/derive_struct.rs index a7b73f7..d2da9bd 100644 --- a/arrow2_convert_derive/src/derive_struct.rs +++ b/arrow2_convert_derive/src/derive_struct.rs @@ -115,11 +115,13 @@ pub fn expand_field(input: DeriveStruct) -> TokenStream { syn::Member::Named(ident) => format_ident!("{}", ident), syn::Member::Unnamed(index) => format_ident!("field_{}", index), }); - quote!(arrow2::datatypes::DataType::Struct(vec![ - #( - <#field_types as arrow2_convert::field::ArrowField>::field(stringify!(#field_names)), - )* - ])) + quote!(arrow2::datatypes::DataType::Struct(std::sync::Arc::new( + vec![ + #( + <#field_types as arrow2_convert::field::ArrowField>::field(stringify!(#field_names)), + )* + ] + ))) } }; From 5a88467b9d561a0179b2cf81d1f0128fc7dfe147 Mon Sep 17 00:00:00 2001 From: Clement Rey Date: Mon, 17 Apr 2023 11:13:45 +0200 Subject: [PATCH 4/4] clippy --- arrow2_convert/tests/test_serialize.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arrow2_convert/tests/test_serialize.rs b/arrow2_convert/tests/test_serialize.rs index 305d7c8..209182d 100644 --- a/arrow2_convert/tests/test_serialize.rs +++ b/arrow2_convert/tests/test_serialize.rs @@ -73,14 +73,14 @@ fn test_array() { #[test] fn test_buffer() { // Buffer and Vec should serialize into BinaryArray - let dat: Vec> = vec![(0..10).into_iter().collect()]; + let dat: Vec> = vec![(0..10).collect()]; let r: Box = dat.try_into_arrow().unwrap(); assert_eq!(r.len(), 1); assert_eq!(r.data_type(), & as ArrowField>::data_type()); assert_eq!(r.data_type(), & as ArrowField>::data_type()); // Buffer and Vec should serialize into ListArray - let dat: Vec> = vec![(0..10).into_iter().collect()]; + let dat: Vec> = vec![(0..10).collect()]; let r: Box = dat.try_into_arrow().unwrap(); assert_eq!(r.len(), 1); assert_eq!(r.data_type(), & as ArrowField>::data_type());