1818//! [`VariantArray`] implementation
1919
2020use crate :: VariantArrayBuilder ;
21- use crate :: type_conversion:: { generic_conversion_single_value, primitive_conversion_single_value} ;
21+ use crate :: type_conversion:: {
22+ generic_conversion_single_value, generic_conversion_single_value_with_result,
23+ primitive_conversion_single_value,
24+ } ;
2225use arrow:: array:: { Array , ArrayRef , AsArray , BinaryViewArray , StructArray } ;
2326use arrow:: buffer:: NullBuffer ;
2427use arrow:: compute:: cast;
@@ -27,6 +30,7 @@ use arrow::datatypes::{
2730 Float64Type , Int8Type , Int16Type , Int32Type , Int64Type , Time64MicrosecondType ,
2831 TimestampMicrosecondType , TimestampNanosecondType ,
2932} ;
33+ use arrow:: error:: Result ;
3034use arrow_schema:: extension:: ExtensionType ;
3135use arrow_schema:: { ArrowError , DataType , Field , FieldRef , Fields , TimeUnit } ;
3236use chrono:: { DateTime , NaiveTime } ;
@@ -58,11 +62,11 @@ impl ExtensionType for VariantType {
5862 Some ( String :: new ( ) )
5963 }
6064
61- fn deserialize_metadata ( _metadata : Option < & str > ) -> Result < Self :: Metadata , ArrowError > {
65+ fn deserialize_metadata ( _metadata : Option < & str > ) -> Result < Self :: Metadata > {
6266 Ok ( "" )
6367 }
6468
65- fn supports_data_type ( & self , data_type : & DataType ) -> Result < ( ) , ArrowError > {
69+ fn supports_data_type ( & self , data_type : & DataType ) -> Result < ( ) > {
6670 if matches ! ( data_type, DataType :: Struct ( _) ) {
6771 Ok ( ( ) )
6872 } else {
@@ -72,7 +76,7 @@ impl ExtensionType for VariantType {
7276 }
7377 }
7478
75- fn try_new ( data_type : & DataType , _metadata : Self :: Metadata ) -> Result < Self , ArrowError > {
79+ fn try_new ( data_type : & DataType , _metadata : Self :: Metadata ) -> Result < Self > {
7680 Self . supports_data_type ( data_type) ?;
7781 Ok ( Self )
7882 }
@@ -249,7 +253,7 @@ impl VariantArray {
249253 /// int8.
250254 ///
251255 /// Currently, only [`BinaryViewArray`] are supported.
252- pub fn try_new ( inner : & dyn Array ) -> Result < Self , ArrowError > {
256+ pub fn try_new ( inner : & dyn Array ) -> Result < Self > {
253257 // Workaround lack of support for Binary
254258 // https://github.com/apache/arrow-rs/issues/8387
255259 let inner = cast_to_binary_view_arrays ( inner) ?;
@@ -325,12 +329,32 @@ impl VariantArray {
325329
326330 /// Return the [`Variant`] instance stored at the given row
327331 ///
328- /// Note: This method does not check for nulls and the value is arbitrary
329- /// (but still well-defined) if [`is_null`](Self::is_null) returns true for the index .
332+ /// This is a convenience wrapper that calls [`VariantArray::try_value`] and unwraps the `Result`.
333+ /// Use `try_value` if you need to handle conversion errors gracefully .
330334 ///
331335 /// # Panics
332336 /// * if the index is out of bounds
333337 /// * if the array value is null
338+ /// * if `try_value` returns an error.
339+ pub fn value ( & self , index : usize ) -> Variant < ' _ , ' _ > {
340+ self . try_value ( index) . unwrap ( )
341+ }
342+
343+ /// Return the [`Variant`] instance stored at the given row
344+ ///
345+ /// Note: This method does not check for nulls and the value is arbitrary
346+ /// (but still well-defined) if [`is_null`](Self::is_null) returns true for the index.
347+ ///
348+ /// # Panics
349+ ///
350+ /// Panics if
351+ /// * the index is out of bounds
352+ /// * the array value is null
353+ ///
354+ /// # Errors
355+ ///
356+ /// Errors if
357+ /// - the data in `typed_value` cannot be interpreted as a valid `Variant`
334358 ///
335359 /// If this is a shredded variant but has no value at the shredded location, it
336360 /// will return [`Variant::Null`].
@@ -343,19 +367,19 @@ impl VariantArray {
343367 ///
344368 /// Note: Does not do deep validation of the [`Variant`], so it is up to the
345369 /// caller to ensure that the metadata and value were constructed correctly.
346- pub fn value ( & self , index : usize ) -> Variant < ' _ , ' _ > {
370+ pub fn try_value ( & self , index : usize ) -> Result < Variant < ' _ , ' _ > > {
347371 match ( self . typed_value_field ( ) , self . value_field ( ) ) {
348372 // Always prefer typed_value, if available
349373 ( Some ( typed_value) , value) if typed_value. is_valid ( index) => {
350374 typed_value_to_variant ( typed_value, value, index)
351375 }
352376 // Otherwise fall back to value, if available
353377 ( _, Some ( value) ) if value. is_valid ( index) => {
354- Variant :: new ( self . metadata . value ( index) , value. value ( index) )
378+ Ok ( Variant :: new ( self . metadata . value ( index) , value. value ( index) ) )
355379 }
356380 // It is technically invalid for neither value nor typed_value fields to be available,
357381 // but the spec specifically requires readers to return Variant::Null in this case.
358- _ => Variant :: Null ,
382+ _ => Ok ( Variant :: Null ) ,
359383 }
360384 }
361385
@@ -603,7 +627,7 @@ impl ShreddedVariantFieldArray {
603627 /// or be a list, large_list, list_view or struct
604628 ///
605629 /// Currently, only `value` columns of type [`BinaryViewArray`] are supported.
606- pub fn try_new ( inner : & dyn Array ) -> Result < Self , ArrowError > {
630+ pub fn try_new ( inner : & dyn Array ) -> Result < Self > {
607631 let Some ( inner_struct) = inner. as_struct_opt ( ) else {
608632 return Err ( ArrowError :: InvalidArgumentError (
609633 "Invalid ShreddedVariantFieldArray: requires StructArray as input" . to_string ( ) ,
@@ -835,7 +859,7 @@ impl<'a> BorrowedShreddingState<'a> {
835859impl < ' a > TryFrom < & ' a StructArray > for BorrowedShreddingState < ' a > {
836860 type Error = ArrowError ;
837861
838- fn try_from ( inner_struct : & ' a StructArray ) -> Result < Self , ArrowError > {
862+ fn try_from ( inner_struct : & ' a StructArray ) -> Result < Self > {
839863 // The `value` column need not exist, but if it does it must be a binary view.
840864 let value = if let Some ( value_col) = inner_struct. column_by_name ( "value" ) {
841865 let Some ( binary_view) = value_col. as_binary_view_opt ( ) else {
@@ -856,7 +880,7 @@ impl<'a> TryFrom<&'a StructArray> for BorrowedShreddingState<'a> {
856880impl TryFrom < & StructArray > for ShreddingState {
857881 type Error = ArrowError ;
858882
859- fn try_from ( inner_struct : & StructArray ) -> Result < Self , ArrowError > {
883+ fn try_from ( inner_struct : & StructArray ) -> Result < Self > {
860884 Ok ( BorrowedShreddingState :: try_from ( inner_struct) ?. into ( ) )
861885 }
862886}
@@ -914,34 +938,34 @@ fn typed_value_to_variant<'a>(
914938 typed_value : & ' a ArrayRef ,
915939 value : Option < & BinaryViewArray > ,
916940 index : usize ,
917- ) -> Variant < ' a , ' a > {
941+ ) -> Result < Variant < ' a , ' a > > {
918942 let data_type = typed_value. data_type ( ) ;
919943 if value. is_some_and ( |v| !matches ! ( data_type, DataType :: Struct ( _) ) && v. is_valid ( index) ) {
920944 // Only a partially shredded struct is allowed to have values for both columns
921945 panic ! ( "Invalid variant, conflicting value and typed_value" ) ;
922946 }
923947 match data_type {
924- DataType :: Null => Variant :: Null ,
948+ DataType :: Null => Ok ( Variant :: Null ) ,
925949 DataType :: Boolean => {
926950 let boolean_array = typed_value. as_boolean ( ) ;
927951 let value = boolean_array. value ( index) ;
928- Variant :: from ( value)
952+ Ok ( Variant :: from ( value) )
929953 }
930954 // 16-byte FixedSizeBinary alway corresponds to a UUID; all other sizes are illegal.
931955 DataType :: FixedSizeBinary ( 16 ) => {
932956 let array = typed_value. as_fixed_size_binary ( ) ;
933957 let value = array. value ( index) ;
934- Uuid :: from_slice ( value) . unwrap ( ) . into ( ) // unwrap is safe: slice is always 16 bytes
958+ Ok ( Uuid :: from_slice ( value) . unwrap ( ) . into ( ) ) // unwrap is safe: slice is always 16 bytes
935959 }
936960 DataType :: BinaryView => {
937961 let array = typed_value. as_binary_view ( ) ;
938962 let value = array. value ( index) ;
939- Variant :: from ( value)
963+ Ok ( Variant :: from ( value) )
940964 }
941965 DataType :: Utf8 => {
942966 let array = typed_value. as_string :: < i32 > ( ) ;
943967 let value = array. value ( index) ;
944- Variant :: from ( value)
968+ Ok ( Variant :: from ( value) )
945969 }
946970 DataType :: Int8 => {
947971 primitive_conversion_single_value ! ( Int8Type , typed_value, index)
@@ -965,28 +989,28 @@ fn typed_value_to_variant<'a>(
965989 primitive_conversion_single_value ! ( Float64Type , typed_value, index)
966990 }
967991 DataType :: Decimal32 ( _, s) => {
968- generic_conversion_single_value ! (
992+ generic_conversion_single_value_with_result ! (
969993 Decimal32Type ,
970994 as_primitive,
971- |v| VariantDecimal4 :: try_new( v, * s as u8 ) . map_or ( Variant :: Null , Variant :: from ) ,
995+ |v| VariantDecimal4 :: try_new( v, * s as u8 ) ,
972996 typed_value,
973997 index
974998 )
975999 }
9761000 DataType :: Decimal64 ( _, s) => {
977- generic_conversion_single_value ! (
1001+ generic_conversion_single_value_with_result ! (
9781002 Decimal64Type ,
9791003 as_primitive,
980- |v| VariantDecimal8 :: try_new( v, * s as u8 ) . map_or ( Variant :: Null , Variant :: from ) ,
1004+ |v| VariantDecimal8 :: try_new( v, * s as u8 ) ,
9811005 typed_value,
9821006 index
9831007 )
9841008 }
9851009 DataType :: Decimal128 ( _, s) => {
986- generic_conversion_single_value ! (
1010+ generic_conversion_single_value_with_result ! (
9871011 Decimal128Type ,
9881012 as_primitive,
989- |v| VariantDecimal16 :: try_new( v, * s as u8 ) . map_or ( Variant :: Null , Variant :: from ) ,
1013+ |v| VariantDecimal16 :: try_new( v, * s as u8 ) ,
9901014 typed_value,
9911015 index
9921016 )
@@ -1001,14 +1025,14 @@ fn typed_value_to_variant<'a>(
10011025 )
10021026 }
10031027 DataType :: Time64 ( TimeUnit :: Microsecond ) => {
1004- generic_conversion_single_value ! (
1028+ generic_conversion_single_value_with_result ! (
10051029 Time64MicrosecondType ,
10061030 as_primitive,
10071031 |v| NaiveTime :: from_num_seconds_from_midnight_opt(
10081032 ( v / 1_000_000 ) as u32 ,
10091033 ( v % 1_000_000 ) as u32 * 1000
10101034 )
1011- . map_or ( Variant :: Null , Variant :: from ) ,
1035+ . ok_or_else ( || format! ( "Invalid microsecond from midnight: {}" , v ) ) ,
10121036 typed_value,
10131037 index
10141038 )
@@ -1060,7 +1084,7 @@ fn typed_value_to_variant<'a>(
10601084 "Unsupported typed_value type: {}" ,
10611085 typed_value. data_type( )
10621086 ) ;
1063- Variant :: Null
1087+ Ok ( Variant :: Null )
10641088 }
10651089 }
10661090}
@@ -1075,7 +1099,7 @@ fn typed_value_to_variant<'a>(
10751099/// * `StructArray<metadata: BinaryView, value: BinaryView>`
10761100///
10771101/// So cast them to get the right type.
1078- fn cast_to_binary_view_arrays ( array : & dyn Array ) -> Result < ArrayRef , ArrowError > {
1102+ fn cast_to_binary_view_arrays ( array : & dyn Array ) -> Result < ArrayRef > {
10791103 let new_type = canonicalize_and_verify_data_type ( array. data_type ( ) ) ?;
10801104 if let Cow :: Borrowed ( _) = new_type {
10811105 if let Some ( array) = array. as_struct_opt ( ) {
@@ -1088,9 +1112,7 @@ fn cast_to_binary_view_arrays(array: &dyn Array) -> Result<ArrayRef, ArrowError>
10881112/// Recursively visits a data type, ensuring that it only contains data types that can legally
10891113/// appear in a (possibly shredded) variant array. It also replaces Binary fields with BinaryView,
10901114/// since that's what comes back from the parquet reader and what the variant code expects to find.
1091- fn canonicalize_and_verify_data_type (
1092- data_type : & DataType ,
1093- ) -> Result < Cow < ' _ , DataType > , ArrowError > {
1115+ fn canonicalize_and_verify_data_type ( data_type : & DataType ) -> Result < Cow < ' _ , DataType > > {
10941116 use DataType :: * ;
10951117
10961118 // helper macros
@@ -1188,7 +1210,7 @@ fn canonicalize_and_verify_data_type(
11881210 Ok ( new_data_type)
11891211}
11901212
1191- fn canonicalize_and_verify_field ( field : & Arc < Field > ) -> Result < Cow < ' _ , Arc < Field > > , ArrowError > {
1213+ fn canonicalize_and_verify_field ( field : & Arc < Field > ) -> Result < Cow < ' _ , Arc < Field > > > {
11921214 let Cow :: Owned ( new_data_type) = canonicalize_and_verify_data_type ( field. data_type ( ) ) ? else {
11931215 return Ok ( Cow :: Borrowed ( field) ) ;
11941216 } ;
@@ -1199,11 +1221,15 @@ fn canonicalize_and_verify_field(field: &Arc<Field>) -> Result<Cow<'_, Arc<Field
11991221#[ cfg( test) ]
12001222mod test {
12011223 use crate :: VariantArrayBuilder ;
1224+ use std:: str:: FromStr ;
12021225
12031226 use super :: * ;
1204- use arrow:: array:: { BinaryViewArray , Int32Array } ;
1227+ use arrow:: array:: {
1228+ BinaryViewArray , Decimal32Array , Decimal64Array , Decimal128Array , Int32Array ,
1229+ Time64MicrosecondArray ,
1230+ } ;
12051231 use arrow_schema:: { Field , Fields } ;
1206- use parquet_variant:: ShortString ;
1232+ use parquet_variant:: { EMPTY_VARIANT_METADATA_BYTES , ShortString } ;
12071233
12081234 #[ test]
12091235 fn invalid_not_a_struct_array ( ) {
@@ -1535,4 +1561,65 @@ mod test {
15351561 assert_ne ! ( v, v_sliced) ;
15361562 }
15371563 }
1564+
1565+ macro_rules! invalid_variant_array_test {
1566+ ( $fn_name: ident, $invalid_typed_value: expr, $error_msg: literal) => {
1567+ #[ test]
1568+ fn $fn_name( ) {
1569+ let metadata = BinaryViewArray :: from_iter_values( std:: iter:: repeat_n(
1570+ EMPTY_VARIANT_METADATA_BYTES ,
1571+ 1 ,
1572+ ) ) ;
1573+ let invalid_typed_value = $invalid_typed_value;
1574+
1575+ let struct_array = StructArrayBuilder :: new( )
1576+ . with_field( "metadata" , Arc :: new( metadata) , false )
1577+ . with_field( "typed_value" , Arc :: new( invalid_typed_value) , true )
1578+ . build( ) ;
1579+
1580+ let array: VariantArray = VariantArray :: try_new( & struct_array)
1581+ . expect( "should create variant array" )
1582+ . into( ) ;
1583+
1584+ let result = array. try_value( 0 ) ;
1585+ assert!( result. is_err( ) ) ;
1586+ let error = result. unwrap_err( ) ;
1587+ assert!( matches!( error, ArrowError :: CastError ( _) ) ) ;
1588+
1589+ let expected: & str = $error_msg;
1590+ assert!(
1591+ error. to_string( ) . contains( $error_msg) ,
1592+ "error `{}` did not contain `{}`" ,
1593+ error,
1594+ expected
1595+ )
1596+ }
1597+ } ;
1598+ }
1599+
1600+ invalid_variant_array_test ! (
1601+ test_variant_array_invalide_time,
1602+ Time64MicrosecondArray :: from( vec![ Some ( 86401000000 ) ] ) ,
1603+ "Cast error: Cast failed at index 0 (array type: Time64(µs)): Invalid microsecond from midnight: 86401000000"
1604+ ) ;
1605+
1606+ invalid_variant_array_test ! (
1607+ test_variant_array_invalid_decimal32,
1608+ Decimal32Array :: from( vec![ Some ( 1234567890 ) ] ) ,
1609+ "Cast error: Cast failed at index 0 (array type: Decimal32(9, 2)): Invalid argument error: 1234567890 is wider than max precision 9"
1610+ ) ;
1611+
1612+ invalid_variant_array_test ! (
1613+ test_variant_array_invalid_decimal64,
1614+ Decimal64Array :: from( vec![ Some ( 1234567890123456789 ) ] ) ,
1615+ "Cast error: Cast failed at index 0 (array type: Decimal64(18, 6)): Invalid argument error: 1234567890123456789 is wider than max precision 18"
1616+ ) ;
1617+
1618+ invalid_variant_array_test ! (
1619+ test_variant_array_invalid_decimal128,
1620+ Decimal128Array :: from( vec![ Some (
1621+ i128 :: from_str( "123456789012345678901234567890123456789" ) . unwrap( )
1622+ ) , ] ) ,
1623+ "Cast error: Cast failed at index 0 (array type: Decimal128(38, 10)): Invalid argument error: 123456789012345678901234567890123456789 is wider than max precision 38"
1624+ ) ;
15381625}
0 commit comments