From 9ea3931c11265d8bb67a99dff3d8994c1ba3575e Mon Sep 17 00:00:00 2001
From: David Hewitt
Date: Mon, 10 Feb 2025 15:57:48 +0000
Subject: [PATCH 1/3] start prepping for datafusion 46

---
 Cargo.toml     | 5 ++++-
 src/rewrite.rs | 6 +++++-
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index 77cec19..b1773ee 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -21,7 +21,7 @@ datafusion = { version = "45", default-features = false, features = ["nested_exp
 codspeed-criterion-compat = "2.6"
 criterion = "0.5.1"
 clap = "4"
-tokio = { version = "1.38", features = ["full"] }
+tokio = { version = "1.43", features = ["full"] }
 
 [lints.clippy]
 dbg_macro = "deny"
@@ -34,3 +34,6 @@ pedantic = { level = "deny", priority = -1 }
 [[bench]]
 name = "main"
 harness = false
+
+[patch.crates-io]
+datafusion = { git = "https://github.com/apache/datafusion.git", rev = "5239d1ac" }
diff --git a/src/rewrite.rs b/src/rewrite.rs
index 72637a4..4161154 100644
--- a/src/rewrite.rs
+++ b/src/rewrite.rs
@@ -141,7 +141,11 @@ impl std::fmt::Display for JsonOperator {
 /// Convert an Expr to a String representatiion for use in alias names.
 fn expr_to_sql_repr(expr: &Expr) -> String {
     match expr {
-        Expr::Column(Column { name, relation }) => relation
+        Expr::Column(Column {
+            name,
+            relation,
+            spans: _,
+        }) => relation
             .as_ref()
             .map_or_else(|| name.clone(), |r| format!("{r}.{name}")),
         Expr::Alias(alias) => alias.name.clone(),

From 36f10919c99f9d0ab5ce87d4ba884f83fb778c75 Mon Sep 17 00:00:00 2001
From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com>
Date: Tue, 11 Mar 2025 10:27:27 -0500
Subject: [PATCH 2/3] update

---
 Cargo.toml | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index b1773ee..cbccda1 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -11,13 +11,13 @@ repository = "https://github.com/datafusion-contrib/datafusion-functions-json/"
 rust-version = "1.81.0"
 
 [dependencies]
-datafusion = { version = "45", default-features = false }
+datafusion = { version = "46", default-features = false }
 jiter = "0.8"
 paste = "1"
 log = "0.4"
 
 [dev-dependencies]
-datafusion = { version = "45", default-features = false, features = ["nested_expressions"] }
+datafusion = { version = "46", default-features = false, features = ["nested_expressions"] }
 codspeed-criterion-compat = "2.6"
 criterion = "0.5.1"
 clap = "4"
@@ -34,6 +34,3 @@ pedantic = { level = "deny", priority = -1 }
 [[bench]]
 name = "main"
 harness = false
-
-[patch.crates-io]
-datafusion = { git = "https://github.com/apache/datafusion.git", rev = "5239d1ac" }

From e45026c94a24f687276cb1aa941656c22b226e4e Mon Sep 17 00:00:00 2001
From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com>
Date: Tue, 11 Mar 2025 12:02:13 -0500
Subject: [PATCH 3/3] fix

---
 Cargo.toml              |  4 ++--
 src/json_length.rs      |  2 +-
 src/json_object_keys.rs |  2 +-
 tests/main.rs           | 13 ++++++++-----
 tests/utils/mod.rs      | 35 +++++++++++++++++++++++++++++------
 5 files changed, 41 insertions(+), 15 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index cbccda1..237c8b7 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -8,11 +8,11 @@ license = "Apache-2.0"
 keywords = ["datafusion", "JSON", "SQL"]
 categories = ["database-implementations", "parsing"]
 repository = "https://github.com/datafusion-contrib/datafusion-functions-json/"
-rust-version = "1.81.0"
+rust-version = "1.82.0"
 
 [dependencies]
 datafusion = { version = "46", default-features = false }
-jiter = "0.8"
+jiter = "0.9"
 paste = "1"
 log = "0.4"
 
diff --git a/src/json_length.rs b/src/json_length.rs
index 52185b2..8d0bfd3 100644
--- a/src/json_length.rs
+++ b/src/json_length.rs
@@ -14,7 +14,7 @@ make_udf_function!(
     JsonLength,
     json_length,
     json_data path,
-    r#"Get the length of the array or object at the given path."#
+    r"Get the length of the array or object at the given path."
 );
 
 #[derive(Debug)]
diff --git a/src/json_object_keys.rs b/src/json_object_keys.rs
index 73b8d19..04cac0e 100644
--- a/src/json_object_keys.rs
+++ b/src/json_object_keys.rs
@@ -14,7 +14,7 @@ make_udf_function!(
     JsonObjectKeys,
     json_object_keys,
     json_data path,
-    r#"Get the keys of a JSON object as an array."#
+    r"Get the keys of a JSON object as an array."
 );
 
 #[derive(Debug)]
diff --git a/tests/main.rs b/tests/main.rs
index 12c75f3..0e65338 100644
--- a/tests/main.rs
+++ b/tests/main.rs
@@ -1310,9 +1310,12 @@ fn check_for_null_dictionary_values(array: &dyn Array) {
         if values_array.is_null(i) {
             // keys should not contain
             if keys.contains(&i) {
-                println!("keys: {:?}", keys);
-                println!("values: {:?}", values_array);
-                panic!("keys should not contain null values");
+                #[allow(clippy::print_stdout)]
+                {
+                    println!("keys: {keys:?}");
+                    println!("values: {values_array:?}");
+                    panic!("keys should not contain null values");
+                }
             }
         }
     }
@@ -1341,7 +1344,7 @@ async fn test_dict_get_no_null_values() {
         "| |",
         "+------------+",
     ];
-    let batches = ctx.sql(&sql).await.unwrap().collect().await.unwrap();
+    let batches = ctx.sql(sql).await.unwrap().collect().await.unwrap();
     assert_batches_eq!(expected, &batches);
     for batch in batches {
         check_for_null_dictionary_values(batch.column(0).as_ref());
@@ -1352,7 +1355,7 @@ async fn test_dict_get_no_null_values() {
         "+------+", "| v |", "+------+", "| |", "| fizz |", "| |", "| abcd |", "| |", "| fizz |", "| fizz |",
         "| fizz |", "| fizz |", "| |", "+------+",
     ];
-    let batches = ctx.sql(&sql).await.unwrap().collect().await.unwrap();
+    let batches = ctx.sql(sql).await.unwrap().collect().await.unwrap();
     assert_batches_eq!(expected, &batches);
     for batch in batches {
         check_for_null_dictionary_values(batch.column(0).as_ref());
diff --git a/tests/utils/mod.rs b/tests/utils/mod.rs
index dab3d4e..541d223 100644
--- a/tests/utils/mod.rs
+++ b/tests/utils/mod.rs
@@ -20,6 +20,7 @@ pub async fn create_context() -> Result<SessionContext> {
     Ok(ctx)
 }
 
+#[expect(clippy::too_many_lines)]
 async fn create_test_table(large_utf8: bool, dict_encoded: bool) -> Result<SessionContext> {
     let ctx = create_context().await?;
 
@@ -42,7 +43,7 @@ async fn create_test_table(large_utf8: bool, dict_encoded: bool) -> Result<Sess
     if dict_encoded {
         json_data_type = DataType::Dictionary(DataType::Int32.into(), json_data_type.into());
         json_array = Arc::new(DictionaryArray::<Int32Type>::new(
-            Int32Array::from_iter_values(0..(json_array.len() as i32)),
+            Int32Array::from_iter_values(0..(i32::try_from(json_array.len()).expect("fits in a i32"))),
             json_array,
         ));
     }
@@ -160,13 +161,23 @@ async fn create_test_table(large_utf8: bool, dict_encoded: bool) -> Result<Sess
         ])),
         vec![
             Arc::new(DictionaryArray::<UInt32Type>::new(
-                UInt32Array::from_iter_values(dict_data.iter().enumerate().map(|(id, _)| id as u32)),
+                UInt32Array::from_iter_values(
+                    dict_data
+                        .iter()
+                        .enumerate()
+                        .map(|(id, _)| u32::try_from(id).expect("fits in a u32")),
+                ),
                 Arc::new(StringArray::from(
                     dict_data.iter().map(|(json, _, _, _)| *json).collect::<Vec<_>>(),
                 )),
             )),
             Arc::new(DictionaryArray::<UInt8Type>::new(
-                UInt8Array::from_iter_values(dict_data.iter().enumerate().map(|(id, _)| id as u8)),
+                UInt8Array::from_iter_values(
+                    dict_data
+                        .iter()
+                        .enumerate()
+                        .map(|(id, _)| u8::try_from(id).expect("fits in a u8")),
+                ),
                 Arc::new(LargeStringArray::from(
                     dict_data
                         .iter()
@@ -175,7 +186,12 @@ async fn create_test_table(large_utf8: bool, dict_encoded: bool) -> Result<Sess
                 )),
             )),
             Arc::new(DictionaryArray::<UInt8Type>::new(
-                UInt8Array::from_iter_values(dict_data.iter().enumerate().map(|(id, _)| id as u8)),
+                UInt8Array::from_iter_values(
+                    dict_data
+                        .iter()
+                        .enumerate()
+                        .map(|(id, _)| u8::try_from(id).expect("fits in a u8")),
+                ),
                 Arc::new(StringViewArray::from(
                     dict_data
                         .iter()
@@ -184,9 +200,16 @@ async fn create_test_table(large_utf8: bool, dict_encoded: bool) -> Result<Sess
                 )),
             )),
             Arc::new(DictionaryArray::<Int64Type>::new(
-                Int64Array::from_iter_values(dict_data.iter().enumerate().map(|(id, _)| id as i64)),
+                Int64Array::from_iter_values(
+                    dict_data
+                        .iter()
+                        .enumerate()
+                        .map(|(id, _)| i64::try_from(id).expect("fits in a i64")),
+                ),
                 Arc::new(UInt64Array::from_iter_values(
-                    dict_data.iter().map(|(_, _, _, int_key)| *int_key as u64),
+                    dict_data
+                        .iter()
+                        .map(|(_, _, _, int_key)| u64::try_from(*int_key).expect("not negative")),
                 )),
             )),
         ],