From 01c02be3085390be5ac465a2d4a22d4df71f166b Mon Sep 17 00:00:00 2001 From: Edgar Modesto Date: Fri, 29 Aug 2025 11:22:55 -0700 Subject: [PATCH 01/28] init iceberg connector libs --- Cargo.lock | 8 +++++ Cargo.toml | 4 +-- core/connectors/sinks/iceberg_sink/Cargo.toml | 6 ++++ core/connectors/sinks/iceberg_sink/README.md | 0 core/connectors/sinks/iceberg_sink/src/lib.rs | 32 +++++++++++++++++++ .../sources/iceberg_source/Cargo.toml | 6 ++++ .../sources/iceberg_source/README.md | 0 .../sources/iceberg_source/src/lib.rs | 32 +++++++++++++++++++ 8 files changed, 86 insertions(+), 2 deletions(-) create mode 100644 core/connectors/sinks/iceberg_sink/Cargo.toml create mode 100644 core/connectors/sinks/iceberg_sink/README.md create mode 100644 core/connectors/sinks/iceberg_sink/src/lib.rs create mode 100644 core/connectors/sources/iceberg_source/Cargo.toml create mode 100644 core/connectors/sources/iceberg_source/README.md create mode 100644 core/connectors/sources/iceberg_source/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index dc1ec2656..e273c094d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3674,6 +3674,14 @@ dependencies = [ "cc", ] +[[package]] +name = "iceberg_sink" +version = "0.1.0" + +[[package]] +name = "iceberg_source" +version = "0.1.0" + [[package]] name = "icu_collections" version = "2.0.0" diff --git a/Cargo.toml b/Cargo.toml index b379d95d3..09aee1f68 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -33,10 +33,10 @@ members = [ "core/cli", "core/common", "core/connectors/runtime", - "core/connectors/sdk", + "core/connectors/sdk", "core/connectors/sinks/iceberg_sink", "core/connectors/sinks/postgres_sink", "core/connectors/sinks/quickwit_sink", - "core/connectors/sinks/stdout_sink", + "core/connectors/sinks/stdout_sink", "core/connectors/sources/iceberg_source", "core/connectors/sources/postgres_source", "core/connectors/sources/random_source", "core/integration", diff --git a/core/connectors/sinks/iceberg_sink/Cargo.toml 
b/core/connectors/sinks/iceberg_sink/Cargo.toml new file mode 100644 index 000000000..b0e9bcb82 --- /dev/null +++ b/core/connectors/sinks/iceberg_sink/Cargo.toml @@ -0,0 +1,6 @@ +[package] +name = "iceberg_sink" +version = "0.1.0" +edition = "2024" + +[dependencies] diff --git a/core/connectors/sinks/iceberg_sink/README.md b/core/connectors/sinks/iceberg_sink/README.md new file mode 100644 index 000000000..e69de29bb diff --git a/core/connectors/sinks/iceberg_sink/src/lib.rs b/core/connectors/sinks/iceberg_sink/src/lib.rs new file mode 100644 index 000000000..fc4426b63 --- /dev/null +++ b/core/connectors/sinks/iceberg_sink/src/lib.rs @@ -0,0 +1,32 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +pub fn add(left: u64, right: u64) -> u64 { + left + right +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn it_works() { + let result = add(2, 2); + assert_eq!(result, 4); + } +} diff --git a/core/connectors/sources/iceberg_source/Cargo.toml b/core/connectors/sources/iceberg_source/Cargo.toml new file mode 100644 index 000000000..848674985 --- /dev/null +++ b/core/connectors/sources/iceberg_source/Cargo.toml @@ -0,0 +1,6 @@ +[package] +name = "iceberg_source" +version = "0.1.0" +edition = "2024" + +[dependencies] diff --git a/core/connectors/sources/iceberg_source/README.md b/core/connectors/sources/iceberg_source/README.md new file mode 100644 index 000000000..e69de29bb diff --git a/core/connectors/sources/iceberg_source/src/lib.rs b/core/connectors/sources/iceberg_source/src/lib.rs new file mode 100644 index 000000000..fc4426b63 --- /dev/null +++ b/core/connectors/sources/iceberg_source/src/lib.rs @@ -0,0 +1,32 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +pub fn add(left: u64, right: u64) -> u64 { + left + right +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn it_works() { + let result = add(2, 2); + assert_eq!(result, 4); + } +} From 7e16b139f86852705afd7a2af88a0495ae2542f9 Mon Sep 17 00:00:00 2001 From: Edgar Modesto Date: Mon, 1 Sep 2025 13:17:28 -0700 Subject: [PATCH 02/28] Init iceberg sink --- Cargo.lock | 19 ++- Cargo.toml | 6 +- core/connectors/sinks/iceberg_sink/Cargo.toml | 41 ++++++- .../sinks/iceberg_sink/example.toml | 115 ++++++++++++++++++ core/connectors/sinks/iceberg_sink/src/lib.rs | 91 +++++++++++++- 5 files changed, 260 insertions(+), 12 deletions(-) create mode 100644 core/connectors/sinks/iceberg_sink/example.toml diff --git a/Cargo.lock b/Cargo.lock index e273c094d..50f35f118 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3674,10 +3674,6 @@ dependencies = [ "cc", ] -[[package]] -name = "iceberg_sink" -version = "0.1.0" - [[package]] name = "iceberg_source" version = "0.1.0" @@ -3974,6 +3970,21 @@ dependencies = [ "tracing", ] +[[package]] +name = "iggy_connector_iceberg_sink" +version = "0.1.0" +dependencies = [ + "async-trait", + "dashmap", + "iggy_connector_sdk", + "once_cell", + "reqwest", + "serde", + "serde_yml", + "simd-json", + "tracing", +] + [[package]] name = "iggy_connector_postgres_sink" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index 09aee1f68..0fe58f0fc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -33,10 +33,12 @@ members = [ "core/cli", "core/common", "core/connectors/runtime", - "core/connectors/sdk", "core/connectors/sinks/iceberg_sink", + "core/connectors/sdk", + "core/connectors/sinks/iceberg_sink", "core/connectors/sinks/postgres_sink", "core/connectors/sinks/quickwit_sink", - "core/connectors/sinks/stdout_sink", "core/connectors/sources/iceberg_source", + "core/connectors/sinks/stdout_sink", + "core/connectors/sources/iceberg_source", "core/connectors/sources/postgres_source", "core/connectors/sources/random_source", "core/integration", 
diff --git a/core/connectors/sinks/iceberg_sink/Cargo.toml b/core/connectors/sinks/iceberg_sink/Cargo.toml index b0e9bcb82..0ac7aa40d 100644 --- a/core/connectors/sinks/iceberg_sink/Cargo.toml +++ b/core/connectors/sinks/iceberg_sink/Cargo.toml @@ -1,6 +1,45 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ [package] -name = "iceberg_sink" +name = "iggy_connector_iceberg_sink" version = "0.1.0" edition = "2024" +license = "Apache-2.0" +keywords = ["iggy", "messaging", "streaming"] +categories = ["command-line-utilities", "database", "network-programming"] +homepage = "https://iggy.apache.org" +documentation = "https://iggy.apache.org/docs" +repository = "https://github.com/apache/iggy" +readme = "../../README.md" + +[package.metadata.cargo-machete] +ignored = ["dashmap", "once_cell"] [dependencies] +async-trait = { workspace = true } +dashmap = { workspace = true } +iggy_connector_sdk = { workspace = true } +once_cell = { workspace = true } +reqwest = { workspace = true } +serde = { workspace = true } +serde_yml = { workspace = true } +simd-json = { workspace = true } +tracing = { workspace = true } + +[lib] +crate-type = ["cdylib", "lib"] diff --git a/core/connectors/sinks/iceberg_sink/example.toml b/core/connectors/sinks/iceberg_sink/example.toml new file mode 100644 index 000000000..46afd7013 --- /dev/null +++ b/core/connectors/sinks/iceberg_sink/example.toml @@ -0,0 +1,115 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ + +[http_api] # Optional HTTP API configuration +enabled = true +address = "127.0.0.1:8081" +# api_key = "secret" # Optional API key for authentication to be passed as `api-key` header + +[http_api.cors] # Optional CORS configuration for HTTP API +enabled = false +allowed_methods = ["GET", "POST", "PUT", "DELETE"] +allowed_origins = ["*"] +allowed_headers = ["content-type"] +exposed_headers = [""] +allow_credentials = false +allow_private_network = false + +[http_api.tls] # Optional TLS configuration for HTTP API +enabled = false +cert_file = "core/certs/iggy_cert.pem" +key_file = "core/certs/iggy_key.pem" + +[iggy] +address = "localhost:8090" +username = "iggy" +password = "iggy" +# token = "secret" # Personal Access Token (PAT) can be used instead of username and password + +[state] +path = "local_state" + +[sinks.iceberg] +enabled = true +name = "Iceberg sink" +path = "target/release/libiggy_connector_iceberg_sink" + +[[sinks.iceberg.streams]] +stream = "iceberg" +topics = ["taxis"] +# schema = "json" +batch_length = 10 +poll_interval = "5ms" +consumer_group = "iceberg_sink_connector" + +[sinks.iceberg.catalog] +tables = ["nyc.taxis"] +type = "rest" +uri = "https://localhost" +credential = "12345" +warehouse = "warehouse" + + +[sinks.stdout] +enabled = true +name = "Stdout sink" +path = "target/release/libiggy_connector_stdout_sink" + +[[sinks.stdout.streams]] +stream = "example_stream" +topics = ["example_topic"] +schema = "json" +batch_length = 100 +poll_interval = "5ms" +consumer_group = "stdout_sink_connector" + +[sinks.stdout.config] +print_payload = false + +[sinks.stdout.transforms.add_fields] +enabled = true + +[[sinks.stdout.transforms.add_fields.fields]] +key = "message" +value.static = "hello" + +[sources.random] +enabled = true +name = "Random source" +path = "target/release/libiggy_connector_random_source" +config_format = "json" + +[[sources.random.streams]] +stream = "example_stream" +topic = "example_topic" +schema = "json" +batch_length = 1000 
+linger_time = "5ms" + +[sources.random.config] +interval = "100ms" +# max_count = 1000 +messages_range = [10, 50] +payload_size = 200 + +[sources.random.transforms.add_fields] +enabled = true + +[[sources.random.transforms.add_fields.fields]] +key = "test_field" +value.static = "hello!" diff --git a/core/connectors/sinks/iceberg_sink/src/lib.rs b/core/connectors/sinks/iceberg_sink/src/lib.rs index fc4426b63..2ee6f6d54 100644 --- a/core/connectors/sinks/iceberg_sink/src/lib.rs +++ b/core/connectors/sinks/iceberg_sink/src/lib.rs @@ -16,17 +16,98 @@ * under the License. */ -pub fn add(left: u64, right: u64) -> u64 { - left + right +use async_trait::async_trait; +use iggy_connector_sdk::{ + sink_connector, ConsumedMessage, Error, MessagesMetadata, Payload, Sink, TopicMetadata, +}; +use serde::{Deserialize, Serialize}; +use tracing::{error, info, warn}; + +#[derive(Debug, Serialize, Deserialize)] +pub enum IcebergSinkTypes { + REST, + HDFS, +} + +sink_connector!(IcebergSink); + +#[derive(Debug)] +pub struct IcebergSink { + id: u32, + config: IcebergSinkConfig, + client: reqwest::Client, +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct IcebergSinkConfig { + tables: Vec, + catalog_type: String, + credential: String, + warehouse: String, + uri: String, +} + +impl IcebergSink { + pub fn new(id: u32, config: IcebergSinkConfig) -> Self { + IcebergSink { + id, + config, + client: reqwest::Client::new(), + } + } +} + +#[async_trait] +impl Sink for IcebergSink { + async fn open(&mut self) -> Result<(), Error> { + info!( + "Opened Quickwit sink connector with ID: {} for URL: {}", + self.id, self.config.uri + ); + Ok(()) + } + + async fn consume( + &self, + _topic_metadata: &TopicMetadata, + messages_metadata: MessagesMetadata, + messages: Vec, + ) -> Result<(), Error> { + info!( + "Iceberg sink with ID: {} received: {} messages, format: {}", + self.id, + messages.len(), + messages_metadata.schema + ); + + let mut json_payloads = Vec::with_capacity(messages.len()); + 
for message in messages { + match message.payload { + Payload::Json(value) => json_payloads.push(value), + _ => { + warn!("Unsupported payload format: {}", messages_metadata.schema); + } + } + } + + if json_payloads.is_empty() { + return Ok(()); + } + + Ok(()) + } + + async fn close(&mut self) -> Result<(), Error> { + info!("Iceberg sink connector with ID: {} is closed.", self.id); + Ok(()) + } } #[cfg(test)] mod tests { - use super::*; #[test] fn it_works() { - let result = add(2, 2); - assert_eq!(result, 4); + assert_eq!(true, true); } } From 98cc24091ca12a397114555c03e860d79b6bc0fe Mon Sep 17 00:00:00 2001 From: Edgar Modesto Date: Tue, 9 Sep 2025 13:14:48 -0700 Subject: [PATCH 03/28] Iceberg sink works mvp --- Cargo.lock | 753 +++++++++++++++++- Cargo.toml | 6 + core/connectors/sinks/iceberg_sink/Cargo.toml | 9 + core/connectors/sinks/iceberg_sink/src/lib.rs | 298 ++++++- .../connectors/sinks/quickwit_sink/src/lib.rs | 2 +- .../sources/iceberg_source/Cargo.toml | 17 + 6 files changed, 1056 insertions(+), 29 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 50f35f118..577049fe6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -238,6 +238,12 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" +[[package]] +name = "adler32" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aae1277d39aeec15cb388266ecc24b11c80469deae6067e17a1a7aa9e5c1f234" + [[package]] name = "aead" version = "0.5.2" @@ -291,6 +297,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" dependencies = [ "cfg-if", + "const-random", "getrandom 0.3.3", "once_cell", "serde", @@ -405,6 +412,30 @@ version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d301b3b94cb4b2f23d7917810addbbaff90738e0ca2be692bd027e70d7e0330c" 
+[[package]] +name = "apache-avro" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1aef82843a0ec9f8b19567445ad2421ceeb1d711514384bdd3d49fe37102ee13" +dependencies = [ + "bigdecimal", + "digest", + "libflate", + "log", + "num-bigint", + "quad-rand", + "rand 0.8.5", + "regex-lite", + "serde", + "serde_bytes", + "serde_json", + "strum 0.26.3", + "strum_macros 0.26.4", + "thiserror 1.0.69", + "typed-builder 0.19.1", + "uuid", +] + [[package]] name = "arbitrary" version = "1.4.1" @@ -420,6 +451,12 @@ version = "1.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69f7f8c3906b62b754cd5326047894316021dcfe5a194c8ea52bdd94934a3457" +[[package]] +name = "array-init" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d62b7694a562cdf5a74227903507c56ab2cc8bdd1f781ed5cb4cf9c9f810bfc" + [[package]] name = "arraydeque" version = "0.5.1" @@ -438,6 +475,170 @@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" +[[package]] +name = "arrow-arith" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30feb679425110209ae35c3fbf82404a39a4c0436bb3ec36164d8bffed2a4ce4" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "chrono", + "num", +] + +[[package]] +name = "arrow-array" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70732f04d285d49054a48b72c54f791bb3424abae92d27aafdf776c98af161c8" +dependencies = [ + "ahash 0.8.12", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "chrono", + "half", + "hashbrown 0.15.5", + "num", +] + +[[package]] +name = "arrow-buffer" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"169b1d5d6cb390dd92ce582b06b23815c7953e9dfaaea75556e89d890d19993d" +dependencies = [ + "bytes", + "half", + "num", +] + +[[package]] +name = "arrow-cast" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4f12eccc3e1c05a766cafb31f6a60a46c2f8efec9b74c6e0648766d30686af8" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "atoi", + "base64 0.22.1", + "chrono", + "half", + "lexical-core", + "num", + "ryu", +] + +[[package]] +name = "arrow-data" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8de1ce212d803199684b658fc4ba55fb2d7e87b213de5af415308d2fee3619c2" +dependencies = [ + "arrow-buffer", + "arrow-schema", + "half", + "num", +] + +[[package]] +name = "arrow-ipc" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9ea5967e8b2af39aff5d9de2197df16e305f47f404781d3230b2dc672da5d92" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "flatbuffers", +] + +[[package]] +name = "arrow-json" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5709d974c4ea5be96d900c01576c7c0b99705f4a3eec343648cb1ca863988a9c" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", + "chrono", + "half", + "indexmap 2.10.0", + "lexical-core", + "memchr", + "num", + "serde", + "serde_json", + "simdutf8", +] + +[[package]] +name = "arrow-ord" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6506e3a059e3be23023f587f79c82ef0bcf6d293587e3272d20f2d30b969b5a7" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", +] + +[[package]] +name = "arrow-schema" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"af7686986a3bf2254c9fb130c623cdcb2f8e1f15763e7c71c310f0834da3d292" + +[[package]] +name = "arrow-select" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd2b45757d6a2373faa3352d02ff5b54b098f5e21dccebc45a21806bc34501e5" +dependencies = [ + "ahash 0.8.12", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "num", +] + +[[package]] +name = "arrow-string" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0377d532850babb4d927a06294314b316e23311503ed580ec6ce6a0158f49d40" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "memchr", + "num", + "regex", + "regex-syntax 0.8.5", +] + +[[package]] +name = "as-any" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0f477b951e452a0b6b4a10b53ccd569042d1d01729b519e02074a9c0958a063" + [[package]] name = "assert_cmd" version = "2.0.17" @@ -818,6 +1019,17 @@ dependencies = [ "tower-service", ] +[[package]] +name = "backon" +version = "1.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "592277618714fbcecda9a02ba7a8781f319d26532a88553bbacc77ba5d2b3a8d" +dependencies = [ + "fastrand", + "gloo-timers 0.3.0", + "tokio", +] + [[package]] name = "backtrace" version = "0.3.75" @@ -970,6 +1182,26 @@ dependencies = [ "tracing-subscriber", ] +[[package]] +name = "bigdecimal" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a22f228ab7a1b23027ccc6c350b72868017af7ea8356fbdf19f8d991c690013" +dependencies = [ + "autocfg", + "libm", + "num-bigint", + "num-integer", + "num-traits", + "serde", +] + +[[package]] +name = "bimap" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "230c5f1ca6a325a32553f8640d31ac9b49f2411e901e427570154868b46da4f7" + [[package]] name = "bincode" version = "1.3.3" @@ -1681,6 
+1913,15 @@ version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" +[[package]] +name = "core2" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b49ba7ef1ad6107f8824dbe97de947cbaac53c44e7f9756a1fba0d37c1eec505" +dependencies = [ + "memchr", +] + [[package]] name = "cpufeatures" version = "0.2.17" @@ -1705,6 +1946,15 @@ version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5" +[[package]] +name = "crc32c" +version = "0.6.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a47af21622d091a8f0fb295b88bc886ac74efcc613efc19f5d0b21de5c89e47" +dependencies = [ + "rustc_version", +] + [[package]] name = "crc32fast" version = "1.5.0" @@ -1969,6 +2219,12 @@ dependencies = [ "syn 2.0.104", ] +[[package]] +name = "dary_heap" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04d2cd9c18b9f454ed67da600630b021a8a80bf33f8c95896ab33aaf1c26b728" + [[package]] name = "dashmap" version = "6.1.0" @@ -2184,6 +2440,12 @@ dependencies = [ "syn 2.0.104", ] +[[package]] +name = "dissimilar" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8975ffdaa0ef3661bfe02dbdcc06c9f829dfafe6a3c474de366a8d5e44276921" + [[package]] name = "dlopen2" version = "0.8.0" @@ -2425,6 +2687,16 @@ dependencies = [ "pin-project-lite", ] +[[package]] +name = "expect-test" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63af43ff4431e848fb47472a920f14fa71c24de13255a5692e93d4e90302acb0" +dependencies = [ + "dissimilar", + "once_cell", +] + [[package]] name = "ext-trait" version = "1.0.1" @@ -2817,7 +3089,7 @@ dependencies = [ "syn 2.0.104", "textwrap", "thiserror 1.0.69", - "typed-builder", 
+ "typed-builder 0.15.2", ] [[package]] @@ -3339,6 +3611,17 @@ dependencies = [ "tracing", ] +[[package]] +name = "half" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "459196ed295495a68f7d7fe1d84f6c4b7ff0e21fe3017b2f283c6fac3ad803c9" +dependencies = [ + "cfg-if", + "crunchy", + "num-traits", +] + [[package]] name = "halfbrown" version = "0.3.0" @@ -3388,6 +3671,10 @@ name = "hashbrown" version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +dependencies = [ + "ahash 0.8.12", + "allocator-api2", +] [[package]] name = "hashbrown" @@ -3674,6 +3961,82 @@ dependencies = [ "cc", ] +[[package]] +name = "iceberg" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "306fd4bf70d30687dc765110ecd19fc2bb21f16c3d5c188bc53a0d573bb6e675" +dependencies = [ + "anyhow", + "apache-avro", + "array-init", + "arrow-arith", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-ord", + "arrow-schema", + "arrow-select", + "arrow-string", + "as-any", + "async-trait", + "backon", + "base64 0.22.1", + "bimap", + "bytes", + "chrono", + "derive_builder", + "expect-test", + "fnv", + "futures", + "itertools 0.13.0", + "moka", + "murmur3", + "num-bigint", + "once_cell", + "opendal", + "ordered-float 4.6.0", + "parquet", + "rand 0.8.5", + "reqwest", + "roaring", + "rust_decimal", + "serde", + "serde_bytes", + "serde_derive", + "serde_json", + "serde_repr", + "serde_with", + "strum 0.27.2", + "thrift", + "tokio", + "typed-builder 0.20.1", + "url", + "uuid", + "zstd", +] + +[[package]] +name = "iceberg-catalog-rest" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e6a0dc30703b0cbb7d3c245126936d92015f93ab3ac52e20edc011f42934628" +dependencies = [ + "async-trait", + "chrono", + "http 1.3.1", + "iceberg", + "itertools 0.13.0", + "reqwest", + "serde", 
+ "serde_derive", + "serde_json", + "tokio", + "tracing", + "typed-builder 0.20.1", + "uuid", +] + [[package]] name = "iceberg_source" version = "0.1.0" @@ -3892,7 +4255,7 @@ dependencies = [ "serde", "serde_json", "serde_yml", - "strum", + "strum 0.27.2", "thiserror 2.0.14", "tokio", "toml 0.9.5", @@ -3913,7 +4276,7 @@ dependencies = [ "iggy", "rmcp", "serde", - "strum", + "strum 0.27.2", "thiserror 2.0.14", "tokio", "tower-http", @@ -3964,7 +4327,7 @@ dependencies = [ "serde", "serde_json", "serde_with", - "strum", + "strum 0.27.2", "thiserror 2.0.14", "tokio", "tracing", @@ -3974,15 +4337,24 @@ dependencies = [ name = "iggy_connector_iceberg_sink" version = "0.1.0" dependencies = [ + "arrow-array", + "arrow-json", "async-trait", + "chrono", "dashmap", + "iceberg", + "iceberg-catalog-rest", "iggy_connector_sdk", "once_cell", + "parquet", "reqwest", + "rust-s3", "serde", + "serde_json", "serde_yml", "simd-json", "tracing", + "uuid", ] [[package]] @@ -4074,7 +4446,7 @@ dependencies = [ "serde", "serde_json", "simd-json", - "strum_macros", + "strum_macros 0.27.2", "thiserror 2.0.14", "tokio", "tracing", @@ -4238,6 +4610,12 @@ dependencies = [ "web-sys", ] +[[package]] +name = "integer-encoding" +version = "3.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" + [[package]] name = "integration" version = "0.0.1" @@ -4552,6 +4930,70 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "lexical-core" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b765c31809609075565a70b4b71402281283aeda7ecaf4818ac14a7b2ade8958" +dependencies = [ + "lexical-parse-float", + "lexical-parse-integer", + "lexical-util", + "lexical-write-float", + "lexical-write-integer", +] + +[[package]] +name = "lexical-parse-float" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"de6f9cb01fb0b08060209a057c048fcbab8717b4c1ecd2eac66ebfe39a65b0f2" +dependencies = [ + "lexical-parse-integer", + "lexical-util", + "static_assertions", +] + +[[package]] +name = "lexical-parse-integer" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72207aae22fc0a121ba7b6d479e42cbfea549af1479c3f3a4f12c70dd66df12e" +dependencies = [ + "lexical-util", + "static_assertions", +] + +[[package]] +name = "lexical-util" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a82e24bf537fd24c177ffbbdc6ebcc8d54732c35b50a3f28cc3f4e4c949a0b3" +dependencies = [ + "static_assertions", +] + +[[package]] +name = "lexical-write-float" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c5afc668a27f460fb45a81a757b6bf2f43c2d7e30cb5a2dcd3abf294c78d62bd" +dependencies = [ + "lexical-util", + "lexical-write-integer", + "static_assertions", +] + +[[package]] +name = "lexical-write-integer" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "629ddff1a914a836fb245616a7888b62903aae58fa771e1d83943035efa0f978" +dependencies = [ + "lexical-util", + "static_assertions", +] + [[package]] name = "libbz2-rs-sys" version = "0.2.2" @@ -4574,6 +5016,30 @@ dependencies = [ "pkg-config", ] +[[package]] +name = "libflate" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45d9dfdc14ea4ef0900c1cddbc8dcd553fbaacd8a4a282cf4018ae9dd04fb21e" +dependencies = [ + "adler32", + "core2", + "crc32fast", + "dary_heap", + "libflate_lz77", +] + +[[package]] +name = "libflate_lz77" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e0d73b369f386f1c44abd9c570d5318f55ccde816ff4b562fa452e5182863d" +dependencies = [ + "core2", + "hashbrown 0.14.5", + "rle-decode-fast", +] + [[package]] name = "libgit2-sys" version = "0.18.2+1.9.1" @@ -4794,6 
+5260,15 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" +[[package]] +name = "lz4_flex" +version = "0.11.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08ab2867e3eeeca90e844d1940eab391c9dc5228783db2ed999acbc0a9ed375a" +dependencies = [ + "twox-hash", +] + [[package]] name = "macro_rules_attribute" version = "0.1.3" @@ -4995,6 +5470,12 @@ dependencies = [ "uuid", ] +[[package]] +name = "murmur3" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9252111cf132ba0929b6f8e030cac2a24b507f3a4d6db6fb2896f27b354c714b" + [[package]] name = "nanorand" version = "0.7.0" @@ -5147,6 +5628,7 @@ checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" dependencies = [ "num-integer", "num-traits", + "serde", ] [[package]] @@ -5342,6 +5824,34 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c08d65885ee38876c4f86fa503fb49d7b507c2b62552df7c70b2fce627e06381" +[[package]] +name = "opendal" +version = "0.54.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffb9838d0575c6dbaf3fcec7255af8d5771996d4af900bbb6fa9a314dec00a1a" +dependencies = [ + "anyhow", + "backon", + "base64 0.22.1", + "bytes", + "chrono", + "crc32c", + "futures", + "getrandom 0.2.16", + "http 1.3.1", + "http-body", + "log", + "md-5", + "percent-encoding", + "quick-xml 0.37.5", + "reqsign", + "reqwest", + "serde", + "serde_json", + "tokio", + "uuid", +] + [[package]] name = "openssl" version = "0.10.73" @@ -5497,6 +6007,24 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" +[[package]] +name = "ordered-float" +version = "2.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"68f19d67e5a2795c94e73e0bb1cc1a7edeb2e28efd39e2e1c9b7a40c1108b11c" +dependencies = [ + "num-traits", +] + +[[package]] +name = "ordered-float" +version = "4.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7bb71e1b3fa6ca1c61f383464aaf2bb0e2f8e772a1f01d486832464de363b951" +dependencies = [ + "num-traits", +] + [[package]] name = "ordered-multimap" version = "0.7.3" @@ -5567,6 +6095,41 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "parquet" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b17da4150748086bd43352bc77372efa9b6e3dbd06a04831d2a98c041c225cfa" +dependencies = [ + "ahash 0.8.12", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-ipc", + "arrow-schema", + "arrow-select", + "base64 0.22.1", + "brotli", + "bytes", + "chrono", + "flate2", + "futures", + "half", + "hashbrown 0.15.5", + "lz4_flex", + "num", + "num-bigint", + "paste", + "seq-macro", + "simdutf8", + "snap", + "thrift", + "tokio", + "twox-hash", + "zstd", +] + [[package]] name = "passterm" version = "2.0.1" @@ -6132,6 +6695,12 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "quad-rand" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a651516ddc9168ebd67b24afd085a718be02f8858fe406591b013d101ce2f40" + [[package]] name = "quanta" version = "0.12.6" @@ -6167,6 +6736,16 @@ dependencies = [ "serde", ] +[[package]] +name = "quick-xml" +version = "0.37.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "331e97a1af0bf59823e6eadffe373d7b27f485be8748f71471c662c1f269b7fb" +dependencies = [ + "memchr", + "serde", +] + [[package]] name = "quinn" version = "0.11.8" @@ -6460,6 +7039,35 @@ dependencies = [ "bytecheck", ] +[[package]] +name = "reqsign" +version = "0.16.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"43451dbf3590a7590684c25fb8d12ecdcc90ed3ac123433e500447c7d77ed701" +dependencies = [ + "anyhow", + "async-trait", + "base64 0.22.1", + "chrono", + "form_urlencoded", + "getrandom 0.2.16", + "hex", + "hmac", + "home", + "http 1.3.1", + "log", + "percent-encoding", + "quick-xml 0.37.5", + "rand 0.8.5", + "reqwest", + "rust-ini", + "serde", + "serde_json", + "sha1", + "sha2", + "tokio", +] + [[package]] name = "reqwest" version = "0.12.22" @@ -6591,6 +7199,12 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "rle-decode-fast" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3582f63211428f83597b51b2ddb88e2a91a9d52d12831f9d08f5e624e8977422" + [[package]] name = "rmcp" version = "0.5.0" @@ -6636,6 +7250,16 @@ dependencies = [ "syn 2.0.104", ] +[[package]] +name = "roaring" +version = "0.10.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19e8d2cfa184d94d0726d650a9f4a1be7f9b76ac9fdb954219878dc00c1c1e7b" +dependencies = [ + "bytemuck", + "byteorder", +] + [[package]] name = "ron" version = "0.8.1" @@ -7055,6 +7679,12 @@ dependencies = [ "serde", ] +[[package]] +name = "seq-macro" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bc711410fbe7399f390ca1c3b60ad0f53f80e95c5eb935e52268a0e2cd49acc" + [[package]] name = "serde" version = "1.0.219" @@ -7086,6 +7716,15 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "serde_bytes" +version = "0.11.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8437fd221bde2d4ca316d61b90e337e9e702b3820b87d63caa9ba6c02bd06d96" +dependencies = [ + "serde", +] + [[package]] name = "serde_derive" version = "1.0.219" @@ -7130,6 +7769,17 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_repr" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "175ee3e80ae9982737ca543e96133087cbd9a485eecc3bc4de9c1a37b47ea59c" 
+dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.104", +] + [[package]] name = "serde_spanned" version = "0.6.9" @@ -7283,7 +7933,7 @@ dependencies = [ "serde_with", "serial_test", "static-toml", - "strum", + "strum 0.27.2", "sysinfo 0.37.0", "tempfile", "thiserror 2.0.14", @@ -7456,6 +8106,12 @@ dependencies = [ "syn 2.0.104", ] +[[package]] +name = "snap" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" + [[package]] name = "socket2" version = "0.5.10" @@ -7758,13 +8414,32 @@ version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" +[[package]] +name = "strum" +version = "0.26.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" + [[package]] name = "strum" version = "0.27.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af23d6f6c1a224baef9d3f61e287d2761385a5b88fdab4eb4c6f11aeb54c4bcf" dependencies = [ - "strum_macros", + "strum_macros 0.27.2", +] + +[[package]] +name = "strum_macros" +version = "0.26.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" +dependencies = [ + "heck 0.5.0", + "proc-macro2", + "quote", + "rustversion", + "syn 2.0.104", ] [[package]] @@ -8021,6 +8696,28 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "threadpool" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d050e60b33d41c19108b32cea32164033a9013fe3b46cbd4457559bfbf77afaa" +dependencies = [ + "num_cpus", +] + +[[package]] +name = "thrift" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"7e54bc85fc7faa8bc175c4bab5b92ba8d9a3ce893d0e9f42cc455c8ab16a9e09" +dependencies = [ + "byteorder", + "integer-encoding", + "log", + "ordered-float 2.10.1", + "threadpool", +] + [[package]] name = "time" version = "0.3.41" @@ -8509,7 +9206,25 @@ version = "0.15.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7fe83c85a85875e8c4cb9ce4a890f05b23d38cd0d47647db7895d3d2a79566d2" dependencies = [ - "typed-builder-macro", + "typed-builder-macro 0.15.2", +] + +[[package]] +name = "typed-builder" +version = "0.19.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a06fbd5b8de54c5f7c91f6fe4cebb949be2125d7758e630bb58b1d831dbce600" +dependencies = [ + "typed-builder-macro 0.19.1", +] + +[[package]] +name = "typed-builder" +version = "0.20.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd9d30e3a08026c78f246b173243cf07b3696d274debd26680773b6773c2afc7" +dependencies = [ + "typed-builder-macro 0.20.1", ] [[package]] @@ -8523,6 +9238,28 @@ dependencies = [ "syn 2.0.104", ] +[[package]] +name = "typed-builder-macro" +version = "0.19.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9534daa9fd3ed0bd911d462a37f172228077e7abf18c18a5f67199d959205f8" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.104", +] + +[[package]] +name = "typed-builder-macro" +version = "0.20.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c36781cc0e46a83726d9879608e4cf6c2505237e263a8eb8c24502989cfdb28" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.104", +] + [[package]] name = "typenum" version = "1.18.0" diff --git a/Cargo.toml b/Cargo.toml index 0fe58f0fc..63043c531 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -165,6 +165,12 @@ tower-http = { version = "0.6.6", features = [ trait-variant = "0.1.2" webpki-roots = "1.0.2" zip = "4.3.0" +arrow = "55.2.0" +arrow-json = "55.2.0" +parquet = "55.2.0" +arrow-array = "55.2.0" +iceberg = 
"0.6.0" +iceberg-catalog-rest = "0.6.0" # Optional dependencies mimalloc = "0.1" diff --git a/core/connectors/sinks/iceberg_sink/Cargo.toml b/core/connectors/sinks/iceberg_sink/Cargo.toml index 0ac7aa40d..9d276ec1e 100644 --- a/core/connectors/sinks/iceberg_sink/Cargo.toml +++ b/core/connectors/sinks/iceberg_sink/Cargo.toml @@ -38,8 +38,17 @@ once_cell = { workspace = true } reqwest = { workspace = true } serde = { workspace = true } serde_yml = { workspace = true } +serde_json = { workspace = true } simd-json = { workspace = true } tracing = { workspace = true } +arrow-json = { workspace = true } +parquet = { workspace = true } +arrow-array = { workspace = true } +uuid = { workspace = true } +rust-s3 = { workspace = true } +iceberg = { workspace = true } +iceberg-catalog-rest = { workspace = true } +chrono = { workspace = true } [lib] crate-type = ["cdylib", "lib"] diff --git a/core/connectors/sinks/iceberg_sink/src/lib.rs b/core/connectors/sinks/iceberg_sink/src/lib.rs index 2ee6f6d54..71f3099f4 100644 --- a/core/connectors/sinks/iceberg_sink/src/lib.rs +++ b/core/connectors/sinks/iceberg_sink/src/lib.rs @@ -16,17 +16,41 @@ * under the License. 
*/ +use std::collections::HashMap; +use std::io::Cursor; +use std::sync::Arc; + +use arrow_json::ReaderBuilder; use async_trait::async_trait; + +use iceberg::arrow::schema_to_arrow_schema; +use iceberg::transaction::{ApplyTransactionAction, Transaction}; +use iceberg::writer::base_writer::data_file_writer::DataFileWriterBuilder; +use iceberg::writer::file_writer::ParquetWriterBuilder; +use iceberg::writer::{IcebergWriter, IcebergWriterBuilder}; +use iceberg::TableIdent; +use iceberg::{ + writer::file_writer::location_generator::{DefaultFileNameGenerator, DefaultLocationGenerator}, + Catalog, +}; +use iceberg_catalog_rest::{RestCatalog, RestCatalogConfig}; use iggy_connector_sdk::{ sink_connector, ConsumedMessage, Error, MessagesMetadata, Payload, Sink, TopicMetadata, }; +use parquet::file::properties::WriterProperties; +use s3::bucket::Bucket; +use s3::creds::Credentials; +use s3::region::Region; use serde::{Deserialize, Serialize}; -use tracing::{error, info, warn}; +use serde_json::Value; +use tracing::{error, info}; +use uuid::Uuid; #[derive(Debug, Serialize, Deserialize)] +#[allow(non_camel_case_types)] pub enum IcebergSinkTypes { - REST, - HDFS, + rest, + hdfs, } sink_connector!(IcebergSink); @@ -40,14 +64,62 @@ pub struct IcebergSink { #[derive(Debug, Serialize, Deserialize)] pub struct IcebergSinkConfig { - tables: Vec, - catalog_type: String, - credential: String, - warehouse: String, - uri: String, + pub tables: Vec, + pub catalog_type: IcebergSinkTypes, + pub bucket_name: String, + pub uri: String, + pub credential: String, + pub auto_create: bool, + pub part_size: u32, + pub store_url: String, + pub store_access_key_id: String, + pub store_secret_access_key: String, + pub store_region: String, + pub store_class: String, } impl IcebergSink { + fn flatten_payload(json: &Value) -> Vec<(String, Option)> { + let payload = json.get("payload").and_then(|p| p.get("Json")); + vec![ + ( + "id".to_string(), + payload + .and_then(|p| p.get("id")) + .and_then(|v| 
v.as_str()) + .map(|s| s.to_string()), + ), + ( + "title".to_string(), + payload + .and_then(|p| p.get("title")) + .and_then(|v| v.as_str()) + .map(|s| s.to_string()), + ), + ( + "name".to_string(), + payload + .and_then(|p| p.get("name")) + .and_then(|v| v.as_str()) + .map(|s| s.to_string()), + ), + ( + "text".to_string(), + payload + .and_then(|p| p.get("text")) + .and_then(|v| v.as_str()) + .map(|s| s.to_string()), + ), + ( + "test_field".to_string(), + payload + .and_then(|p| p.get("test_field")) + .and_then(|v| v.as_str()) + .map(|s| s.to_string()), + ), + ("message".to_string(), Some("".to_string())), // fill missing field + ] + } pub fn new(id: u32, config: IcebergSinkConfig) -> Self { IcebergSink { id, @@ -55,13 +127,81 @@ impl IcebergSink { client: reqwest::Client::new(), } } + fn get_s3_client(&self) -> Bucket { + let credentials = Credentials::new( + Some(&self.config.store_access_key_id.clone()), + Some(&self.config.store_secret_access_key.clone()), + None, + None, + None, + ) + .expect("Invalid credentials"); + + let region = Region::Custom { + region: self.config.store_region.clone(), + endpoint: self.config.store_url.clone(), + }; + + info!("{} {:?} {:?}", self.config.bucket_name, region, credentials); + + let bucket = Bucket::new(&self.config.bucket_name, region, credentials) + .expect("Failed to create S3 bucket client"); + + *bucket.with_path_style() + } + + fn get_iceberg_tables(&self) -> Vec> { + let tables: Vec> = self + .config + .tables + .iter() + .map(|table| { + table + .split('.') + .map(|element| element.to_string()) + .collect::>() + }) + .collect(); + + info!("{:?}", tables); + return tables; + } + + pub fn get_iceberg_url(&self) -> String { + let table_path = self.get_iceberg_tables()[0].clone(); + if table_path.is_empty() { + panic!("table_path must contain at least the table name"); + } + + let table_name = table_path.last().unwrap(); + info!("{}", table_name); + let namespaces = &table_path[..table_path.len() - 1]; + let 
namespace_path = namespaces.join("."); + + info!( + "{}", + format!( + "{}/v1/namespaces/{}/tables/{}", + self.config.uri, namespace_path, table_name + ) + ); + + if namespace_path.is_empty() { + format!("{}/v1/tables/{}", self.config.uri, table_name) + } else { + format!( + "{}/v1/namespaces/{}/tables/{}", + self.config.uri, namespace_path, table_name + ) + } + } } #[async_trait] impl Sink for IcebergSink { async fn open(&mut self) -> Result<(), Error> { info!( - "Opened Quickwit sink connector with ID: {} for URL: {}", + "Opened Iceberg sink connector with ID: {} for URL: {}", self.id, self.config.uri ); Ok(()) @@ -80,20 +220,138 @@ impl Sink for IcebergSink { messages_metadata.schema ); - let mut json_payloads = Vec::with_capacity(messages.len()); - for message in messages { - match message.payload { - Payload::Json(value) => json_payloads.push(value), - _ => { - warn!("Unsupported payload format: {}", messages_metadata.schema); - } - } - } + info!("{:?}", messages[0].payload); + // + //writer.close().map_err(|error| { + // error!( + // "Failed to send HTTP request to ingest messages for index: {}. 
{error}", + // self.id + // ); + // Error::HttpRequestFailed(error.to_string()) + //})?; + // + //let bucket = self.get_s3_client(); + // + //info!("{}", bucket.url()); + // + //let key = format!("nyc/users/data/{}.parquet", Uuid::new_v4()); + //let response_bucket = bucket.put_object(&key, &buffer).await.map_err(|error| { + // error!( + // "Failed to commit new parquet file to Iceberg table: {}", + // error + // ); + // Error::HttpRequestFailed(error.to_string()) + //})?; + // + //let parquet_path = format!("s3://{}/{}", self.config.bucket_name, &key); + + //info!("Parquet file uploaded: {}", parquet_path); + + let iceberg_url = self.get_iceberg_url(); - if json_payloads.is_empty() { - return Ok(()); + let mut props: HashMap = HashMap::new(); + + props.insert("s3.region".to_string(), self.config.store_region.clone()); + props.insert( + "s3.access-key-id".to_string(), + self.config.store_access_key_id.clone(), + ); + props.insert( + "s3.secret-access-key".to_string(), + self.config.store_secret_access_key.clone(), + ); + props.insert("s3.endpoint".to_string(), self.config.store_url.clone()); + + let catalog_config = RestCatalogConfig::builder() + .uri(self.config.uri.clone()) + .props(props) + .warehouse(self.config.bucket_name.clone()) + .build(); + + let catalog = RestCatalog::new(catalog_config); + + let table = catalog + .load_table(&TableIdent::from_strs(["nyc", "users"]).map_err(|err| { + error!("Failed to load table from catalog: {}", err); + Error::HttpRequestFailed(err.to_string()) + })?) 
+ .await + .map_err(|err| { + error!("Failed to get table from catalog: {}", err); + Error::HttpRequestFailed(err.to_string()) + })?; + + let location = DefaultLocationGenerator::new(table.metadata().clone()).unwrap(); + + let file_name_gen = DefaultFileNameGenerator::new( + "testing".to_string(), + Some(Uuid::new_v4().to_string()), + iceberg::spec::DataFileFormat::Parquet, + ); + + let parquet_writer_builder = ParquetWriterBuilder::new( + WriterProperties::default(), + table.metadata().current_schema().clone(), + table.file_io().clone(), + location, + file_name_gen, + ); + let data_file_writer_builder = DataFileWriterBuilder::new( + parquet_writer_builder, + None, + table.metadata().default_partition_spec_id(), + ); + + let mut writer = data_file_writer_builder.build().await.map_err(|err| { + error!("Error while constructing data file writer: {}", err); + Error::HttpRequestFailed(err.to_string()) + })?; + + let json_messages = messages + .iter() + .filter_map(|record| match &record.payload { + Payload::Json(record) => simd_json::to_string(&record).ok(), + _ => panic!("aaa"), + }) + .collect::>() + .join("\n"); + + let cursor = Cursor::new(json_messages); + + let mut reader = ReaderBuilder::new(Arc::new( + schema_to_arrow_schema(&table.metadata().current_schema().clone()).unwrap(), + )) + .build(cursor) + .unwrap(); + + while let Some(batch) = reader.next() { + let batch_data = batch.unwrap(); + writer.write(batch_data).await.unwrap(); } + let data_files = writer.close().await.unwrap(); + + let table_commit = Transaction::new(&table); + + let action = table_commit.fast_append().add_data_files(data_files); + + let tx = action.apply(table_commit).map_err(|err| { + error!("Failed to apply transaction: {}", err); + Error::HttpRequestFailed(err.to_string()) + })?; + + let table = tx.commit(&catalog).await.map_err(|err| { + error!("Failed to apply transaction on table: {}", err); + Error::HttpRequestFailed(err.to_string()) + })?; + + match self.config.catalog_type { + 
IcebergSinkTypes::rest => info!("Rest"), + IcebergSinkTypes::hdfs => info!("HDFS"), + }; + + info!("Finished successfully"); + Ok(()) } diff --git a/core/connectors/sinks/quickwit_sink/src/lib.rs b/core/connectors/sinks/quickwit_sink/src/lib.rs index e290b3e0b..218b63de5 100644 --- a/core/connectors/sinks/quickwit_sink/src/lib.rs +++ b/core/connectors/sinks/quickwit_sink/src/lib.rs @@ -18,7 +18,7 @@ use async_trait::async_trait; use iggy_connector_sdk::{ - ConsumedMessage, Error, MessagesMetadata, Payload, Sink, TopicMetadata, sink_connector, + sink_connector, ConsumedMessage, Error, MessagesMetadata, Payload, Sink, TopicMetadata, }; use serde::{Deserialize, Serialize}; use tracing::{error, info, warn}; diff --git a/core/connectors/sources/iceberg_source/Cargo.toml b/core/connectors/sources/iceberg_source/Cargo.toml index 848674985..a55cae12b 100644 --- a/core/connectors/sources/iceberg_source/Cargo.toml +++ b/core/connectors/sources/iceberg_source/Cargo.toml @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ [package] name = "iceberg_source" version = "0.1.0" From 7d583c15f758fa493f4c5108e201f66ba524f5ce Mon Sep 17 00:00:00 2001 From: Edgar Modesto Date: Wed, 10 Sep 2025 14:47:51 -0700 Subject: [PATCH 04/28] Refactor iceberg sink --- core/connectors/sinks/iceberg_sink/src/lib.rs | 344 ++++++------------ 1 file changed, 120 insertions(+), 224 deletions(-) diff --git a/core/connectors/sinks/iceberg_sink/src/lib.rs b/core/connectors/sinks/iceberg_sink/src/lib.rs index 71f3099f4..ff2c80a0f 100644 --- a/core/connectors/sinks/iceberg_sink/src/lib.rs +++ b/core/connectors/sinks/iceberg_sink/src/lib.rs @@ -16,6 +16,7 @@ * under the License. */ +use core::fmt; use std::collections::HashMap; use std::io::Cursor; use std::sync::Arc; @@ -24,6 +25,7 @@ use arrow_json::ReaderBuilder; use async_trait::async_trait; use iceberg::arrow::schema_to_arrow_schema; +use iceberg::table::Table; use iceberg::transaction::{ApplyTransactionAction, Transaction}; use iceberg::writer::base_writer::data_file_writer::DataFileWriterBuilder; use iceberg::writer::file_writer::ParquetWriterBuilder; @@ -38,11 +40,7 @@ use iggy_connector_sdk::{ sink_connector, ConsumedMessage, Error, MessagesMetadata, Payload, Sink, TopicMetadata, }; use parquet::file::properties::WriterProperties; -use s3::bucket::Bucket; -use s3::creds::Credentials; -use s3::region::Region; use serde::{Deserialize, Serialize}; -use serde_json::Value; use tracing::{error, info}; use uuid::Uuid; @@ -50,7 +48,19 @@ use uuid::Uuid; #[allow(non_camel_case_types)] pub enum IcebergSinkTypes { rest, - hdfs, + hive, + glue, +} + +impl fmt::Display for IcebergSinkTypes { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let s = match self { + IcebergSinkTypes::rest => "rest", + IcebergSinkTypes::hive => "hive", + IcebergSinkTypes::glue => "glue", + }; + write!(f, "{}", s) + } } sink_connector!(IcebergSink); @@ -59,7 +69,8 @@ sink_connector!(IcebergSink); pub struct IcebergSink { id: u32, config: IcebergSinkConfig, - client: 
reqwest::Client, + tables: Vec, + catalog: Option, } #[derive(Debug, Serialize, Deserialize)] @@ -79,121 +90,18 @@ pub struct IcebergSinkConfig { } impl IcebergSink { - fn flatten_payload(json: &Value) -> Vec<(String, Option)> { - let payload = json.get("payload").and_then(|p| p.get("Json")); - vec![ - ( - "id".to_string(), - payload - .and_then(|p| p.get("id")) - .and_then(|v| v.as_str()) - .map(|s| s.to_string()), - ), - ( - "title".to_string(), - payload - .and_then(|p| p.get("title")) - .and_then(|v| v.as_str()) - .map(|s| s.to_string()), - ), - ( - "name".to_string(), - payload - .and_then(|p| p.get("name")) - .and_then(|v| v.as_str()) - .map(|s| s.to_string()), - ), - ( - "text".to_string(), - payload - .and_then(|p| p.get("text")) - .and_then(|v| v.as_str()) - .map(|s| s.to_string()), - ), - ( - "test_field".to_string(), - payload - .and_then(|p| p.get("test_field")) - .and_then(|v| v.as_str()) - .map(|s| s.to_string()), - ), - ("message".to_string(), Some("".to_string())), // fill missing field - ] - } pub fn new(id: u32, config: IcebergSinkConfig) -> Self { + let tables: Vec
= Vec::with_capacity(config.tables.len()); IcebergSink { id, config, - client: reqwest::Client::new(), + tables, + catalog: None, } } - fn get_s3_client(&self) -> Bucket { - let credentials = Credentials::new( - Some(&self.config.store_access_key_id.clone()), - Some(&self.config.store_secret_access_key.clone()), - None, - None, - None, - ) - .expect("Invalid credentials"); - - let region = Region::Custom { - region: self.config.store_region.clone(), - endpoint: self.config.store_url.clone(), - }; - - info!("{} {:?} {:?}", self.config.bucket_name, region, credentials); - - let bucket = Bucket::new(&self.config.bucket_name, region, credentials) - .expect("Failed to create S3 bucket client"); - - *bucket.with_path_style() - } - - fn get_iceberg_tables(&self) -> Vec> { - let tables: Vec> = self - .config - .tables - .iter() - .map(|table| { - table - .split('.') - .map(|element| element.to_string()) - .collect::>() - }) - .collect(); - - info!("{:?}", tables); - return tables; - } - - pub fn get_iceberg_url(&self) -> String { - let table_path = self.get_iceberg_tables()[0].clone(); - if table_path.is_empty() { - panic!("table_path must contain at least the table name"); - } - - let table_name = table_path.last().unwrap(); - info!("{}", table_name); - let namespaces = &table_path[..table_path.len() - 1]; - let namespace_path = namespaces.join("."); - info!( - "{}", - format!( - "{}/v1/namespaces/{}/tables/{}", - self.config.uri, namespace_path, table_name - ) - ); - - if namespace_path.is_empty() { - format!("{}/v1/tables/{}", self.config.uri, table_name) - } else { - format!( - "{}/v1/namespaces/{}/tables/{}", - self.config.uri, namespace_path, table_name - ) - } + fn slice_user_table(&self, table: &String) -> Vec { + table.split('.').map(|s| s.to_string()).collect() } } @@ -204,51 +112,15 @@ impl Sink for IcebergSink { "Opened Iceberg sink connector with ID: {} for URL: {}", self.id, self.config.uri ); - Ok(()) - } - async fn consume( - &self, - _topic_metadata: 
&TopicMetadata, - messages_metadata: MessagesMetadata, - messages: Vec, - ) -> Result<(), Error> { info!( - "Iceberg sink with ID: {} received: {} messages, format: {}", - self.id, - messages.len(), - messages_metadata.schema + "Configuring Iceberg catalog with the following config:\n-region: {}\n-url: {}\n-store class: {}\n-catalog type: {}\n", + self.config.store_region, + self.config.store_url, + self.config.store_class, + self.config.catalog_type ); - info!("{:?}", messages[0].payload); - // - //writer.close().map_err(|error| { - // error!( - // "Failed to send HTTP request to ingest messages for index: {}. {error}", - // self.id - // ); - // Error::HttpRequestFailed(error.to_string()) - //})?; - // - //let bucket = self.get_s3_client(); - // - //info!("{}", bucket.url()); - // - //let key = format!("nyc/users/data/{}.parquet", Uuid::new_v4()); - //let response_bucket = bucket.put_object(&key, &buffer).await.map_err(|error| { - // error!( - // "Failed to commit new parquet file to Iceberg table: {}", - // error - // ); - // Error::HttpRequestFailed(error.to_string()) - //})?; - // - //let parquet_path = format!("s3://{}/{}", self.config.bucket_name, &key); - - //info!("Parquet file uploaded: {}", parquet_path); - - let iceberg_url = self.get_iceberg_url(); - let mut props: HashMap = HashMap::new(); props.insert("s3.region".to_string(), self.config.store_region.clone()); @@ -268,87 +140,111 @@ impl Sink for IcebergSink { .warehouse(self.config.bucket_name.clone()) .build(); - let catalog = RestCatalog::new(catalog_config); + let catalog = match self.config.catalog_type { + IcebergSinkTypes::rest => RestCatalog::new(catalog_config), + IcebergSinkTypes::hive => RestCatalog::new(catalog_config), + IcebergSinkTypes::glue => RestCatalog::new(catalog_config), + }; - let table = catalog - .load_table(&TableIdent::from_strs(["nyc", "users"]).map_err(|err| { - error!("Failed to load table from catalog: {}", err); - Error::HttpRequestFailed(err.to_string()) - })?) 
- .await - .map_err(|err| { - error!("Failed to get table from catalog: {}", err); - Error::HttpRequestFailed(err.to_string()) - })?; + for declared_table in &self.config.tables { + let sliced_table = self.slice_user_table(&declared_table); + let table = catalog + .load_table(&TableIdent::from_strs(sliced_table).map_err(|err| { + error!("Failed to load table from catalog: {}", err); + Error::HttpRequestFailed(err.to_string()) + })?) + .await + .map_err(|err| { + error!("Failed to get table from catalog: {}", err); + Error::HttpRequestFailed(err.to_string()) + })?; + self.tables.push(table); + } - let location = DefaultLocationGenerator::new(table.metadata().clone()).unwrap(); + self.catalog = Some(catalog); + Ok(()) + } - let file_name_gen = DefaultFileNameGenerator::new( - "testing".to_string(), - Some(Uuid::new_v4().to_string()), - iceberg::spec::DataFileFormat::Parquet, + async fn consume( + &self, + _topic_metadata: &TopicMetadata, + messages_metadata: MessagesMetadata, + messages: Vec, + ) -> Result<(), Error> { + info!( + "Iceberg sink with ID: {} received: {} messages, format: {}", + self.id, + messages.len(), + messages_metadata.schema ); + for table in &self.tables { + let location = DefaultLocationGenerator::new(table.metadata().clone()).unwrap(); + + let file_name_gen = DefaultFileNameGenerator::new( + Uuid::new_v4().to_string(), + None, + iceberg::spec::DataFileFormat::Parquet, + ); + + let parquet_writer_builder = ParquetWriterBuilder::new( + WriterProperties::default(), + table.metadata().current_schema().clone(), + table.file_io().clone(), + location, + file_name_gen, + ); + let data_file_writer_builder = DataFileWriterBuilder::new( + parquet_writer_builder, + None, + table.metadata().default_partition_spec_id(), + ); + + let mut writer = data_file_writer_builder.build().await.map_err(|err| { + error!("Error while constructing data file writer: {}", err); + Error::HttpRequestFailed(err.to_string()) + })?; - let parquet_writer_builder = 
ParquetWriterBuilder::new( - WriterProperties::default(), - table.metadata().current_schema().clone(), - table.file_io().clone(), - location, - file_name_gen, - ); - let data_file_writer_builder = DataFileWriterBuilder::new( - parquet_writer_builder, - None, - table.metadata().default_partition_spec_id(), - ); + let json_messages = messages + .iter() + .filter_map(|record| match &record.payload { + Payload::Json(record) => simd_json::to_string(&record).ok(), + _ => panic!("aaa"), + }) + .collect::>() + .join("\n"); - let mut writer = data_file_writer_builder.build().await.map_err(|err| { - error!("Error while constructing data file writer: {}", err); - Error::HttpRequestFailed(err.to_string()) - })?; - - let json_messages = messages - .iter() - .filter_map(|record| match &record.payload { - Payload::Json(record) => simd_json::to_string(&record).ok(), - _ => panic!("aaa"), - }) - .collect::>() - .join("\n"); - - let cursor = Cursor::new(json_messages); - - let mut reader = ReaderBuilder::new(Arc::new( - schema_to_arrow_schema(&table.metadata().current_schema().clone()).unwrap(), - )) - .build(cursor) - .unwrap(); - - while let Some(batch) = reader.next() { - let batch_data = batch.unwrap(); - writer.write(batch_data).await.unwrap(); - } + let cursor = Cursor::new(json_messages); - let data_files = writer.close().await.unwrap(); + let mut reader = ReaderBuilder::new(Arc::new( + schema_to_arrow_schema(&table.metadata().current_schema().clone()).unwrap(), + )) + .build(cursor) + .unwrap(); - let table_commit = Transaction::new(&table); + while let Some(batch) = reader.next() { + let batch_data = batch.unwrap(); + writer.write(batch_data).await.unwrap(); + } - let action = table_commit.fast_append().add_data_files(data_files); + let data_files = writer.close().await.unwrap(); - let tx = action.apply(table_commit).map_err(|err| { - error!("Failed to apply transaction: {}", err); - Error::HttpRequestFailed(err.to_string()) - })?; + let table_commit = 
Transaction::new(&table); - let table = tx.commit(&catalog).await.map_err(|err| { - error!("Failed to apply transaction on table: {}", err); - Error::HttpRequestFailed(err.to_string()) - })?; + let action = table_commit.fast_append().add_data_files(data_files); - match self.config.catalog_type { - IcebergSinkTypes::rest => info!("Rest"), - IcebergSinkTypes::hdfs => info!("HDFS"), - }; + let tx = action.apply(table_commit).map_err(|err| { + error!("Failed to apply transaction: {}", err); + Error::HttpRequestFailed(err.to_string()) + })?; + + let _table = tx + .commit(self.catalog.as_ref().unwrap()) + .await + .map_err(|err| { + error!("Failed to apply transaction on table: {}", err); + Error::HttpRequestFailed(err.to_string()) + })?; + } info!("Finished successfully"); From 1721004f138932c3d14aeb35c39d501b9be81ef4 Mon Sep 17 00:00:00 2001 From: Edgar Modesto Date: Thu, 11 Sep 2025 15:31:43 -0700 Subject: [PATCH 05/28] Add glue support --- Cargo.lock | 660 ++++++++++++++++-- Cargo.toml | 2 + core/connectors/sinks/iceberg_sink/Cargo.toml | 1 + core/connectors/sinks/iceberg_sink/src/lib.rs | 85 ++- 4 files changed, 668 insertions(+), 80 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 577049fe6..2efb8f9eb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -834,7 +834,7 @@ checksum = "07a9b245ba0739fc90935094c29adbaee3f977218b5fb95e822e261cda7f56a3" dependencies = [ "http 1.3.1", "log", - "rustls", + "rustls 0.23.31", "serde", "serde_json", "url", @@ -847,6 +847,48 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" +[[package]] +name = "aws-config" +version = "1.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8bc1b40fb26027769f16960d2f4a6bc20c4bb755d403e552c8c1a73af433c246" +dependencies = [ + "aws-credential-types", + "aws-runtime", + "aws-sdk-sso", + "aws-sdk-ssooidc", + "aws-sdk-sts", + "aws-smithy-async", + 
"aws-smithy-http", + "aws-smithy-json", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "fastrand", + "hex", + "http 1.3.1", + "ring", + "time", + "tokio", + "tracing", + "url", + "zeroize", +] + +[[package]] +name = "aws-credential-types" +version = "1.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d025db5d9f52cbc413b167136afb3d8aeea708c0d8884783cf6253be5e22f6f2" +dependencies = [ + "aws-smithy-async", + "aws-smithy-runtime-api", + "aws-smithy-types", + "zeroize", +] + [[package]] name = "aws-creds" version = "0.38.0" @@ -896,6 +938,320 @@ dependencies = [ "thiserror 1.0.69", ] +[[package]] +name = "aws-runtime" +version = "1.5.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c034a1bc1d70e16e7f4e4caf7e9f7693e4c9c24cd91cf17c2a0b21abaebc7c8b" +dependencies = [ + "aws-credential-types", + "aws-sigv4", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "fastrand", + "http 0.2.12", + "http-body 0.4.6", + "percent-encoding", + "pin-project-lite", + "tracing", + "uuid", +] + +[[package]] +name = "aws-sdk-glue" +version = "1.119.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9170d379508a356718ca6c27ffa5c00b5077ecdf7ab5cb27a70834a58e00dd13" +dependencies = [ + "aws-credential-types", + "aws-runtime", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "fastrand", + "http 0.2.12", + "regex-lite", + "tracing", +] + +[[package]] +name = "aws-sdk-sso" +version = "1.83.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "643cd43af212d2a1c4dedff6f044d7e1961e5d9e7cfe773d70f31d9842413886" +dependencies = [ + "aws-credential-types", + "aws-runtime", + "aws-smithy-async", + "aws-smithy-http", 
+ "aws-smithy-json", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "fastrand", + "http 0.2.12", + "regex-lite", + "tracing", +] + +[[package]] +name = "aws-sdk-ssooidc" +version = "1.84.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "20ec4a95bd48e0db7a424356a161f8d87bd6a4f0af37204775f0da03d9e39fc3" +dependencies = [ + "aws-credential-types", + "aws-runtime", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "fastrand", + "http 0.2.12", + "regex-lite", + "tracing", +] + +[[package]] +name = "aws-sdk-sts" +version = "1.85.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "410309ad0df4606bc721aff0d89c3407682845453247213a0ccc5ff8801ee107" +dependencies = [ + "aws-credential-types", + "aws-runtime", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-query", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-smithy-xml", + "aws-types", + "fastrand", + "http 0.2.12", + "regex-lite", + "tracing", +] + +[[package]] +name = "aws-sigv4" +version = "1.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "084c34162187d39e3740cb635acd73c4e3a551a36146ad6fe8883c929c9f876c" +dependencies = [ + "aws-credential-types", + "aws-smithy-http", + "aws-smithy-runtime-api", + "aws-smithy-types", + "bytes", + "form_urlencoded", + "hex", + "hmac", + "http 0.2.12", + "http 1.3.1", + "percent-encoding", + "sha2", + "time", + "tracing", +] + +[[package]] +name = "aws-smithy-async" +version = "1.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e190749ea56f8c42bf15dd76c65e14f8f765233e6df9b0506d9d934ebef867c" +dependencies = [ + "futures-util", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "aws-smithy-http" +version = "0.62.3" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c4dacf2d38996cf729f55e7a762b30918229917eca115de45dfa8dfb97796c9" +dependencies = [ + "aws-smithy-runtime-api", + "aws-smithy-types", + "bytes", + "bytes-utils", + "futures-core", + "http 0.2.12", + "http 1.3.1", + "http-body 0.4.6", + "percent-encoding", + "pin-project-lite", + "pin-utils", + "tracing", +] + +[[package]] +name = "aws-smithy-http-client" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147e8eea63a40315d704b97bf9bc9b8c1402ae94f89d5ad6f7550d963309da1b" +dependencies = [ + "aws-smithy-async", + "aws-smithy-runtime-api", + "aws-smithy-types", + "h2 0.3.27", + "h2 0.4.12", + "http 0.2.12", + "http 1.3.1", + "http-body 0.4.6", + "hyper 0.14.32", + "hyper 1.6.0", + "hyper-rustls 0.24.2", + "hyper-rustls 0.27.7", + "hyper-util", + "pin-project-lite", + "rustls 0.21.12", + "rustls 0.23.31", + "rustls-native-certs 0.8.1", + "rustls-pki-types", + "tokio", + "tokio-rustls 0.26.2", + "tower 0.5.2", + "tracing", +] + +[[package]] +name = "aws-smithy-json" +version = "0.61.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eaa31b350998e703e9826b2104dd6f63be0508666e1aba88137af060e8944047" +dependencies = [ + "aws-smithy-types", +] + +[[package]] +name = "aws-smithy-observability" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9364d5989ac4dd918e5cc4c4bdcc61c9be17dcd2586ea7f69e348fc7c6cab393" +dependencies = [ + "aws-smithy-runtime-api", +] + +[[package]] +name = "aws-smithy-query" +version = "0.60.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2fbd61ceb3fe8a1cb7352e42689cec5335833cd9f94103a61e98f9bb61c64bb" +dependencies = [ + "aws-smithy-types", + "urlencoding", +] + +[[package]] +name = "aws-smithy-runtime" +version = "1.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"4fa63ad37685ceb7762fa4d73d06f1d5493feb88e3f27259b9ed277f4c01b185" +dependencies = [ + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-http-client", + "aws-smithy-observability", + "aws-smithy-runtime-api", + "aws-smithy-types", + "bytes", + "fastrand", + "http 0.2.12", + "http 1.3.1", + "http-body 0.4.6", + "http-body 1.0.1", + "pin-project-lite", + "pin-utils", + "tokio", + "tracing", +] + +[[package]] +name = "aws-smithy-runtime-api" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07f5e0fc8a6b3f2303f331b94504bbf754d85488f402d6f1dd7a6080f99afe56" +dependencies = [ + "aws-smithy-async", + "aws-smithy-types", + "bytes", + "http 0.2.12", + "http 1.3.1", + "pin-project-lite", + "tokio", + "tracing", + "zeroize", +] + +[[package]] +name = "aws-smithy-types" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d498595448e43de7f4296b7b7a18a8a02c61ec9349128c80a368f7c3b4ab11a8" +dependencies = [ + "base64-simd", + "bytes", + "bytes-utils", + "futures-core", + "http 0.2.12", + "http 1.3.1", + "http-body 0.4.6", + "http-body 1.0.1", + "http-body-util", + "itoa", + "num-integer", + "pin-project-lite", + "pin-utils", + "ryu", + "serde", + "time", + "tokio", + "tokio-util", +] + +[[package]] +name = "aws-smithy-xml" +version = "0.60.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3db87b96cb1b16c024980f133968d52882ca0daaee3a086c6decc500f6c99728" +dependencies = [ + "xmlparser", +] + +[[package]] +name = "aws-types" +version = "1.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b069d19bf01e46298eaedd7c6f283fe565a59263e53eebec945f3e6398f42390" +dependencies = [ + "aws-credential-types", + "aws-smithy-async", + "aws-smithy-runtime-api", + "aws-smithy-types", + "rustc_version", + "tracing", +] + [[package]] name = "axum" version = "0.7.9" @@ -907,7 +1263,7 @@ dependencies = [ "bytes", "futures-util", "http 
1.3.1", - "http-body", + "http-body 1.0.1", "http-body-util", "itoa", "matchit 0.7.3", @@ -934,9 +1290,9 @@ dependencies = [ "form_urlencoded", "futures-util", "http 1.3.1", - "http-body", + "http-body 1.0.1", "http-body-util", - "hyper", + "hyper 1.6.0", "hyper-util", "itoa", "matchit 0.8.4", @@ -967,7 +1323,7 @@ dependencies = [ "bytes", "futures-util", "http 1.3.1", - "http-body", + "http-body 1.0.1", "http-body-util", "mime", "pin-project-lite", @@ -986,7 +1342,7 @@ dependencies = [ "bytes", "futures-core", "http 1.3.1", - "http-body", + "http-body 1.0.1", "http-body-util", "mime", "pin-project-lite", @@ -1007,15 +1363,15 @@ dependencies = [ "bytes", "fs-err", "http 1.3.1", - "http-body", - "hyper", + "http-body 1.0.1", + "hyper 1.6.0", "hyper-util", "pin-project-lite", - "rustls", - "rustls-pemfile", + "rustls 0.23.31", + "rustls-pemfile 2.2.0", "rustls-pki-types", "tokio", - "tokio-rustls", + "tokio-rustls 0.26.2", "tower-service", ] @@ -1057,6 +1413,16 @@ version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" +[[package]] +name = "base64-simd" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "339abbe78e73178762e23bea9dfd08e697eb3f3301cd4be981c0f78ba5859195" +dependencies = [ + "outref", + "vsimd", +] + [[package]] name = "base64ct" version = "1.8.0" @@ -1462,6 +1828,16 @@ version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" +[[package]] +name = "bytes-utils" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7dafe3a8757b027e2be6e4e5601ed563c55989fcf1546e933c66c8eb3a058d35" +dependencies = [ + "bytes", + "either", +] + [[package]] name = "bytestring" version = "1.4.0" @@ -1897,6 +2273,16 @@ dependencies = [ "version_check", ] +[[package]] +name = 
"core-foundation" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f" +dependencies = [ + "core-foundation-sys", + "libc", +] + [[package]] name = "core-foundation" version = "0.10.1" @@ -3807,6 +4193,17 @@ dependencies = [ "itoa", ] +[[package]] +name = "http-body" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" +dependencies = [ + "bytes", + "http 0.2.12", + "pin-project-lite", +] + [[package]] name = "http-body" version = "1.0.1" @@ -3826,7 +4223,7 @@ dependencies = [ "bytes", "futures-core", "http 1.3.1", - "http-body", + "http-body 1.0.1", "pin-project-lite", ] @@ -3860,6 +4257,30 @@ version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b112acc8b3adf4b107a8ec20977da0273a8c386765a3ec0229bd500a1443f9f" +[[package]] +name = "hyper" +version = "0.14.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41dfc780fdec9373c01bae43289ea34c972e40ee3c9f6b3c8801a35f35586ce7" +dependencies = [ + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "h2 0.3.27", + "http 0.2.12", + "http-body 0.4.6", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "socket2 0.5.10", + "tokio", + "tower-service", + "tracing", + "want", +] + [[package]] name = "hyper" version = "1.6.0" @@ -3871,7 +4292,7 @@ dependencies = [ "futures-util", "h2 0.4.12", "http 1.3.1", - "http-body", + "http-body 1.0.1", "httparse", "httpdate", "itoa", @@ -3881,6 +4302,22 @@ dependencies = [ "want", ] +[[package]] +name = "hyper-rustls" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590" +dependencies = [ + "futures-util", + "http 0.2.12", + "hyper 0.14.32", + "log", + "rustls 
0.21.12", + "rustls-native-certs 0.6.3", + "tokio", + "tokio-rustls 0.24.1", +] + [[package]] name = "hyper-rustls" version = "0.27.7" @@ -3888,14 +4325,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58" dependencies = [ "http 1.3.1", - "hyper", + "hyper 1.6.0", "hyper-util", "log", - "rustls", - "rustls-native-certs", + "rustls 0.23.31", + "rustls-native-certs 0.8.1", "rustls-pki-types", "tokio", - "tokio-rustls", + "tokio-rustls 0.26.2", "tower-service", "webpki-roots 1.0.2", ] @@ -3906,7 +4343,7 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b90d566bffbce6a75bd8b09a05aa8c2cb1fabb6cb348f8840c9e4c90a0d83b0" dependencies = [ - "hyper", + "hyper 1.6.0", "hyper-util", "pin-project-lite", "tokio", @@ -3925,8 +4362,8 @@ dependencies = [ "futures-core", "futures-util", "http 1.3.1", - "http-body", - "hyper", + "http-body 1.0.1", + "hyper 1.6.0", "ipnet", "libc", "percent-encoding", @@ -4016,6 +4453,24 @@ dependencies = [ "zstd", ] +[[package]] +name = "iceberg-catalog-glue" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9657a2003249c8acf2c50b7170287059b565bc0f35528d525faaca1f8cd50190" +dependencies = [ + "anyhow", + "async-trait", + "aws-config", + "aws-sdk-glue", + "iceberg", + "serde_json", + "tokio", + "tracing", + "typed-builder 0.20.1", + "uuid", +] + [[package]] name = "iceberg-catalog-rest" version = "0.6.0" @@ -4175,10 +4630,10 @@ dependencies = [ "reqwest", "reqwest-middleware", "reqwest-retry", - "rustls", + "rustls 0.23.31", "serde", "tokio", - "tokio-rustls", + "tokio-rustls 0.26.2", "tracing", "trait-variant", "webpki-roots 1.0.2", @@ -4323,7 +4778,7 @@ dependencies = [ "fast-async-mutex", "humantime", "rcgen", - "rustls", + "rustls 0.23.31", "serde", "serde_json", "serde_with", @@ -4343,6 +4798,7 @@ dependencies = [ "chrono", "dashmap", "iceberg", + 
"iceberg-catalog-glue", "iceberg-catalog-rest", "iggy_connector_sdk", "once_cell", @@ -5782,10 +6238,10 @@ dependencies = [ "futures", "futures-util", "http 1.3.1", - "http-body", + "http-body 1.0.1", "http-body-util", - "hyper", - "hyper-rustls", + "hyper 1.6.0", + "hyper-rustls 0.27.7", "hyper-timeout", "hyper-util", "jsonwebtoken", @@ -5839,7 +6295,7 @@ dependencies = [ "futures", "getrandom 0.2.16", "http 1.3.1", - "http-body", + "http-body 1.0.1", "log", "md-5", "percent-encoding", @@ -6035,6 +6491,12 @@ dependencies = [ "hashbrown 0.14.5", ] +[[package]] +name = "outref" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a80800c0488c3a21695ea981a54918fbb37abf04f4d0720c453632255e2ff0e" + [[package]] name = "overload" version = "0.1.1" @@ -6758,7 +7220,7 @@ dependencies = [ "quinn-proto", "quinn-udp", "rustc-hash 2.1.1", - "rustls", + "rustls 0.23.31", "socket2 0.5.10", "thiserror 2.0.14", "tokio", @@ -6779,7 +7241,7 @@ dependencies = [ "rand 0.9.2", "ring", "rustc-hash 2.1.1", - "rustls", + "rustls 0.23.31", "rustls-pki-types", "rustls-platform-verifier", "slab", @@ -7080,24 +7542,24 @@ dependencies = [ "futures-core", "futures-util", "http 1.3.1", - "http-body", + "http-body 1.0.1", "http-body-util", - "hyper", - "hyper-rustls", + "hyper 1.6.0", + "hyper-rustls 0.27.7", "hyper-util", "js-sys", "log", "percent-encoding", "pin-project-lite", "quinn", - "rustls", + "rustls 0.23.31", "rustls-pki-types", "serde", "serde_json", "serde_urlencoded", "sync_wrapper", "tokio", - "tokio-rustls", + "tokio-rustls 0.26.2", "tokio-util", "tower 0.5.2", "tower-http", @@ -7136,7 +7598,7 @@ dependencies = [ "futures", "getrandom 0.2.16", "http 1.3.1", - "hyper", + "hyper 1.6.0", "parking_lot 0.11.2", "reqwest", "reqwest-middleware", @@ -7217,7 +7679,7 @@ dependencies = [ "chrono", "futures", "http 1.3.1", - "http-body", + "http-body 1.0.1", "http-body-util", "paste", "pin-project-lite", @@ -7411,6 +7873,18 @@ dependencies = [ 
"windows-sys 0.60.2", ] +[[package]] +name = "rustls" +version = "0.21.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f56a14d1f48b391359b22f731fd4bd7e43c97f3c50eee276f3aa09c94784d3e" +dependencies = [ + "log", + "ring", + "rustls-webpki 0.101.7", + "sct", +] + [[package]] name = "rustls" version = "0.23.31" @@ -7422,11 +7896,23 @@ dependencies = [ "once_cell", "ring", "rustls-pki-types", - "rustls-webpki", + "rustls-webpki 0.103.4", "subtle", "zeroize", ] +[[package]] +name = "rustls-native-certs" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9aace74cb666635c918e9c12bc0d348266037aa8eb599b5cba565709a8dff00" +dependencies = [ + "openssl-probe", + "rustls-pemfile 1.0.4", + "schannel", + "security-framework 2.11.1", +] + [[package]] name = "rustls-native-certs" version = "0.8.1" @@ -7436,7 +7922,16 @@ dependencies = [ "openssl-probe", "rustls-pki-types", "schannel", - "security-framework", + "security-framework 3.3.0", +] + +[[package]] +name = "rustls-pemfile" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c74cae0a4cf6ccbbf5f359f08efdf8ee7e1dc532573bf0db71968cb56b1448c" +dependencies = [ + "base64 0.21.7", ] [[package]] @@ -7464,16 +7959,16 @@ version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "19787cda76408ec5404443dc8b31795c87cd8fec49762dc75fa727740d34acc1" dependencies = [ - "core-foundation", + "core-foundation 0.10.1", "core-foundation-sys", "jni", "log", "once_cell", - "rustls", - "rustls-native-certs", + "rustls 0.23.31", + "rustls-native-certs 0.8.1", "rustls-platform-verifier-android", - "rustls-webpki", - "security-framework", + "rustls-webpki 0.103.4", + "security-framework 3.3.0", "security-framework-sys", "webpki-root-certs 0.26.11", "windows-sys 0.59.0", @@ -7485,6 +7980,16 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"f87165f0995f63a9fbeea62b64d10b4d9d8e78ec6d7d51fb2125fda7bb36788f" +[[package]] +name = "rustls-webpki" +version = "0.101.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765" +dependencies = [ + "ring", + "untrusted", +] + [[package]] name = "rustls-webpki" version = "0.103.4" @@ -7614,6 +8119,16 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "sct" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414" +dependencies = [ + "ring", + "untrusted", +] + [[package]] name = "sdd" version = "3.0.10" @@ -7647,6 +8162,19 @@ dependencies = [ "zeroize", ] +[[package]] +name = "security-framework" +version = "2.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" +dependencies = [ + "bitflags 2.9.1", + "core-foundation 0.9.4", + "core-foundation-sys", + "libc", + "security-framework-sys", +] + [[package]] name = "security-framework" version = "3.3.0" @@ -7654,7 +8182,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "80fb1d92c5028aa318b4b8bd7302a5bfcf48be96a37fc6fc790f806b0004ee0c" dependencies = [ "bitflags 2.9.1", - "core-foundation", + "core-foundation 0.10.1", "core-foundation-sys", "libc", "security-framework-sys", @@ -7927,8 +8455,8 @@ dependencies = [ "reqwest", "ring", "rust-s3", - "rustls", - "rustls-pemfile", + "rustls 0.23.31", + "rustls-pemfile 2.2.0", "serde", "serde_with", "serial_test", @@ -7938,7 +8466,7 @@ dependencies = [ "tempfile", "thiserror 2.0.14", "tokio", - "tokio-rustls", + "tokio-rustls 0.26.2", "tokio-util", "toml 0.9.5", "tower-http", @@ -8197,7 +8725,7 @@ dependencies = [ "memchr", 
"once_cell", "percent-encoding", - "rustls", + "rustls 0.23.31", "serde", "serde_json", "sha2", @@ -8366,7 +8894,7 @@ checksum = "eb4dc4d33c68ec1f27d386b5610a351922656e1fdf5c05bbaad930cd1519479a" dependencies = [ "bytes", "futures-util", - "http-body", + "http-body 1.0.1", "http-body-util", "pin-project-lite", ] @@ -8817,13 +9345,23 @@ dependencies = [ "syn 2.0.104", ] +[[package]] +name = "tokio-rustls" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081" +dependencies = [ + "rustls 0.21.12", + "tokio", +] + [[package]] name = "tokio-rustls" version = "0.26.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e727b36a1a0e8b74c376ac2211e40c2c8af09fb4013c60d910495810f008e9b" dependencies = [ - "rustls", + "rustls 0.23.31", "tokio", ] @@ -8956,9 +9494,9 @@ dependencies = [ "bytes", "h2 0.4.12", "http 1.3.1", - "http-body", + "http-body 1.0.1", "http-body-util", - "hyper", + "hyper 1.6.0", "hyper-timeout", "hyper-util", "percent-encoding", @@ -8983,9 +9521,9 @@ dependencies = [ "base64 0.22.1", "bytes", "http 1.3.1", - "http-body", + "http-body 1.0.1", "http-body-util", - "hyper", + "hyper 1.6.0", "hyper-timeout", "hyper-util", "percent-encoding", @@ -9061,7 +9599,7 @@ dependencies = [ "bytes", "futures-util", "http 1.3.1", - "http-body", + "http-body 1.0.1", "iri-string", "pin-project-lite", "tower 0.5.2", @@ -9511,6 +10049,12 @@ version = "0.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "051eb1abcf10076295e815102942cc58f9d5e3b4560e46e53c21e8ff6f3af7b1" +[[package]] +name = "vsimd" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c3082ca00d5a5ef149bb8b555a72ae84c9c59f7250f013ac822ac2e49b19c64" + [[package]] name = "wait-timeout" version = "0.2.1" @@ -10273,6 +10817,12 @@ dependencies = [ "tap", ] +[[package]] +name = "xmlparser" +version = 
"0.13.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "66fee0b777b0f5ac1c69bb06d361268faafa61cd4682ae064a171c16c433e9e4" + [[package]] name = "yaml-rust2" version = "0.10.3" diff --git a/Cargo.toml b/Cargo.toml index 63043c531..71cccb6cc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -171,6 +171,8 @@ parquet = "55.2.0" arrow-array = "55.2.0" iceberg = "0.6.0" iceberg-catalog-rest = "0.6.0" +iceberg-catalog-glue = "0.6.0" + # Optional dependencies mimalloc = "0.1" diff --git a/core/connectors/sinks/iceberg_sink/Cargo.toml b/core/connectors/sinks/iceberg_sink/Cargo.toml index 9d276ec1e..e6152b2cb 100644 --- a/core/connectors/sinks/iceberg_sink/Cargo.toml +++ b/core/connectors/sinks/iceberg_sink/Cargo.toml @@ -48,6 +48,7 @@ uuid = { workspace = true } rust-s3 = { workspace = true } iceberg = { workspace = true } iceberg-catalog-rest = { workspace = true } +iceberg-catalog-glue = { workspace = true } chrono = { workspace = true } [lib] diff --git a/core/connectors/sinks/iceberg_sink/src/lib.rs b/core/connectors/sinks/iceberg_sink/src/lib.rs index ff2c80a0f..d2c8921d8 100644 --- a/core/connectors/sinks/iceberg_sink/src/lib.rs +++ b/core/connectors/sinks/iceberg_sink/src/lib.rs @@ -35,6 +35,7 @@ use iceberg::{ writer::file_writer::location_generator::{DefaultFileNameGenerator, DefaultLocationGenerator}, Catalog, }; +use iceberg_catalog_glue::{GlueCatalog, GlueCatalogConfig}; use iceberg_catalog_rest::{RestCatalog, RestCatalogConfig}; use iggy_connector_sdk::{ sink_connector, ConsumedMessage, Error, MessagesMetadata, Payload, Sink, TopicMetadata, @@ -70,7 +71,8 @@ pub struct IcebergSink { id: u32, config: IcebergSinkConfig, tables: Vec
, - catalog: Option, + catalog: Option>, + props: HashMap, } #[derive(Debug, Serialize, Deserialize)] @@ -92,17 +94,68 @@ pub struct IcebergSinkConfig { impl IcebergSink { pub fn new(id: u32, config: IcebergSinkConfig) -> Self { let tables: Vec
= Vec::with_capacity(config.tables.len()); + let mut props: HashMap = HashMap::new(); + + props.insert("s3.region".to_string(), config.store_region.clone()); + props.insert( + "s3.access-key-id".to_string(), + config.store_access_key_id.clone(), + ); + props.insert( + "s3.secret-access-key".to_string(), + config.store_secret_access_key.clone(), + ); + props.insert("s3.endpoint".to_string(), config.store_url.clone()); + IcebergSink { id, config, tables, catalog: None, + props, } } fn slice_user_table(&self, table: &String) -> Vec { table.split('.').map(|s| s.to_string()).collect() } + + #[inline(always)] + fn get_rest_catalog(&self) -> RestCatalog { + let catalog_config = RestCatalogConfig::builder() + .uri(self.config.uri.clone()) + .props(self.props.clone()) + .warehouse(self.config.bucket_name.clone()) + .build(); + + RestCatalog::new(catalog_config) + } + + //#[inline(always)] + //fn get_hms_catalog(&self) -> HmsCatalog { + // let config = HmsCatalogConfig::builder() + // .props(self.props) + // .warehouse(self.config.bucket_name.clone()) + // .address(self.config.uri.clone()) + // .thrift_transport(HmsThriftTransport::Buffered) + // .build(); + // + // HmsCatalog::new(config) + //} + + #[inline(always)] + async fn get_glue_catalog(&self) -> Result { + let config = GlueCatalogConfig::builder() + .props(self.props.clone()) + .warehouse(self.config.bucket_name.clone()) + .build(); + + let catalog = GlueCatalog::new(config).await.map_err(|err| { + error!("Failed to apply transaction on table: {}", err); + Error::HttpRequestFailed(err.to_string()) + }); + return catalog; + } } #[async_trait] @@ -121,29 +174,10 @@ impl Sink for IcebergSink { self.config.catalog_type ); - let mut props: HashMap = HashMap::new(); - - props.insert("s3.region".to_string(), self.config.store_region.clone()); - props.insert( - "s3.access-key-id".to_string(), - self.config.store_access_key_id.clone(), - ); - props.insert( - "s3.secret-access-key".to_string(), - 
self.config.store_secret_access_key.clone(), - ); - props.insert("s3.endpoint".to_string(), self.config.store_url.clone()); - - let catalog_config = RestCatalogConfig::builder() - .uri(self.config.uri.clone()) - .props(props) - .warehouse(self.config.bucket_name.clone()) - .build(); - - let catalog = match self.config.catalog_type { - IcebergSinkTypes::rest => RestCatalog::new(catalog_config), - IcebergSinkTypes::hive => RestCatalog::new(catalog_config), - IcebergSinkTypes::glue => RestCatalog::new(catalog_config), + let catalog: Box = match self.config.catalog_type { + IcebergSinkTypes::rest => Box::new(self.get_rest_catalog()), + IcebergSinkTypes::hive => Box::new(self.get_rest_catalog()), + IcebergSinkTypes::glue => Box::new(self.get_glue_catalog().await?), }; for declared_table in &self.config.tables { @@ -177,6 +211,7 @@ impl Sink for IcebergSink { messages.len(), messages_metadata.schema ); + for table in &self.tables { let location = DefaultLocationGenerator::new(table.metadata().clone()).unwrap(); @@ -238,7 +273,7 @@ impl Sink for IcebergSink { })?; let _table = tx - .commit(self.catalog.as_ref().unwrap()) + .commit(self.catalog.as_ref().unwrap().as_ref()) .await .map_err(|err| { error!("Failed to apply transaction on table: {}", err); From ef0d5d14c755b75ac222ab8b795a38e6213908a2 Mon Sep 17 00:00:00 2001 From: Edgar Modesto Date: Mon, 15 Sep 2025 12:41:22 -0700 Subject: [PATCH 06/28] Clean error handling --- core/connectors/sinks/iceberg_sink/src/lib.rs | 97 ++++++++++++++----- 1 file changed, 73 insertions(+), 24 deletions(-) diff --git a/core/connectors/sinks/iceberg_sink/src/lib.rs b/core/connectors/sinks/iceberg_sink/src/lib.rs index d2c8921d8..2c076b4ff 100644 --- a/core/connectors/sinks/iceberg_sink/src/lib.rs +++ b/core/connectors/sinks/iceberg_sink/src/lib.rs @@ -24,22 +24,24 @@ use std::sync::Arc; use arrow_json::ReaderBuilder; use async_trait::async_trait; +use iceberg::TableIdent; use iceberg::arrow::schema_to_arrow_schema; +use 
iceberg::spec::Struct; use iceberg::table::Table; use iceberg::transaction::{ApplyTransactionAction, Transaction}; use iceberg::writer::base_writer::data_file_writer::DataFileWriterBuilder; use iceberg::writer::file_writer::ParquetWriterBuilder; use iceberg::writer::{IcebergWriter, IcebergWriterBuilder}; -use iceberg::TableIdent; use iceberg::{ - writer::file_writer::location_generator::{DefaultFileNameGenerator, DefaultLocationGenerator}, Catalog, + writer::file_writer::location_generator::{DefaultFileNameGenerator, DefaultLocationGenerator}, }; use iceberg_catalog_glue::{GlueCatalog, GlueCatalogConfig}; use iceberg_catalog_rest::{RestCatalog, RestCatalogConfig}; use iggy_connector_sdk::{ - sink_connector, ConsumedMessage, Error, MessagesMetadata, Payload, Sink, TopicMetadata, + ConsumedMessage, Error, MessagesMetadata, Payload, Sink, TopicMetadata, sink_connector, }; +use parquet::data_type::DataType; use parquet::file::properties::WriterProperties; use serde::{Deserialize, Serialize}; use tracing::{error, info}; @@ -49,15 +51,22 @@ use uuid::Uuid; #[allow(non_camel_case_types)] pub enum IcebergSinkTypes { rest, - hive, glue, } +#[derive(Debug, Serialize, Deserialize)] +#[allow(non_camel_case_types)] +pub enum IcebergSinkStoreClass { + s3, + gcs, + fs, + azdls, +} + impl fmt::Display for IcebergSinkTypes { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let s = match self { IcebergSinkTypes::rest => "rest", - IcebergSinkTypes::hive => "hive", IcebergSinkTypes::glue => "glue", }; write!(f, "{}", s) @@ -151,8 +160,8 @@ impl IcebergSink { .build(); let catalog = GlueCatalog::new(config).await.map_err(|err| { - error!("Failed to apply transaction on table: {}", err); - Error::HttpRequestFailed(err.to_string()) + error!("Failed to get glue catalog with error: {}. 
Make sure the catalog is correctly declared on the config file", err); + Error::InitError(err.to_string()) }); return catalog; } @@ -176,7 +185,6 @@ impl Sink for IcebergSink { let catalog: Box = match self.config.catalog_type { IcebergSinkTypes::rest => Box::new(self.get_rest_catalog()), - IcebergSinkTypes::hive => Box::new(self.get_rest_catalog()), IcebergSinkTypes::glue => Box::new(self.get_glue_catalog().await?), }; @@ -184,13 +192,13 @@ impl Sink for IcebergSink { let sliced_table = self.slice_user_table(&declared_table); let table = catalog .load_table(&TableIdent::from_strs(sliced_table).map_err(|err| { - error!("Failed to load table from catalog: {}", err); - Error::HttpRequestFailed(err.to_string()) + error!("Failed to load table from catalog: {}. Is the table {} a valid Iceberg table?", err, declared_table); + Error::InitError(err.to_string()) })?) .await .map_err(|err| { - error!("Failed to get table from catalog: {}", err); - Error::HttpRequestFailed(err.to_string()) + error!("Failed to load table from catalog: {}. Is the table {} a valid Iceberg table?", err, declared_table); + Error::InitError(err.to_string()) })?; self.tables.push(table); } @@ -213,7 +221,15 @@ impl Sink for IcebergSink { ); for table in &self.tables { - let location = DefaultLocationGenerator::new(table.metadata().clone()).unwrap(); + let location = + DefaultLocationGenerator::new(table.metadata().clone()).map_err(|err| { + error!( + "Failed to get location on table: {}. 
Error: {}", + table.metadata().uuid(), + err + ); + Error::InvalidConfig + })?; let file_name_gen = DefaultFileNameGenerator::new( Uuid::new_v4().to_string(), @@ -228,6 +244,7 @@ impl Sink for IcebergSink { location, file_name_gen, ); + let data_file_writer_builder = DataFileWriterBuilder::new( parquet_writer_builder, None, @@ -236,14 +253,14 @@ impl Sink for IcebergSink { let mut writer = data_file_writer_builder.build().await.map_err(|err| { error!("Error while constructing data file writer: {}", err); - Error::HttpRequestFailed(err.to_string()) + Error::InitError(err.to_string()) })?; let json_messages = messages .iter() .filter_map(|record| match &record.payload { Payload::Json(record) => simd_json::to_string(&record).ok(), - _ => panic!("aaa"), + _ => None, }) .collect::>() .join("\n"); @@ -251,33 +268,65 @@ impl Sink for IcebergSink { let cursor = Cursor::new(json_messages); let mut reader = ReaderBuilder::new(Arc::new( - schema_to_arrow_schema(&table.metadata().current_schema().clone()).unwrap(), + schema_to_arrow_schema(&table.metadata().current_schema().clone()).map_err( + |err| { + error!( + "Error while mapping records to Iceberg table with uuid: {}. 
Error {}", + table.metadata().uuid(), + err + ); + Error::InvalidRecord + }, + )?, )) .build(cursor) - .unwrap(); + .map_err(|err| { + error!( + "Error while building Iceberg reader from message payload: {}", + err + ); + Error::InitError(err.to_string()) + })?; while let Some(batch) = reader.next() { - let batch_data = batch.unwrap(); - writer.write(batch_data).await.unwrap(); + let batch_data = batch.map_err(|err| { + error!("Error while getting record batch: {}", err); + Error::InvalidRecord + })?; + writer.write(batch_data).await.map_err(|err| { + error!("Error while writing record batch: {}", err); + Error::InvalidRecord + })?; } - let data_files = writer.close().await.unwrap(); + let data_files = writer.close().await.map_err(|err| { + error!("Error while writing data records to Parquet file: {}", err); + Error::InvalidRecord + })?; let table_commit = Transaction::new(&table); let action = table_commit.fast_append().add_data_files(data_files); let tx = action.apply(table_commit).map_err(|err| { - error!("Failed to apply transaction: {}", err); - Error::HttpRequestFailed(err.to_string()) + error!( + "Failed to apply transaction on table with UUID: {}, Error: {}", + table.metadata().uuid(), + err + ); + Error::InvalidRecord })?; let _table = tx .commit(self.catalog.as_ref().unwrap().as_ref()) .await .map_err(|err| { - error!("Failed to apply transaction on table: {}", err); - Error::HttpRequestFailed(err.to_string()) + error!( + "Failed to commit transaction on table with UUID: {}, Error: {}", + table.metadata().uuid(), + err + ); + Error::InvalidRecord })?; } From ef0d9ce3a2f82a7fb823f584ac455b291e216711 Mon Sep 17 00:00:00 2001 From: Edgar Modesto Date: Wed, 17 Sep 2025 12:23:35 -0700 Subject: [PATCH 07/28] Further error handling --- core/connectors/sinks/iceberg_sink/src/lib.rs | 97 ++++++++++++------- 1 file changed, 63 insertions(+), 34 deletions(-) diff --git a/core/connectors/sinks/iceberg_sink/src/lib.rs b/core/connectors/sinks/iceberg_sink/src/lib.rs 
index 2c076b4ff..b68905531 100644 --- a/core/connectors/sinks/iceberg_sink/src/lib.rs +++ b/core/connectors/sinks/iceberg_sink/src/lib.rs @@ -24,27 +24,25 @@ use std::sync::Arc; use arrow_json::ReaderBuilder; use async_trait::async_trait; -use iceberg::TableIdent; use iceberg::arrow::schema_to_arrow_schema; -use iceberg::spec::Struct; use iceberg::table::Table; use iceberg::transaction::{ApplyTransactionAction, Transaction}; use iceberg::writer::base_writer::data_file_writer::DataFileWriterBuilder; use iceberg::writer::file_writer::ParquetWriterBuilder; use iceberg::writer::{IcebergWriter, IcebergWriterBuilder}; +use iceberg::TableIdent; use iceberg::{ - Catalog, writer::file_writer::location_generator::{DefaultFileNameGenerator, DefaultLocationGenerator}, + Catalog, }; use iceberg_catalog_glue::{GlueCatalog, GlueCatalogConfig}; use iceberg_catalog_rest::{RestCatalog, RestCatalogConfig}; use iggy_connector_sdk::{ - ConsumedMessage, Error, MessagesMetadata, Payload, Sink, TopicMetadata, sink_connector, + sink_connector, ConsumedMessage, Error, MessagesMetadata, Payload, Sink, TopicMetadata, }; -use parquet::data_type::DataType; use parquet::file::properties::WriterProperties; use serde::{Deserialize, Serialize}; -use tracing::{error, info}; +use tracing::{error, info, warn}; use uuid::Uuid; #[derive(Debug, Serialize, Deserialize)] @@ -58,9 +56,23 @@ pub enum IcebergSinkTypes { #[allow(non_camel_case_types)] pub enum IcebergSinkStoreClass { s3, - gcs, fs, + gcs, azdls, + oss, +} + +impl fmt::Display for IcebergSinkStoreClass { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let s = match self { + IcebergSinkStoreClass::s3 => "s3", + IcebergSinkStoreClass::fs => "fs", + IcebergSinkStoreClass::gcs => "gcs", + IcebergSinkStoreClass::oss => "oss", + IcebergSinkStoreClass::azdls => "azdls", + }; + write!(f, "{}", s) + } } impl fmt::Display for IcebergSinkTypes { @@ -88,33 +100,35 @@ pub struct IcebergSink { pub struct IcebergSinkConfig { pub tables: Vec, 
pub catalog_type: IcebergSinkTypes, - pub bucket_name: String, + pub warehouse: String, pub uri: String, - pub credential: String, - pub auto_create: bool, - pub part_size: u32, pub store_url: String, pub store_access_key_id: String, pub store_secret_access_key: String, pub store_region: String, - pub store_class: String, + pub store_class: IcebergSinkStoreClass, } impl IcebergSink { - pub fn new(id: u32, config: IcebergSinkConfig) -> Self { - let tables: Vec
= Vec::with_capacity(config.tables.len()); + #[inline(always)] + fn get_props_s3(&self) -> Result, Error> { let mut props: HashMap = HashMap::new(); - - props.insert("s3.region".to_string(), config.store_region.clone()); + props.insert("s3.region".to_string(), self.config.store_region.clone()); props.insert( "s3.access-key-id".to_string(), - config.store_access_key_id.clone(), + self.config.store_access_key_id.clone(), ); props.insert( "s3.secret-access-key".to_string(), - config.store_secret_access_key.clone(), + self.config.store_secret_access_key.clone(), ); - props.insert("s3.endpoint".to_string(), config.store_url.clone()); + props.insert("s3.endpoint".to_string(), self.config.store_url.clone()); + return Ok(props); + } + + pub fn new(id: u32, config: IcebergSinkConfig) -> Self { + let tables: Vec
= Vec::with_capacity(config.tables.len()); + let props = HashMap::new(); IcebergSink { id, @@ -134,7 +148,7 @@ impl IcebergSink { let catalog_config = RestCatalogConfig::builder() .uri(self.config.uri.clone()) .props(self.props.clone()) - .warehouse(self.config.bucket_name.clone()) + .warehouse(self.config.warehouse.clone()) .build(); RestCatalog::new(catalog_config) @@ -156,7 +170,7 @@ impl IcebergSink { async fn get_glue_catalog(&self) -> Result { let config = GlueCatalogConfig::builder() .props(self.props.clone()) - .warehouse(self.config.bucket_name.clone()) + .warehouse(self.config.warehouse.clone()) .build(); let catalog = GlueCatalog::new(config).await.map_err(|err| { @@ -183,6 +197,19 @@ impl Sink for IcebergSink { self.config.catalog_type ); + // Insert adequate props for initializing file IO, else fail to open + self.props = match self.config.store_class { + IcebergSinkStoreClass::s3 => self.get_props_s3()?, + IcebergSinkStoreClass::fs => HashMap::new(), + _ => { + error!( + "Store class {} is not supported yet", + self.config.store_class + ); + return Err(Error::InvalidConfig); + } + }; + let catalog: Box = match self.config.catalog_type { IcebergSinkTypes::rest => Box::new(self.get_rest_catalog()), IcebergSinkTypes::glue => Box::new(self.get_glue_catalog().await?), @@ -192,13 +219,13 @@ impl Sink for IcebergSink { let sliced_table = self.slice_user_table(&declared_table); let table = catalog .load_table(&TableIdent::from_strs(sliced_table).map_err(|err| { - error!("Failed to load table from catalog: {}. Is the table {} a valid Iceberg table?", err, declared_table); + error!("Failed to load table from catalog: {}. ", err); Error::InitError(err.to_string()) })?) .await .map_err(|err| { - error!("Failed to load table from catalog: {}. 
Is the table {} a valid Iceberg table?", err, declared_table); - Error::InitError(err.to_string()) + error!("Failed to load table from catalog: {}", err); + Error::InitError(err.to_string()) })?; self.tables.push(table); } @@ -260,11 +287,22 @@ impl Sink for IcebergSink { .iter() .filter_map(|record| match &record.payload { Payload::Json(record) => simd_json::to_string(&record).ok(), - _ => None, + _ => { + warn!("Unsupported payload format: {}", messages_metadata.schema); + None + } }) .collect::>() .join("\n"); + if json_messages.is_empty() { + error!( + "Could not serialize payload, expected JSON format, got {} instead", + messages_metadata.schema + ); + return Err(Error::InvalidPayloadType); + } + let cursor = Cursor::new(json_messages); let mut reader = ReaderBuilder::new(Arc::new( @@ -340,12 +378,3 @@ impl Sink for IcebergSink { Ok(()) } } - -#[cfg(test)] -mod tests { - - #[test] - fn it_works() { - assert_eq!(true, true); - } -} From c99cb72137ef1922f039a2fb7d9ef94cdc1ef89e Mon Sep 17 00:00:00 2001 From: Edgar Modesto Date: Wed, 17 Sep 2025 12:49:15 -0700 Subject: [PATCH 08/28] init auto_create_table --- core/connectors/sinks/iceberg_sink/src/lib.rs | 46 +++++++++++++++---- 1 file changed, 36 insertions(+), 10 deletions(-) diff --git a/core/connectors/sinks/iceberg_sink/src/lib.rs b/core/connectors/sinks/iceberg_sink/src/lib.rs index b68905531..f4efc4825 100644 --- a/core/connectors/sinks/iceberg_sink/src/lib.rs +++ b/core/connectors/sinks/iceberg_sink/src/lib.rs @@ -102,6 +102,10 @@ pub struct IcebergSinkConfig { pub catalog_type: IcebergSinkTypes, pub warehouse: String, pub uri: String, + pub auto_create: bool, + pub evolve_schema: bool, + pub dynamic_routing: bool, + pub dynamic_route_field: String, pub store_url: String, pub store_access_key_id: String, pub store_secret_access_key: String, @@ -110,6 +114,13 @@ pub struct IcebergSinkConfig { } impl IcebergSink { + async fn create_table(&self, name: String) -> Result { + let table = Table::builder() + 
.build() + .map_err(|err| Error::InvalidState)?; + return Ok(table); + } + #[inline(always)] fn get_props_s3(&self) -> Result, Error> { let mut props: HashMap = HashMap::new(); @@ -217,16 +228,31 @@ impl Sink for IcebergSink { for declared_table in &self.config.tables { let sliced_table = self.slice_user_table(&declared_table); - let table = catalog - .load_table(&TableIdent::from_strs(sliced_table).map_err(|err| { - error!("Failed to load table from catalog: {}. ", err); - Error::InitError(err.to_string()) - })?) - .await - .map_err(|err| { - error!("Failed to load table from catalog: {}", err); - Error::InitError(err.to_string()) - })?; + let table_ident = &TableIdent::from_strs(sliced_table.clone()).map_err(|err| { + error!("Failed to load table from catalog: {}. ", err); + Error::InitError(err.to_string()) + })?; + let exists = catalog.table_exists(table_ident).await.map_err(|err| { + error!("Failed to load table from catalog: {}", err); + Error::InitError(err.to_string()) + })?; + + if !exists { + if self.config.auto_create { + // create table and push + let table = self + .create_table(sliced_table.last().unwrap().to_string()) + .await?; + self.tables.push(table); + continue; + } else { + continue; + } + } + let table = catalog.load_table(table_ident).await.map_err(|err| { + error!("Failed to load table from catalog: {}", err); + Error::InitError(err.to_string()) + })?; self.tables.push(table); } From ef252fd4e3ab1b77c6d68b820c70777e73cdfef3 Mon Sep 17 00:00:00 2001 From: Edgar Modesto Date: Wed, 17 Sep 2025 14:06:54 -0700 Subject: [PATCH 09/28] Refactor routers to allow dynamic routing --- core/connectors/sinks/iceberg_sink/src/lib.rs | 213 +++------------- .../sinks/iceberg_sink/src/router.rs | 229 ++++++++++++++++++ 2 files changed, 260 insertions(+), 182 deletions(-) create mode 100644 core/connectors/sinks/iceberg_sink/src/router.rs diff --git a/core/connectors/sinks/iceberg_sink/src/lib.rs b/core/connectors/sinks/iceberg_sink/src/lib.rs index 
f4efc4825..8b3469199 100644 --- a/core/connectors/sinks/iceberg_sink/src/lib.rs +++ b/core/connectors/sinks/iceberg_sink/src/lib.rs @@ -18,32 +18,22 @@ use core::fmt; use std::collections::HashMap; -use std::io::Cursor; -use std::sync::Arc; -use arrow_json::ReaderBuilder; use async_trait::async_trait; -use iceberg::arrow::schema_to_arrow_schema; use iceberg::table::Table; -use iceberg::transaction::{ApplyTransactionAction, Transaction}; -use iceberg::writer::base_writer::data_file_writer::DataFileWriterBuilder; -use iceberg::writer::file_writer::ParquetWriterBuilder; -use iceberg::writer::{IcebergWriter, IcebergWriterBuilder}; -use iceberg::TableIdent; -use iceberg::{ - writer::file_writer::location_generator::{DefaultFileNameGenerator, DefaultLocationGenerator}, - Catalog, -}; +use iceberg::Catalog; use iceberg_catalog_glue::{GlueCatalog, GlueCatalogConfig}; use iceberg_catalog_rest::{RestCatalog, RestCatalogConfig}; use iggy_connector_sdk::{ - sink_connector, ConsumedMessage, Error, MessagesMetadata, Payload, Sink, TopicMetadata, + sink_connector, ConsumedMessage, Error, MessagesMetadata, Sink, TopicMetadata, }; -use parquet::file::properties::WriterProperties; use serde::{Deserialize, Serialize}; -use tracing::{error, info, warn}; -use uuid::Uuid; +use tracing::{error, info}; + +use crate::router::{DynamicRouter, Router, StaticRouter}; + +mod router; #[derive(Debug, Serialize, Deserialize)] #[allow(non_camel_case_types)] @@ -91,9 +81,8 @@ sink_connector!(IcebergSink); pub struct IcebergSink { id: u32, config: IcebergSinkConfig, - tables: Vec
, - catalog: Option>, props: HashMap, + router: Option>, } #[derive(Debug, Serialize, Deserialize)] @@ -113,14 +102,18 @@ pub struct IcebergSinkConfig { pub store_class: IcebergSinkStoreClass, } -impl IcebergSink { - async fn create_table(&self, name: String) -> Result { - let table = Table::builder() - .build() - .map_err(|err| Error::InvalidState)?; - return Ok(table); - } +pub(self) fn slice_user_table(table: &String) -> Vec { + table.split('.').map(|s| s.to_string()).collect() +} + +pub(self) async fn create_table(name: String) -> Result { + let table = Table::builder() + .build() + .map_err(|err| Error::InvalidState)?; + return Ok(table); +} +impl IcebergSink { #[inline(always)] fn get_props_s3(&self) -> Result, Error> { let mut props: HashMap = HashMap::new(); @@ -138,22 +131,17 @@ impl IcebergSink { } pub fn new(id: u32, config: IcebergSinkConfig) -> Self { - let tables: Vec
= Vec::with_capacity(config.tables.len()); let props = HashMap::new(); + let router = None; IcebergSink { id, config, - tables, - catalog: None, + router, props, } } - fn slice_user_table(&self, table: &String) -> Vec { - table.split('.').map(|s| s.to_string()).collect() - } - #[inline(always)] fn get_rest_catalog(&self) -> RestCatalog { let catalog_config = RestCatalogConfig::builder() @@ -226,37 +214,14 @@ impl Sink for IcebergSink { IcebergSinkTypes::glue => Box::new(self.get_glue_catalog().await?), }; - for declared_table in &self.config.tables { - let sliced_table = self.slice_user_table(&declared_table); - let table_ident = &TableIdent::from_strs(sliced_table.clone()).map_err(|err| { - error!("Failed to load table from catalog: {}. ", err); - Error::InitError(err.to_string()) - })?; - let exists = catalog.table_exists(table_ident).await.map_err(|err| { - error!("Failed to load table from catalog: {}", err); - Error::InitError(err.to_string()) - })?; - - if !exists { - if self.config.auto_create { - // create table and push - let table = self - .create_table(sliced_table.last().unwrap().to_string()) - .await?; - self.tables.push(table); - continue; - } else { - continue; - } - } - let table = catalog.load_table(table_ident).await.map_err(|err| { - error!("Failed to load table from catalog: {}", err); - Error::InitError(err.to_string()) - })?; - self.tables.push(table); + if self.config.dynamic_routing { + self.router = Some(Box::new(DynamicRouter::new(catalog))) + } else { + self.router = Some(Box::new( + StaticRouter::new(catalog, &self.config.tables, false).await?, + )); } - self.catalog = Some(catalog); Ok(()) } @@ -273,126 +238,10 @@ impl Sink for IcebergSink { messages_metadata.schema ); - for table in &self.tables { - let location = - DefaultLocationGenerator::new(table.metadata().clone()).map_err(|err| { - error!( - "Failed to get location on table: {}. 
Error: {}", - table.metadata().uuid(), - err - ); - Error::InvalidConfig - })?; - - let file_name_gen = DefaultFileNameGenerator::new( - Uuid::new_v4().to_string(), - None, - iceberg::spec::DataFileFormat::Parquet, - ); - - let parquet_writer_builder = ParquetWriterBuilder::new( - WriterProperties::default(), - table.metadata().current_schema().clone(), - table.file_io().clone(), - location, - file_name_gen, - ); - - let data_file_writer_builder = DataFileWriterBuilder::new( - parquet_writer_builder, - None, - table.metadata().default_partition_spec_id(), - ); - - let mut writer = data_file_writer_builder.build().await.map_err(|err| { - error!("Error while constructing data file writer: {}", err); - Error::InitError(err.to_string()) - })?; - - let json_messages = messages - .iter() - .filter_map(|record| match &record.payload { - Payload::Json(record) => simd_json::to_string(&record).ok(), - _ => { - warn!("Unsupported payload format: {}", messages_metadata.schema); - None - } - }) - .collect::>() - .join("\n"); - - if json_messages.is_empty() { - error!( - "Could not serialize payload, expected JSON format, got {} instead", - messages_metadata.schema - ); - return Err(Error::InvalidPayloadType); - } - - let cursor = Cursor::new(json_messages); - - let mut reader = ReaderBuilder::new(Arc::new( - schema_to_arrow_schema(&table.metadata().current_schema().clone()).map_err( - |err| { - error!( - "Error while mapping records to Iceberg table with uuid: {}. 
Error {}", - table.metadata().uuid(), - err - ); - Error::InvalidRecord - }, - )?, - )) - .build(cursor) - .map_err(|err| { - error!( - "Error while building Iceberg reader from message payload: {}", - err - ); - Error::InitError(err.to_string()) - })?; - - while let Some(batch) = reader.next() { - let batch_data = batch.map_err(|err| { - error!("Error while getting record batch: {}", err); - Error::InvalidRecord - })?; - writer.write(batch_data).await.map_err(|err| { - error!("Error while writing record batch: {}", err); - Error::InvalidRecord - })?; - } - - let data_files = writer.close().await.map_err(|err| { - error!("Error while writing data records to Parquet file: {}", err); - Error::InvalidRecord - })?; - - let table_commit = Transaction::new(&table); - - let action = table_commit.fast_append().add_data_files(data_files); - - let tx = action.apply(table_commit).map_err(|err| { - error!( - "Failed to apply transaction on table with UUID: {}, Error: {}", - table.metadata().uuid(), - err - ); - Error::InvalidRecord - })?; - - let _table = tx - .commit(self.catalog.as_ref().unwrap().as_ref()) - .await - .map_err(|err| { - error!( - "Failed to commit transaction on table with UUID: {}, Error: {}", - table.metadata().uuid(), - err - ); - Error::InvalidRecord - })?; - } + match &self.router { + Some(router) => router.route_data(messages_metadata, messages).await?, + None => return Err(Error::InvalidConfig), + }; info!("Finished successfully"); diff --git a/core/connectors/sinks/iceberg_sink/src/router.rs b/core/connectors/sinks/iceberg_sink/src/router.rs new file mode 100644 index 000000000..f1f504c89 --- /dev/null +++ b/core/connectors/sinks/iceberg_sink/src/router.rs @@ -0,0 +1,229 @@ +use std::io::Cursor; +use std::sync::Arc; + +use arrow_json::ReaderBuilder; + +use async_trait::async_trait; +use iceberg::arrow::schema_to_arrow_schema; +use iceberg::table::Table; +use iceberg::transaction::{ApplyTransactionAction, Transaction}; +use 
iceberg::writer::base_writer::data_file_writer::DataFileWriterBuilder; +use iceberg::writer::file_writer::ParquetWriterBuilder; +use iceberg::writer::{IcebergWriter, IcebergWriterBuilder}; +use iceberg::TableIdent; +use iceberg::{ + writer::file_writer::location_generator::{DefaultFileNameGenerator, DefaultLocationGenerator}, + Catalog, +}; +use iggy_connector_sdk::{ConsumedMessage, Error, MessagesMetadata, Payload}; +use parquet::file::properties::WriterProperties; +use tracing::{error, info, warn}; +use uuid::Uuid; + +use crate::{create_table, slice_user_table}; + +#[derive(Debug)] +pub(crate) struct DynamicRouter { + catalog: Box, +} + +impl DynamicRouter { + pub fn new(catalog: Box) -> Self { + Self { catalog } + } +} + +#[async_trait] +impl Router for DynamicRouter { + async fn route_data( + &self, + messages_metadata: MessagesMetadata, + messages: Vec, + ) -> Result<(), crate::Error> { + Ok(()) + } +} + +#[derive(Debug)] +pub(crate) struct StaticRouter { + tables: Vec
, + catalog: Box, +} + +impl StaticRouter { + pub async fn new( + catalog: Box, + declared_tables: &Vec, + auto_create: bool, + ) -> Result { + let mut tables: Vec
= Vec::with_capacity(declared_tables.len()); + for declared_table in declared_tables { + let sliced_table = slice_user_table(&declared_table); + let table_ident = &TableIdent::from_strs(sliced_table.clone()).map_err(|err| { + error!("Failed to load table from catalog: {}. ", err); + Error::InitError(err.to_string()) + })?; + let exists = catalog.table_exists(table_ident).await.map_err(|err| { + error!("Failed to load table from catalog: {}", err); + Error::InitError(err.to_string()) + })?; + + if !exists { + if auto_create { + // create table and push + let table = create_table(sliced_table.last().unwrap().to_string()).await?; + tables.push(table); + continue; + } else { + continue; + } + } + let table = catalog.load_table(table_ident).await.map_err(|err| { + error!("Failed to load table from catalog: {}", err); + Error::InitError(err.to_string()) + })?; + tables.push(table); + } + Ok(StaticRouter { tables, catalog }) + } +} + +#[async_trait] +impl Router for StaticRouter { + async fn route_data( + &self, + messages_metadata: MessagesMetadata, + messages: Vec, + ) -> Result<(), crate::Error> { + for table in &self.tables { + let location = + DefaultLocationGenerator::new(table.metadata().clone()).map_err(|err| { + error!( + "Failed to get location on table: {}. 
Error: {}", + table.metadata().uuid(), + err + ); + Error::InvalidConfig + })?; + + let file_name_gen = DefaultFileNameGenerator::new( + Uuid::new_v4().to_string(), + None, + iceberg::spec::DataFileFormat::Parquet, + ); + + let parquet_writer_builder = ParquetWriterBuilder::new( + WriterProperties::default(), + table.metadata().current_schema().clone(), + table.file_io().clone(), + location, + file_name_gen, + ); + + let data_file_writer_builder = DataFileWriterBuilder::new( + parquet_writer_builder, + None, + table.metadata().default_partition_spec_id(), + ); + + let mut writer = data_file_writer_builder.build().await.map_err(|err| { + error!("Error while constructing data file writer: {}", err); + Error::InitError(err.to_string()) + })?; + + let json_messages = messages + .iter() + .filter_map(|record| match &record.payload { + Payload::Json(record) => simd_json::to_string(&record).ok(), + _ => { + warn!("Unsupported payload format: {}", messages_metadata.schema); + None + } + }) + .collect::>() + .join("\n"); + + if json_messages.is_empty() { + error!( + "Could not serialize payload, expected JSON format, got {} instead", + messages_metadata.schema + ); + return Err(Error::InvalidPayloadType); + } + + let cursor = Cursor::new(json_messages); + + let mut reader = ReaderBuilder::new(Arc::new( + schema_to_arrow_schema(&table.metadata().current_schema().clone()).map_err( + |err| { + error!( + "Error while mapping records to Iceberg table with uuid: {}. 
Error {}", + table.metadata().uuid(), + err + ); + Error::InvalidRecord + }, + )?, + )) + .build(cursor) + .map_err(|err| { + error!( + "Error while building Iceberg reader from message payload: {}", + err + ); + Error::InitError(err.to_string()) + })?; + + while let Some(batch) = reader.next() { + let batch_data = batch.map_err(|err| { + error!("Error while getting record batch: {}", err); + Error::InvalidRecord + })?; + writer.write(batch_data).await.map_err(|err| { + error!("Error while writing record batch: {}", err); + Error::InvalidRecord + })?; + } + + let data_files = writer.close().await.map_err(|err| { + error!("Error while writing data records to Parquet file: {}", err); + Error::InvalidRecord + })?; + + let table_commit = Transaction::new(&table); + + let action = table_commit.fast_append().add_data_files(data_files); + + let tx = action.apply(table_commit).map_err(|err| { + error!( + "Failed to apply transaction on table with UUID: {}, Error: {}", + table.metadata().uuid(), + err + ); + Error::InvalidRecord + })?; + + let _table = tx.commit(self.catalog.as_ref()).await.map_err(|err| { + error!( + "Failed to commit transaction on table with UUID: {}, Error: {}", + table.metadata().uuid(), + err + ); + Error::InvalidRecord + })?; + } + + info!("Finished successfully"); + + Ok(()) + } +} + +#[async_trait] +pub trait Router: std::fmt::Debug + Sync + Send { + async fn route_data( + &self, + messages_metadata: MessagesMetadata, + messages: Vec, + ) -> Result<(), crate::Error>; +} From 1a4ad26ac113eb8c2e7ce64e0c790660e0fbb04c Mon Sep 17 00:00:00 2001 From: Edgar Modesto Date: Sat, 20 Sep 2025 15:26:35 -0700 Subject: [PATCH 10/28] multitable dynamic routing support --- core/connectors/sinks/iceberg_sink/src/lib.rs | 13 +- .../sinks/iceberg_sink/src/router.rs | 395 ++++++++++++------ 2 files changed, 284 insertions(+), 124 deletions(-) diff --git a/core/connectors/sinks/iceberg_sink/src/lib.rs b/core/connectors/sinks/iceberg_sink/src/lib.rs index 
8b3469199..a21c256cc 100644 --- a/core/connectors/sinks/iceberg_sink/src/lib.rs +++ b/core/connectors/sinks/iceberg_sink/src/lib.rs @@ -199,7 +199,6 @@ impl Sink for IcebergSink { // Insert adequate props for initializing file IO, else fail to open self.props = match self.config.store_class { IcebergSinkStoreClass::s3 => self.get_props_s3()?, - IcebergSinkStoreClass::fs => HashMap::new(), _ => { error!( "Store class {} is not supported yet", @@ -215,10 +214,13 @@ impl Sink for IcebergSink { }; if self.config.dynamic_routing { - self.router = Some(Box::new(DynamicRouter::new(catalog))) + self.router = Some(Box::new(DynamicRouter::new( + catalog, + self.config.dynamic_route_field.clone(), + ))) } else { self.router = Some(Box::new( - StaticRouter::new(catalog, &self.config.tables, false).await?, + StaticRouter::new(catalog, &self.config.tables, self.config.auto_create).await?, )); } @@ -240,7 +242,10 @@ impl Sink for IcebergSink { match &self.router { Some(router) => router.route_data(messages_metadata, messages).await?, - None => return Err(Error::InvalidConfig), + None => { + error!("Iceberg connector has no router configured"); + return Err(Error::InvalidConfig); + } }; info!("Finished successfully"); diff --git a/core/connectors/sinks/iceberg_sink/src/router.rs b/core/connectors/sinks/iceberg_sink/src/router.rs index f1f504c89..49425f02e 100644 --- a/core/connectors/sinks/iceberg_sink/src/router.rs +++ b/core/connectors/sinks/iceberg_sink/src/router.rs @@ -1,3 +1,22 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +use std::collections::HashMap; use std::io::Cursor; use std::sync::Arc; @@ -15,24 +34,227 @@ use iceberg::{ writer::file_writer::location_generator::{DefaultFileNameGenerator, DefaultLocationGenerator}, Catalog, }; -use iggy_connector_sdk::{ConsumedMessage, Error, MessagesMetadata, Payload}; +use iggy_connector_sdk::{ConsumedMessage, Error, MessagesMetadata, Payload, Schema}; use parquet::file::properties::WriterProperties; +use simd_json::base::ValueAsObject; use tracing::{error, info, warn}; use uuid::Uuid; use crate::{create_table, slice_user_table}; +async fn write_data( + messages: I, + table: &Table, + catalog: &dyn Catalog, + messages_schema: Schema, +) -> Result<(), Error> +where + I: IntoIterator, + M: std::ops::Deref, +{ + let location = DefaultLocationGenerator::new(table.metadata().clone()).map_err(|err| { + error!( + "Failed to get location on table: {}. 
Error: {}", + table.metadata().uuid(), + err + ); + Error::InvalidConfig + })?; + + let file_name_gen = DefaultFileNameGenerator::new( + Uuid::new_v4().to_string(), + None, + iceberg::spec::DataFileFormat::Parquet, + ); + + let parquet_writer_builder = ParquetWriterBuilder::new( + WriterProperties::default(), + table.metadata().current_schema().clone(), + table.file_io().clone(), + location, + file_name_gen, + ); + + let data_file_writer_builder = DataFileWriterBuilder::new( + parquet_writer_builder, + None, + table.metadata().default_partition_spec_id(), + ); + + let mut writer = data_file_writer_builder.build().await.map_err(|err| { + error!("Error while constructing data file writer: {}", err); + Error::InitError(err.to_string()) + })?; + + // Generic iteration works here: + let json_messages = messages + .into_iter() + .filter_map(|record| match &record.payload { + Payload::Json(record) => simd_json::to_string(&record).ok(), + _ => { + warn!("Unsupported payload format: {}", messages_schema); + None + } + }) + .collect::>() + .join("\n"); + + if json_messages.is_empty() { + error!( + "Could not serialize payload, expected JSON format, got {} instead", + messages_schema + ); + return Err(Error::InvalidPayloadType); + } + + let cursor = Cursor::new(json_messages); + + let mut reader = ReaderBuilder::new(Arc::new( + schema_to_arrow_schema(&table.metadata().current_schema().clone()).map_err(|err| { + error!( + "Error while mapping records to Iceberg table with uuid: {}. 
Error {}", + table.metadata().uuid(), + err + ); + Error::InvalidRecord + })?, + )) + .build(cursor) + .map_err(|err| { + error!( + "Error while building Iceberg reader from message payload: {}", + err + ); + Error::InitError(err.to_string()) + })?; + + while let Some(batch) = reader.next() { + let batch_data = batch.map_err(|err| { + error!("Error while getting record batch: {}", err); + Error::InvalidRecord + })?; + writer.write(batch_data).await.map_err(|err| { + error!("Error while writing record batch: {}", err); + Error::InvalidRecord + })?; + } + + let data_files = writer.close().await.map_err(|err| { + error!("Error while writing data records to Parquet file: {}", err); + Error::InvalidRecord + })?; + + let table_commit = Transaction::new(&table); + + let action = table_commit.fast_append().add_data_files(data_files); + + let tx = action.apply(table_commit).map_err(|err| { + error!( + "Failed to apply transaction on table with UUID: {}, Error: {}", + table.metadata().uuid(), + err + ); + Error::InvalidRecord + })?; + + let _table = tx.commit(catalog).await.map_err(|err| { + error!( + "Failed to commit transaction on table with UUID: {}, Error: {}", + table.metadata().uuid(), + err + ); + Error::InvalidRecord + })?; + Ok(()) +} + #[derive(Debug)] pub(crate) struct DynamicRouter { catalog: Box, + route_field: String, +} + +struct DynamicWriter { + pub tables_to_write: HashMap, + pub table_to_message: HashMap>>, +} + +impl DynamicWriter { + pub fn new() -> Self { + let tables_to_write = HashMap::new(); + let table_to_message = HashMap::new(); + Self { + tables_to_write, + table_to_message, + } + } + + fn push_to_existing(&mut self, route_field_val: &str, message: &Arc) -> bool { + if let Some(message_vec) = self.table_to_message.get_mut(route_field_val) { + message_vec.push(Arc::clone(message)); + true + } else { + false + } + } + + // This will: + // - Check if the table declared on the route field exists in the iceberg catalog. 
+ // - If it does, it will try to load it to memory and map the name with the Table object so + // that we can dynamically send messages to it's correct destination. + async fn ensure_table_exists( + &mut self, + route_field_val: &str, + catalog: &dyn Catalog, + ) -> Result { + if !table_exists(route_field_val.to_string()).await? { + return Ok(false); + } + + let sliced_table = slice_user_table(&route_field_val.to_string()); + let table_ident = TableIdent::from_strs(sliced_table.clone()).map_err(|err| { + error!("Failed to load table from catalog: {}.", err); + Error::InitError(err.to_string()) + })?; + + let table = catalog.load_table(&table_ident).await.map_err(|err| { + error!("Failed to load table from catalog: {}", err); + Error::InitError(err.to_string()) + })?; + + self.tables_to_write + .insert(route_field_val.to_string(), table); + + Ok(true) + } } impl DynamicRouter { - pub fn new(catalog: Box) -> Self { - Self { catalog } + pub fn new(catalog: Box, route_field: String) -> Self { + Self { + catalog, + route_field, + } + } + + fn extract_route_field(&self, message: &ConsumedMessage) -> Option { + match &message.payload { + Payload::Json(payload) => payload + .as_object() + .and_then(|obj| obj.get(&self.route_field)) + .map(|val| val.to_string()), + _ => { + warn!("Unsupported format for iceberg connector"); + None + } + } } } +async fn table_exists(table: String) -> Result { + Ok(true) +} + #[async_trait] impl Router for DynamicRouter { async fn route_data( @@ -40,6 +262,47 @@ impl Router for DynamicRouter { messages_metadata: MessagesMetadata, messages: Vec, ) -> Result<(), crate::Error> { + let mut writer = DynamicWriter::new(); + for message in messages { + let message = Arc::new(message); + let route_field_val = match self.extract_route_field(&message) { + Some(val) => val, + None => continue, + }; + + if writer.push_to_existing(&route_field_val, &message) { + continue; + } + + let route_field_val_cloned = route_field_val.clone(); + + if writer + 
.ensure_table_exists(&route_field_val_cloned, self.catalog.as_ref()) + .await? + { + if let Some(msgs) = writer.table_to_message.get_mut(&route_field_val_cloned) { + msgs.push(message); + } else { + let mut message_vec: Vec> = Vec::new(); + message_vec.push(message); + writer + .table_to_message + .insert(route_field_val_cloned, message_vec); + } + } + } + + for (table_name, table_obj) in &writer.tables_to_write { + let messages = writer.table_to_message.get(table_name).unwrap(); + write_data( + messages.iter().map(|arc| Arc::clone(arc)), + table_obj, + self.catalog.as_ref(), + messages_metadata.schema, + ) + .await?; + } + Ok(()) } } @@ -95,126 +358,18 @@ impl Router for StaticRouter { messages_metadata: MessagesMetadata, messages: Vec, ) -> Result<(), crate::Error> { - for table in &self.tables { - let location = - DefaultLocationGenerator::new(table.metadata().clone()).map_err(|err| { - error!( - "Failed to get location on table: {}. Error: {}", - table.metadata().uuid(), - err - ); - Error::InvalidConfig - })?; - - let file_name_gen = DefaultFileNameGenerator::new( - Uuid::new_v4().to_string(), - None, - iceberg::spec::DataFileFormat::Parquet, - ); - - let parquet_writer_builder = ParquetWriterBuilder::new( - WriterProperties::default(), - table.metadata().current_schema().clone(), - table.file_io().clone(), - location, - file_name_gen, - ); - - let data_file_writer_builder = DataFileWriterBuilder::new( - parquet_writer_builder, - None, - table.metadata().default_partition_spec_id(), - ); - - let mut writer = data_file_writer_builder.build().await.map_err(|err| { - error!("Error while constructing data file writer: {}", err); - Error::InitError(err.to_string()) - })?; - - let json_messages = messages - .iter() - .filter_map(|record| match &record.payload { - Payload::Json(record) => simd_json::to_string(&record).ok(), - _ => { - warn!("Unsupported payload format: {}", messages_metadata.schema); - None - } - }) - .collect::>() - .join("\n"); - - if 
json_messages.is_empty() { - error!( - "Could not serialize payload, expected JSON format, got {} instead", - messages_metadata.schema - ); - return Err(Error::InvalidPayloadType); - } - - let cursor = Cursor::new(json_messages); - - let mut reader = ReaderBuilder::new(Arc::new( - schema_to_arrow_schema(&table.metadata().current_schema().clone()).map_err( - |err| { - error!( - "Error while mapping records to Iceberg table with uuid: {}. Error {}", - table.metadata().uuid(), - err - ); - Error::InvalidRecord - }, - )?, - )) - .build(cursor) - .map_err(|err| { - error!( - "Error while building Iceberg reader from message payload: {}", - err - ); - Error::InitError(err.to_string()) - })?; - - while let Some(batch) = reader.next() { - let batch_data = batch.map_err(|err| { - error!("Error while getting record batch: {}", err); - Error::InvalidRecord - })?; - writer.write(batch_data).await.map_err(|err| { - error!("Error while writing record batch: {}", err); - Error::InvalidRecord - })?; - } - - let data_files = writer.close().await.map_err(|err| { - error!("Error while writing data records to Parquet file: {}", err); - Error::InvalidRecord - })?; - - let table_commit = Transaction::new(&table); - - let action = table_commit.fast_append().add_data_files(data_files); - - let tx = action.apply(table_commit).map_err(|err| { - error!( - "Failed to apply transaction on table with UUID: {}, Error: {}", - table.metadata().uuid(), - err - ); - Error::InvalidRecord - })?; + info!("Finished successfully"); - let _table = tx.commit(self.catalog.as_ref()).await.map_err(|err| { - error!( - "Failed to commit transaction on table with UUID: {}, Error: {}", - table.metadata().uuid(), - err - ); - Error::InvalidRecord - })?; + for table in &self.tables { + write_data( + &messages, + table, + self.catalog.as_ref(), + messages_metadata.schema, + ) + .await?; } - info!("Finished successfully"); - Ok(()) } } From 56372197e2e0cd61b18b8e4742b718550483c056 Mon Sep 17 00:00:00 2001 From: Edgar 
Modesto Date: Mon, 22 Sep 2025 14:49:31 -0700 Subject: [PATCH 11/28] Add support for partitioned data --- core/connectors/sinks/iceberg_sink/src/lib.rs | 12 +- .../sinks/iceberg_sink/src/router.rs | 103 ++++++++++++++---- 2 files changed, 80 insertions(+), 35 deletions(-) diff --git a/core/connectors/sinks/iceberg_sink/src/lib.rs b/core/connectors/sinks/iceberg_sink/src/lib.rs index a21c256cc..341653895 100644 --- a/core/connectors/sinks/iceberg_sink/src/lib.rs +++ b/core/connectors/sinks/iceberg_sink/src/lib.rs @@ -21,7 +21,6 @@ use std::collections::HashMap; use async_trait::async_trait; -use iceberg::table::Table; use iceberg::Catalog; use iceberg_catalog_glue::{GlueCatalog, GlueCatalogConfig}; use iceberg_catalog_rest::{RestCatalog, RestCatalogConfig}; @@ -91,8 +90,6 @@ pub struct IcebergSinkConfig { pub catalog_type: IcebergSinkTypes, pub warehouse: String, pub uri: String, - pub auto_create: bool, - pub evolve_schema: bool, pub dynamic_routing: bool, pub dynamic_route_field: String, pub store_url: String, @@ -106,13 +103,6 @@ pub(self) fn slice_user_table(table: &String) -> Vec { table.split('.').map(|s| s.to_string()).collect() } -pub(self) async fn create_table(name: String) -> Result { - let table = Table::builder() - .build() - .map_err(|err| Error::InvalidState)?; - return Ok(table); -} - impl IcebergSink { #[inline(always)] fn get_props_s3(&self) -> Result, Error> { @@ -220,7 +210,7 @@ impl Sink for IcebergSink { ))) } else { self.router = Some(Box::new( - StaticRouter::new(catalog, &self.config.tables, self.config.auto_create).await?, + StaticRouter::new(catalog, &self.config.tables).await?, )); } diff --git a/core/connectors/sinks/iceberg_sink/src/router.rs b/core/connectors/sinks/iceberg_sink/src/router.rs index 49425f02e..e49b30ecb 100644 --- a/core/connectors/sinks/iceberg_sink/src/router.rs +++ b/core/connectors/sinks/iceberg_sink/src/router.rs @@ -24,6 +24,7 @@ use arrow_json::ReaderBuilder; use async_trait::async_trait; use 
iceberg::arrow::schema_to_arrow_schema; +use iceberg::spec::{Literal, PrimitiveLiteral, PrimitiveType, Struct, StructType}; use iceberg::table::Table; use iceberg::transaction::{ApplyTransactionAction, Transaction}; use iceberg::writer::base_writer::data_file_writer::DataFileWriterBuilder; @@ -40,7 +41,47 @@ use simd_json::base::ValueAsObject; use tracing::{error, info, warn}; use uuid::Uuid; -use crate::{create_table, slice_user_table}; +use crate::slice_user_table; + +pub fn primitive_type_to_literal(pt: &PrimitiveType) -> Result { + match pt { + PrimitiveType::Boolean => Ok(PrimitiveLiteral::Boolean(false)), + PrimitiveType::Int => Ok(PrimitiveLiteral::Int(0)), + PrimitiveType::Long => Ok(PrimitiveLiteral::Long(0)), + PrimitiveType::Decimal { .. } => Ok(PrimitiveLiteral::Int128(0)), + PrimitiveType::Date => Ok(PrimitiveLiteral::Int(0)), // e.g. days since epoch + PrimitiveType::Time => Ok(PrimitiveLiteral::Long(0)), // microseconds since midnight + PrimitiveType::Timestamp => Ok(PrimitiveLiteral::Long(0)), // microseconds since epoch + PrimitiveType::Timestamptz => Ok(PrimitiveLiteral::Long(0)), + PrimitiveType::TimestampNs => Ok(PrimitiveLiteral::Long(0)), + PrimitiveType::TimestamptzNs => Ok(PrimitiveLiteral::Long(0)), + PrimitiveType::String => Ok(PrimitiveLiteral::String(String::new())), + PrimitiveType::Uuid => Ok(PrimitiveLiteral::Binary(vec![0; 16])), + PrimitiveType::Fixed(len) => Ok(PrimitiveLiteral::Binary(vec![0; *len as usize])), + PrimitiveType::Binary => Ok(PrimitiveLiteral::Binary(Vec::new())), + _ => { + error!("Partition type not supported"); + Err(Error::InvalidConfig) + } + } +} + +fn get_partition_type_value(default_partition_type: &StructType) -> Result, Error> { + let mut fields: Vec> = Vec::new(); + + if default_partition_type.fields().len() == 0 { + return Ok(None); + }; + + for field in default_partition_type.fields() { + let t = field.field_type.as_primitive_type().unwrap(); + + let value = 
Some(Literal::Primitive(primitive_type_to_literal(t)?)); + + fields.push(value); + } + Ok(Some(Struct::from_iter(fields))) +} async fn write_data( messages: I, @@ -77,7 +118,7 @@ where let data_file_writer_builder = DataFileWriterBuilder::new( parquet_writer_builder, - None, + get_partition_type_value(table.metadata().default_partition_type())?, table.metadata().default_partition_spec_id(), ); @@ -86,7 +127,6 @@ where Error::InitError(err.to_string()) })?; - // Generic iteration works here: let json_messages = messages .into_iter() .filter_map(|record| match &record.payload { @@ -207,12 +247,20 @@ impl DynamicWriter { route_field_val: &str, catalog: &dyn Catalog, ) -> Result { - if !table_exists(route_field_val.to_string()).await? { + let sliced_table = slice_user_table(&route_field_val.to_string()); + let table_ident = &TableIdent::from_strs(&sliced_table).map_err(|err| { + error!("Failed to load table from catalog: {}. ", err); + Error::InitError(err.to_string()) + })?; + + if !catalog.table_exists(table_ident).await.map_err(|err| { + error!("Failed to load table from catalog: {}. ", err); + Error::InitError(err.to_string()) + })? 
{ return Ok(false); } - let sliced_table = slice_user_table(&route_field_val.to_string()); - let table_ident = TableIdent::from_strs(sliced_table.clone()).map_err(|err| { + let table_ident = TableIdent::from_strs(&sliced_table).map_err(|err| { error!("Failed to load table from catalog: {}.", err); Error::InitError(err.to_string()) })?; @@ -251,10 +299,6 @@ impl DynamicRouter { } } -async fn table_exists(table: String) -> Result { - Ok(true) -} - #[async_trait] impl Router for DynamicRouter { async fn route_data( @@ -293,14 +337,22 @@ impl Router for DynamicRouter { } for (table_name, table_obj) in &writer.tables_to_write { - let messages = writer.table_to_message.get(table_name).unwrap(); + let batch_messages = match writer.table_to_message.get(table_name) { + Some(m) => m, + None => continue, + }; write_data( - messages.iter().map(|arc| Arc::clone(arc)), + batch_messages.iter().map(|arc| Arc::clone(arc)), table_obj, self.catalog.as_ref(), messages_metadata.schema, ) .await?; + info!( + "Dynamically routed {} messages to {} iceberg table", + batch_messages.len(), + table_name + ); } Ok(()) @@ -317,9 +369,9 @@ impl StaticRouter { pub async fn new( catalog: Box, declared_tables: &Vec, - auto_create: bool, ) -> Result { let mut tables: Vec
= Vec::with_capacity(declared_tables.len()); + let mut tables_found = 0; for declared_table in declared_tables { let sliced_table = slice_user_table(&declared_table); let table_ident = &TableIdent::from_strs(sliced_table.clone()).map_err(|err| { @@ -332,21 +384,21 @@ impl StaticRouter { })?; if !exists { - if auto_create { - // create table and push - let table = create_table(sliced_table.last().unwrap().to_string()).await?; - tables.push(table); - continue; - } else { - continue; - } - } + continue; + }; + + tables_found += 1; let table = catalog.load_table(table_ident).await.map_err(|err| { error!("Failed to load table from catalog: {}", err); Error::InitError(err.to_string()) })?; tables.push(table); } + info!( + "Static router found {} tables on iceberg catalog from {} tables declared", + tables_found, + declared_tables.len() + ); Ok(StaticRouter { tables, catalog }) } } @@ -358,8 +410,6 @@ impl Router for StaticRouter { messages_metadata: MessagesMetadata, messages: Vec, ) -> Result<(), crate::Error> { - info!("Finished successfully"); - for table in &self.tables { write_data( &messages, @@ -368,6 +418,11 @@ impl Router for StaticRouter { messages_metadata.schema, ) .await?; + info!( + "Routed {} messages to iceberg table {} successfully", + messages.len(), + table.identifier().name() + ); } Ok(()) From bf1fd58560f466dc5d955aee3f93808a464fdb6f Mon Sep 17 00:00:00 2001 From: Edgar Modesto Date: Mon, 22 Sep 2025 15:16:03 -0700 Subject: [PATCH 12/28] Add iceberg sink documentation --- core/connectors/sinks/iceberg_sink/README.md | 81 ++++++++++++++++++++ 1 file changed, 81 insertions(+) diff --git a/core/connectors/sinks/iceberg_sink/README.md b/core/connectors/sinks/iceberg_sink/README.md index e69de29bb..035ed1659 100644 --- a/core/connectors/sinks/iceberg_sink/README.md +++ b/core/connectors/sinks/iceberg_sink/README.md @@ -0,0 +1,81 @@ +# Iceberg Sink Connector + +The Iceberg Sink Connector allows you to consume messages from Iggy topics and store them in 
Iceberg tables. + +## Features + +- **Support for S3-compatible storage** +- **Support for REST catalogs** +- **Single destination table** +- **Multiple-table fan-out static routing** +- **Multiple-table fan-out dynamic routing** + +## Configuration example + +```toml +[sinks.iceberg.config] +tables = ["nyc.users"] +catalog_type = "rest" +warehouse = "warehouse" +uri = "http://localhost:8181" +dynamic_routing = true +dynamic_route_field = "db_table" +store_url = "http://localhost:9000" +store_access_key_id = "admin" +store_secret_access_key = "password" +store_region = "us-east-1" +store_class = "s3" +``` +## Configuration Options + +- **tables**: The names of the Iceberg tables you want to statically route Iggy messages to. Each name must include the table’s namespace and the table name, separated by a dot (`.`). +- **catalog_type**: The type of catalog you are routing data to. **Currently, only REST catalogs are fully supported.** +- **warehouse**: The name of the bucket or warehouse where Iggy will upload data files. +- **uri**: The URI of the Iceberg catalog. +- **dynamic_routing**: Enables dynamic routing. See more details later in this document. +- **dynamic_route_field**: The name of the message field that specifies the Iceberg table to route data to. See more details below. +- **store_url**: The URL of the object storage for data uploads. +- **store_access_key_id**: The access key ID of the object storage. +- **store_secret_access_key**: The secret key used to upload data to the object storage. +- **store_region**: The region of the object storage, if applicable. +- **store_class**: The storage class to use. **Currently, only S3-compatible storage is supported.** + +## Dynamic Routing + +If you don't know the names of the Iceberg tables you want to route data to in advance, you can use the dynamic routing feature. +Insert a field in your Iggy messages with the name of the Iceberg table the message should be routed to. 
The Iceberg Sink Connector will parse this field at runtime and route the message to the correct table. + +The Iceberg Sink Connector will skip messages in the following cases: +- The table declared in the message field does not exist. +- The message does not contain the field specified in the `dynamic_route_field` configuration option. + + +### Dynamic routing configuration example + +```toml +[sinks.iceberg.config] +tables = [""] +catalog_type = "rest" +warehouse = "warehouse" +uri = "http://localhost:8181" +dynamic_routing = true +dynamic_route_field = "db_table" +store_url = "http://localhost:9000" +store_access_key_id = "admin" +store_secret_access_key = "password" +store_region = "us-east-1" +store_class = "s3" + +[sinks.iceberg.transforms.add_fields] +enabled = true + +[[sinks.iceberg.transforms.add_fields.fields]] +key = "db_table" +value.static = "nyc.users" +``` + +**Note:** The value in the message field **must** contain both the namespace and the table name, separated by a dot (`.`). +Example (field value `nyc.users`): +- Namespace: `nyc` +- Table name: `users` + From 34ce2f051d0e59e058791125711c9c7b5f26a0a7 Mon Sep 17 00:00:00 2001 From: Edgar Modesto Date: Mon, 22 Sep 2025 15:16:44 -0700 Subject: [PATCH 13/28] Remove init iceberg source --- .../sources/iceberg_source/Cargo.toml | 23 ------------- .../sources/iceberg_source/README.md | 0 .../sources/iceberg_source/src/lib.rs | 32 ------------------- 3 files changed, 55 deletions(-) delete mode 100644 core/connectors/sources/iceberg_source/Cargo.toml delete mode 100644 core/connectors/sources/iceberg_source/README.md delete mode 100644 core/connectors/sources/iceberg_source/src/lib.rs diff --git a/core/connectors/sources/iceberg_source/Cargo.toml b/core/connectors/sources/iceberg_source/Cargo.toml deleted file mode 100644 index a55cae12b..000000000 --- a/core/connectors/sources/iceberg_source/Cargo.toml +++ /dev/null @@ -1,23 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -[package] -name = "iceberg_source" -version = "0.1.0" -edition = "2024" - -[dependencies] diff --git a/core/connectors/sources/iceberg_source/README.md b/core/connectors/sources/iceberg_source/README.md deleted file mode 100644 index e69de29bb..000000000 diff --git a/core/connectors/sources/iceberg_source/src/lib.rs b/core/connectors/sources/iceberg_source/src/lib.rs deleted file mode 100644 index fc4426b63..000000000 --- a/core/connectors/sources/iceberg_source/src/lib.rs +++ /dev/null @@ -1,32 +0,0 @@ -/* Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -pub fn add(left: u64, right: u64) -> u64 { - left + right -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn it_works() { - let result = add(2, 2); - assert_eq!(result, 4); - } -} From 466b8f90c4bbc6fc1508ec012e65a7d21683fbdf Mon Sep 17 00:00:00 2001 From: Edgar Modesto Date: Mon, 22 Sep 2025 15:17:44 -0700 Subject: [PATCH 14/28] remove iceberg source from config.toml --- core/connectors/runtime/config.toml | 148 ++++++++++------------------ 1 file changed, 50 insertions(+), 98 deletions(-) diff --git a/core/connectors/runtime/config.toml b/core/connectors/runtime/config.toml index ea868f8ee..c0e7e80a9 100644 --- a/core/connectors/runtime/config.toml +++ b/core/connectors/runtime/config.toml @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. + [http_api] # Optional HTTP API configuration enabled = true address = "127.0.0.1:8081" @@ -43,28 +44,64 @@ password = "iggy" [state] path = "local_state" -[sinks.stdout] +[sinks.iceberg] enabled = true -name = "Stdout sink" -path = "target/release/libiggy_connector_stdout_sink" +name = "Iceberg sink" +path = "target/release/libiggy_connector_iceberg_sink" -[[sinks.stdout.streams]] +[[sinks.iceberg.streams]] stream = "example_stream" topics = ["example_topic"] schema = "json" batch_length = 100 poll_interval = "5ms" -consumer_group = "stdout_sink_connector" - -[sinks.stdout.config] -print_payload = false - -[sinks.stdout.transforms.add_fields] +consumer_group = "iceberg_sink_connector" + +# Local S3 example +[sinks.iceberg.config] +tables = ["nyc.users"] +catalog_type = "rest" +warehouse = "warehouse" +uri = "http://localhost:8181" +dynamic_routing = true +dynamic_route_field = "db_table" +store_url = "http://localhost:9000" +store_access_key_id = "admin" +store_secret_access_key = "password" +store_region = "us-east-1" +store_class = "s3" + + 
+[sinks.iceberg.transforms.add_fields] enabled = true -[[sinks.stdout.transforms.add_fields.fields]] -key = "message" -value.static = "hello" +[[sinks.iceberg.transforms.add_fields.fields]] +key = "db_table" +value.static = "nyc.users" + +# +# [sinks.stdout] +# enabled = true +# name = "Stdout sink" +# path = "target/release/libiggy_connector_stdout_sink" +# +# [[sinks.stdout.streams]] +# stream = "example_stream" +# topics = ["example_topic"] +# schema = "json" +# batch_length = 100 +# poll_interval = "5ms" +# consumer_group = "stdout_sink_connector" +# +# [sinks.stdout.config] +# print_payload = false +# +# [sinks.stdout.transforms.add_fields] +# enabled = true +# +# [[sinks.stdout.transforms.add_fields.fields]] +# key = "message" +# value.static = "hello" [sources.random] enabled = true @@ -91,88 +128,3 @@ enabled = true [[sources.random.transforms.add_fields.fields]] key = "test_field" value.static = "hello!" - -[sinks.quickwit] -enabled = true -name = "Quickwit sink 1" -path = "target/release/libiggy_connector_quickwit_sink" -config_format = "yaml" - -[[sinks.quickwit.streams]] -stream = "qw" -topics = ["records"] -schema = "json" -batch_length = 1000 -poll_interval = "5ms" -consumer_group = "qw_sink_connector" - -[sinks.quickwit.transforms.add_fields] -enabled = true - -[[sinks.quickwit.transforms.add_fields.fields]] -key = "service_name" -value.static = "qw_connector" - -[[sinks.quickwit.transforms.add_fields.fields]] -key = "timestamp" -value.computed = "timestamp_millis" - -[[sinks.quickwit.transforms.add_fields.fields]] -key = "random_id" -value.computed = "uuid_v7" - -[sinks.quickwit.transforms.delete_fields] -enabled = true -fields = ["email", "created_at"] - -[sinks.quickwit.config] -url = "http://localhost:7280" -index = """ -version: 0.9 - -index_id: events - -doc_mapping: - mode: strict - field_mappings: - - name: timestamp - type: datetime - input_formats: [unix_timestamp] - output_format: unix_timestamp_nanos - indexed: false - fast: true - 
fast_precision: milliseconds - - name: service_name - type: text - tokenizer: raw - fast: true - - name: random_id - type: text - tokenizer: raw - fast: true - - name: user_id - type: text - tokenizer: raw - fast: true - - name: user_type - type: u64 - fast: true - - name: source - type: text - tokenizer: default - - name: state - type: text - tokenizer: default - - name: message - type: text - tokenizer: default - - timestamp_field: timestamp - -indexing_settings: - commit_timeout_secs: 10 - -retention: - period: 7 days - schedule: daily -""" From 80de6acd25e64eb32ae6989108dca65aebbe08aa Mon Sep 17 00:00:00 2001 From: Edgar Modesto Date: Mon, 22 Sep 2025 17:05:49 -0700 Subject: [PATCH 15/28] Remove iceberg source from cargo lock --- Cargo.lock | 4 ---- Cargo.toml | 2 -- 2 files changed, 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2efb8f9eb..3ca523ff3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4492,10 +4492,6 @@ dependencies = [ "uuid", ] -[[package]] -name = "iceberg_source" -version = "0.1.0" - [[package]] name = "icu_collections" version = "2.0.0" diff --git a/Cargo.toml b/Cargo.toml index 71cccb6cc..5742fe1eb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -38,7 +38,6 @@ members = [ "core/connectors/sinks/postgres_sink", "core/connectors/sinks/quickwit_sink", "core/connectors/sinks/stdout_sink", - "core/connectors/sources/iceberg_source", "core/connectors/sources/postgres_source", "core/connectors/sources/random_source", "core/integration", @@ -173,7 +172,6 @@ iceberg = "0.6.0" iceberg-catalog-rest = "0.6.0" iceberg-catalog-glue = "0.6.0" - # Optional dependencies mimalloc = "0.1" console-subscriber = "0.4.1" From 213f2bf7e61d6011d6ec9a9d5a545f96184bd2f8 Mon Sep 17 00:00:00 2001 From: Edgar Modesto Date: Mon, 22 Sep 2025 17:08:27 -0700 Subject: [PATCH 16/28] Add example iceberg config --- core/connectors/runtime/config.toml | 148 ++++++++++++------ .../{example.toml => example_config.toml} | 71 +++++---- 2 files changed, 141 insertions(+), 78 
deletions(-) rename core/connectors/sinks/iceberg_sink/{example.toml => example_config.toml} (72%) diff --git a/core/connectors/runtime/config.toml b/core/connectors/runtime/config.toml index c0e7e80a9..ea868f8ee 100644 --- a/core/connectors/runtime/config.toml +++ b/core/connectors/runtime/config.toml @@ -15,7 +15,6 @@ # specific language governing permissions and limitations # under the License. - [http_api] # Optional HTTP API configuration enabled = true address = "127.0.0.1:8081" @@ -44,64 +43,28 @@ password = "iggy" [state] path = "local_state" -[sinks.iceberg] +[sinks.stdout] enabled = true -name = "Iceberg sink" -path = "target/release/libiggy_connector_iceberg_sink" +name = "Stdout sink" +path = "target/release/libiggy_connector_stdout_sink" -[[sinks.iceberg.streams]] +[[sinks.stdout.streams]] stream = "example_stream" topics = ["example_topic"] schema = "json" batch_length = 100 poll_interval = "5ms" -consumer_group = "iceberg_sink_connector" - -# Local S3 example -[sinks.iceberg.config] -tables = ["nyc.users"] -catalog_type = "rest" -warehouse = "warehouse" -uri = "http://localhost:8181" -dynamic_routing = true -dynamic_route_field = "db_table" -store_url = "http://localhost:9000" -store_access_key_id = "admin" -store_secret_access_key = "password" -store_region = "us-east-1" -store_class = "s3" - - -[sinks.iceberg.transforms.add_fields] -enabled = true +consumer_group = "stdout_sink_connector" -[[sinks.iceberg.transforms.add_fields.fields]] -key = "db_table" -value.static = "nyc.users" +[sinks.stdout.config] +print_payload = false -# -# [sinks.stdout] -# enabled = true -# name = "Stdout sink" -# path = "target/release/libiggy_connector_stdout_sink" -# -# [[sinks.stdout.streams]] -# stream = "example_stream" -# topics = ["example_topic"] -# schema = "json" -# batch_length = 100 -# poll_interval = "5ms" -# consumer_group = "stdout_sink_connector" -# -# [sinks.stdout.config] -# print_payload = false -# -# [sinks.stdout.transforms.add_fields] -# enabled = 
true -# -# [[sinks.stdout.transforms.add_fields.fields]] -# key = "message" -# value.static = "hello" +[sinks.stdout.transforms.add_fields] +enabled = true + +[[sinks.stdout.transforms.add_fields.fields]] +key = "message" +value.static = "hello" [sources.random] enabled = true @@ -128,3 +91,88 @@ enabled = true [[sources.random.transforms.add_fields.fields]] key = "test_field" value.static = "hello!" + +[sinks.quickwit] +enabled = true +name = "Quickwit sink 1" +path = "target/release/libiggy_connector_quickwit_sink" +config_format = "yaml" + +[[sinks.quickwit.streams]] +stream = "qw" +topics = ["records"] +schema = "json" +batch_length = 1000 +poll_interval = "5ms" +consumer_group = "qw_sink_connector" + +[sinks.quickwit.transforms.add_fields] +enabled = true + +[[sinks.quickwit.transforms.add_fields.fields]] +key = "service_name" +value.static = "qw_connector" + +[[sinks.quickwit.transforms.add_fields.fields]] +key = "timestamp" +value.computed = "timestamp_millis" + +[[sinks.quickwit.transforms.add_fields.fields]] +key = "random_id" +value.computed = "uuid_v7" + +[sinks.quickwit.transforms.delete_fields] +enabled = true +fields = ["email", "created_at"] + +[sinks.quickwit.config] +url = "http://localhost:7280" +index = """ +version: 0.9 + +index_id: events + +doc_mapping: + mode: strict + field_mappings: + - name: timestamp + type: datetime + input_formats: [unix_timestamp] + output_format: unix_timestamp_nanos + indexed: false + fast: true + fast_precision: milliseconds + - name: service_name + type: text + tokenizer: raw + fast: true + - name: random_id + type: text + tokenizer: raw + fast: true + - name: user_id + type: text + tokenizer: raw + fast: true + - name: user_type + type: u64 + fast: true + - name: source + type: text + tokenizer: default + - name: state + type: text + tokenizer: default + - name: message + type: text + tokenizer: default + + timestamp_field: timestamp + +indexing_settings: + commit_timeout_secs: 10 + +retention: + period: 7 days + 
schedule: daily +""" diff --git a/core/connectors/sinks/iceberg_sink/example.toml b/core/connectors/sinks/iceberg_sink/example_config.toml similarity index 72% rename from core/connectors/sinks/iceberg_sink/example.toml rename to core/connectors/sinks/iceberg_sink/example_config.toml index 46afd7013..c0e7e80a9 100644 --- a/core/connectors/sinks/iceberg_sink/example.toml +++ b/core/connectors/sinks/iceberg_sink/example_config.toml @@ -50,43 +50,58 @@ name = "Iceberg sink" path = "target/release/libiggy_connector_iceberg_sink" [[sinks.iceberg.streams]] -stream = "iceberg" -topics = ["taxis"] -# schema = "json" -batch_length = 10 -poll_interval = "5ms" -consumer_group = "iceberg_sink_connector" - -[sinks.iceberg.catalog] -tables = ["nyc.taxis"] -type = "rest" -uri = "https://localhost" -credential = "12345" -warehouse = "warehouse" - - -[sinks.stdout] -enabled = true -name = "Stdout sink" -path = "target/release/libiggy_connector_stdout_sink" - -[[sinks.stdout.streams]] stream = "example_stream" topics = ["example_topic"] schema = "json" batch_length = 100 poll_interval = "5ms" -consumer_group = "stdout_sink_connector" +consumer_group = "iceberg_sink_connector" + +# Local S3 example +[sinks.iceberg.config] +tables = ["nyc.users"] +catalog_type = "rest" +warehouse = "warehouse" +uri = "http://localhost:8181" +dynamic_routing = true +dynamic_route_field = "db_table" +store_url = "http://localhost:9000" +store_access_key_id = "admin" +store_secret_access_key = "password" +store_region = "us-east-1" +store_class = "s3" -[sinks.stdout.config] -print_payload = false -[sinks.stdout.transforms.add_fields] +[sinks.iceberg.transforms.add_fields] enabled = true -[[sinks.stdout.transforms.add_fields.fields]] -key = "message" -value.static = "hello" +[[sinks.iceberg.transforms.add_fields.fields]] +key = "db_table" +value.static = "nyc.users" + +# +# [sinks.stdout] +# enabled = true +# name = "Stdout sink" +# path = "target/release/libiggy_connector_stdout_sink" +# +# 
[[sinks.stdout.streams]] +# stream = "example_stream" +# topics = ["example_topic"] +# schema = "json" +# batch_length = 100 +# poll_interval = "5ms" +# consumer_group = "stdout_sink_connector" +# +# [sinks.stdout.config] +# print_payload = false +# +# [sinks.stdout.transforms.add_fields] +# enabled = true +# +# [[sinks.stdout.transforms.add_fields.fields]] +# key = "message" +# value.static = "hello" [sources.random] enabled = true From 0fd461a31545563fc7d37beb3f6e841dc2f50c46 Mon Sep 17 00:00:00 2001 From: Edgar Modesto Date: Mon, 22 Sep 2025 17:09:27 -0700 Subject: [PATCH 17/28] Revert quickwit sink --- core/connectors/sinks/quickwit_sink/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/connectors/sinks/quickwit_sink/src/lib.rs b/core/connectors/sinks/quickwit_sink/src/lib.rs index 218b63de5..e290b3e0b 100644 --- a/core/connectors/sinks/quickwit_sink/src/lib.rs +++ b/core/connectors/sinks/quickwit_sink/src/lib.rs @@ -18,7 +18,7 @@ use async_trait::async_trait; use iggy_connector_sdk::{ - sink_connector, ConsumedMessage, Error, MessagesMetadata, Payload, Sink, TopicMetadata, + ConsumedMessage, Error, MessagesMetadata, Payload, Sink, TopicMetadata, sink_connector, }; use serde::{Deserialize, Serialize}; use tracing::{error, info, warn}; From 8add1ce4b79d1bc23ffd03c63c8963a762dee27b Mon Sep 17 00:00:00 2001 From: Edgar Modesto Date: Mon, 22 Sep 2025 17:11:46 -0700 Subject: [PATCH 18/28] fmt and clippy code changes --- core/connectors/sinks/iceberg_sink/src/lib.rs | 11 +++++----- .../sinks/iceberg_sink/src/router.rs | 21 +++++++++---------- 2 files changed, 15 insertions(+), 17 deletions(-) diff --git a/core/connectors/sinks/iceberg_sink/src/lib.rs b/core/connectors/sinks/iceberg_sink/src/lib.rs index 341653895..abf0ab0b6 100644 --- a/core/connectors/sinks/iceberg_sink/src/lib.rs +++ b/core/connectors/sinks/iceberg_sink/src/lib.rs @@ -25,7 +25,7 @@ use iceberg::Catalog; use iceberg_catalog_glue::{GlueCatalog, GlueCatalogConfig}; use 
iceberg_catalog_rest::{RestCatalog, RestCatalogConfig}; use iggy_connector_sdk::{ - sink_connector, ConsumedMessage, Error, MessagesMetadata, Sink, TopicMetadata, + ConsumedMessage, Error, MessagesMetadata, Sink, TopicMetadata, sink_connector, }; use serde::{Deserialize, Serialize}; use tracing::{error, info}; @@ -99,7 +99,7 @@ pub struct IcebergSinkConfig { pub store_class: IcebergSinkStoreClass, } -pub(self) fn slice_user_table(table: &String) -> Vec { +fn slice_user_table(table: &str) -> Vec { table.split('.').map(|s| s.to_string()).collect() } @@ -117,7 +117,7 @@ impl IcebergSink { self.config.store_secret_access_key.clone(), ); props.insert("s3.endpoint".to_string(), self.config.store_url.clone()); - return Ok(props); + Ok(props) } pub fn new(id: u32, config: IcebergSinkConfig) -> Self { @@ -162,11 +162,10 @@ impl IcebergSink { .warehouse(self.config.warehouse.clone()) .build(); - let catalog = GlueCatalog::new(config).await.map_err(|err| { + GlueCatalog::new(config).await.map_err(|err| { error!("Failed to get glue catalog with error: {}. 
Make sure the catalog is correctly declared on the config file", err); Error::InitError(err.to_string()) - }); - return catalog; + }) } } diff --git a/core/connectors/sinks/iceberg_sink/src/router.rs b/core/connectors/sinks/iceberg_sink/src/router.rs index e49b30ecb..2c1ebd054 100644 --- a/core/connectors/sinks/iceberg_sink/src/router.rs +++ b/core/connectors/sinks/iceberg_sink/src/router.rs @@ -23,6 +23,7 @@ use std::sync::Arc; use arrow_json::ReaderBuilder; use async_trait::async_trait; +use iceberg::TableIdent; use iceberg::arrow::schema_to_arrow_schema; use iceberg::spec::{Literal, PrimitiveLiteral, PrimitiveType, Struct, StructType}; use iceberg::table::Table; @@ -30,10 +31,9 @@ use iceberg::transaction::{ApplyTransactionAction, Transaction}; use iceberg::writer::base_writer::data_file_writer::DataFileWriterBuilder; use iceberg::writer::file_writer::ParquetWriterBuilder; use iceberg::writer::{IcebergWriter, IcebergWriterBuilder}; -use iceberg::TableIdent; use iceberg::{ - writer::file_writer::location_generator::{DefaultFileNameGenerator, DefaultLocationGenerator}, Catalog, + writer::file_writer::location_generator::{DefaultFileNameGenerator, DefaultLocationGenerator}, }; use iggy_connector_sdk::{ConsumedMessage, Error, MessagesMetadata, Payload, Schema}; use parquet::file::properties::WriterProperties; @@ -69,7 +69,7 @@ pub fn primitive_type_to_literal(pt: &PrimitiveType) -> Result Result, Error> { let mut fields: Vec> = Vec::new(); - if default_partition_type.fields().len() == 0 { + if default_partition_type.fields().is_empty() { return Ok(None); }; @@ -149,7 +149,7 @@ where let cursor = Cursor::new(json_messages); - let mut reader = ReaderBuilder::new(Arc::new( + let reader = ReaderBuilder::new(Arc::new( schema_to_arrow_schema(&table.metadata().current_schema().clone()).map_err(|err| { error!( "Error while mapping records to Iceberg table with uuid: {}. 
Error {}", @@ -168,7 +168,7 @@ where Error::InitError(err.to_string()) })?; - while let Some(batch) = reader.next() { + for batch in reader { let batch_data = batch.map_err(|err| { error!("Error while getting record batch: {}", err); Error::InvalidRecord @@ -184,7 +184,7 @@ where Error::InvalidRecord })?; - let table_commit = Transaction::new(&table); + let table_commit = Transaction::new(table); let action = table_commit.fast_append().add_data_files(data_files); @@ -247,7 +247,7 @@ impl DynamicWriter { route_field_val: &str, catalog: &dyn Catalog, ) -> Result { - let sliced_table = slice_user_table(&route_field_val.to_string()); + let sliced_table = slice_user_table(route_field_val); let table_ident = &TableIdent::from_strs(&sliced_table).map_err(|err| { error!("Failed to load table from catalog: {}. ", err); Error::InitError(err.to_string()) @@ -327,8 +327,7 @@ impl Router for DynamicRouter { if let Some(msgs) = writer.table_to_message.get_mut(&route_field_val_cloned) { msgs.push(message); } else { - let mut message_vec: Vec> = Vec::new(); - message_vec.push(message); + let message_vec: Vec> = vec![message]; writer .table_to_message .insert(route_field_val_cloned, message_vec); @@ -342,7 +341,7 @@ impl Router for DynamicRouter { None => continue, }; write_data( - batch_messages.iter().map(|arc| Arc::clone(arc)), + batch_messages.iter().map(Arc::clone), table_obj, self.catalog.as_ref(), messages_metadata.schema, @@ -373,7 +372,7 @@ impl StaticRouter { let mut tables: Vec
= Vec::with_capacity(declared_tables.len()); let mut tables_found = 0; for declared_table in declared_tables { - let sliced_table = slice_user_table(&declared_table); + let sliced_table = slice_user_table(declared_table); let table_ident = &TableIdent::from_strs(sliced_table.clone()).map_err(|err| { error!("Failed to load table from catalog: {}. ", err); Error::InitError(err.to_string()) From 9b80451909b82eac1bfdccbda674142fce413bbd Mon Sep 17 00:00:00 2001 From: Edgar Modesto Date: Mon, 22 Sep 2025 17:27:59 -0700 Subject: [PATCH 19/28] Sort cargo.toml --- Cargo.toml | 144 +++++++++--------- core/connectors/sinks/iceberg_sink/Cargo.toml | 24 +-- 2 files changed, 84 insertions(+), 84 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 5742fe1eb..1119a0dab 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,23 +1,3 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -[profile.release] -lto = true -codegen-units = 1 [workspace] members = [ @@ -50,8 +30,12 @@ exclude = ["foreign/python"] resolver = "2" [workspace.dependencies] +aes-gcm = "0.10.3" ahash = { version = "0.8.12", features = ["serde"] } anyhow = "1.0.99" +arrow = "55.2.0" +arrow-array = "55.2.0" +arrow-json = "55.2.0" async-broadcast = "0.7.2" async-dropper = { version = "0.3.1", features = ["tokio", "simple"] } async-trait = "0.1.88" @@ -65,7 +49,13 @@ async_zip = { version = "0.0.18", features = [ ] } axum = "0.8.4" axum-server = { version = "0.7.2", features = ["tls-rustls"] } +base64 = "0.22.1" bcrypt = "0.17.0" +bench-dashboard-frontend = { path = "core/bench/dashboard/frontend" } +bench-dashboard-server = { path = "core/bench/dashboard/server" } +bench-dashboard-shared = { path = "core/bench/dashboard/shared" } +bench-report = { path = "core/bench/report" } +bench-runner = { path = "core/bench/runner" } bincode = { version = "2.0.1", features = ["serde"] } blake3 = "1.8.2" bon = "3.7.0" @@ -78,50 +68,88 @@ bytes = "1.10.1" charming = "0.6.0" chrono = { version = "0.4.41", features = ["serde"] } clap = { version = "4.5.44", features = ["derive"] } -config = { version = "0.15.13" } + +# Common dependencies across multiple packages +colored = "3.0.0" comfy-table = "7.1.4" +config = { version = "0.15.13" } +console-subscriber = "0.4.1" crc32fast = "1.5.0" crossbeam = "0.8.4" dashmap = "6.1.0" +derive-new = "0.7.0" derive_builder = "0.20.2" derive_more = { version = "2.0.1", features = ["full"] } -derive-new = "0.7.0" dirs = "6.0.0" dlopen2 = "0.8.0" dotenvy = "0.15.7" enum_dispatch = "0.3.13" +env_logger = "0.11.8" figlet-rs = "0.1.5" flume = "0.11.1" futures = "0.3.31" futures-util = "0.3.31" human-repr = "1.1.0" humantime = "2.2.0" +iceberg = "0.6.0" +iceberg-catalog-glue = "0.6.0" +iceberg-catalog-rest = "0.6.0" +iggy = { path = "core/sdk", version = "0.7.0" } + +# Path dependencies +iggy_binary_protocol = { path = "core/binary_protocol", version = "0.7.0" } 
+iggy_common = { path = "core/common", version = "0.7.0" } +iggy_connector_sdk = { path = "core/connectors/sdk", version = "0.1.0" } +integration = { path = "core/integration" } keyring = { version = "3.6.3", features = ["sync-secret-service", "vendored"] } +lazy_static = "1.5.0" +log = "0.4.27" + +# Optional dependencies +mimalloc = "0.1" +mockall = "0.13.1" nonzero_lit = "0.1.2" once_cell = "1.21.3" +parquet = "55.2.0" passterm = "=2.0.1" -quinn = "0.11.8" postcard = { version = "1.1.3", features = ["alloc"] } +predicates = "3.1.3" +quinn = "0.11.8" rand = "0.9.2" +regex = "1.11.1" reqwest = { version = "0.12.22", default-features = false, features = [ "json", "rustls-tls", ] } reqwest-middleware = { version = "0.4.2", features = ["json"] } reqwest-retry = "0.7.0" +rust-s3 = { version = "0.36.0-beta.2", default-features = false, features = [ + "tokio-rustls-tls", + "tags", +] } rustls = { version = "0.23.31", features = ["ring"] } serde = { version = "1.0.219", features = ["derive", "rc"] } serde_json = "1.0.142" serde_with = { version = "3.14.0", features = ["base64", "macros"] } serde_yml = "0.0.12" serial_test = "3.2.0" +server = { path = "core/server" } simd-json = { version = "0.15.1", features = ["serde_impl"] } +strum = { version = "0.27.2", features = ["derive"] } +strum_macros = "0.27.2" sysinfo = "0.37.0" tempfile = "3.20.0" +test-case = "3.3.1" thiserror = "2.0.14" tokio = { version = "1.47.1", features = ["full"] } tokio-rustls = "0.26.2" +tokio-util = { version = "0.7.16", features = ["compat"] } toml = "0.9.5" +tower-http = { version = "0.6.6", features = [ + "add-extension", + "cors", + "trace", +] } tracing = "0.1.41" tracing-appender = "0.2.3" tracing-subscriber = { version = "0.3.19", default-features = false, features = [ @@ -129,6 +157,8 @@ tracing-subscriber = { version = "0.3.19", default-features = false, features = "env-filter", "ansi", ] } +trait-variant = "0.1.2" +twox-hash = { version = "2.1.1", features = ["xxhash32"] } uuid = { 
version = "1.18.0", features = [ "v4", "v7", @@ -136,55 +166,25 @@ uuid = { version = "1.18.0", features = [ "serde", "zerocopy", ] } -rust-s3 = { version = "0.36.0-beta.2", default-features = false, features = [ - "tokio-rustls-tls", - "tags", -] } -strum = { version = "0.27.2", features = ["derive"] } -strum_macros = "0.27.2" -aes-gcm = "0.10.3" -base64 = "0.22.1" -twox-hash = { version = "2.1.1", features = ["xxhash32"] } - -# Common dependencies across multiple packages -colored = "3.0.0" -env_logger = "0.11.8" -lazy_static = "1.5.0" -log = "0.4.27" -mockall = "0.13.1" -predicates = "3.1.3" -regex = "1.11.1" -test-case = "3.3.1" -tokio-util = { version = "0.7.16", features = ["compat"] } -tower-http = { version = "0.6.6", features = [ - "add-extension", - "cors", - "trace", -] } -trait-variant = "0.1.2" webpki-roots = "1.0.2" zip = "4.3.0" -arrow = "55.2.0" -arrow-json = "55.2.0" -parquet = "55.2.0" -arrow-array = "55.2.0" -iceberg = "0.6.0" -iceberg-catalog-rest = "0.6.0" -iceberg-catalog-glue = "0.6.0" - -# Optional dependencies -mimalloc = "0.1" -console-subscriber = "0.4.1" +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
-# Path dependencies -iggy_binary_protocol = { path = "core/binary_protocol", version = "0.7.0" } -iggy_common = { path = "core/common", version = "0.7.0" } -iggy_connector_sdk = { path = "core/connectors/sdk", version = "0.1.0" } -iggy = { path = "core/sdk", version = "0.7.0" } -server = { path = "core/server" } -integration = { path = "core/integration" } -bench-report = { path = "core/bench/report" } -bench-runner = { path = "core/bench/runner" } -bench-dashboard-frontend = { path = "core/bench/dashboard/frontend" } -bench-dashboard-server = { path = "core/bench/dashboard/server" } -bench-dashboard-shared = { path = "core/bench/dashboard/shared" } +[profile.release] +lto = true +codegen-units = 1 diff --git a/core/connectors/sinks/iceberg_sink/Cargo.toml b/core/connectors/sinks/iceberg_sink/Cargo.toml index e6152b2cb..54870946f 100644 --- a/core/connectors/sinks/iceberg_sink/Cargo.toml +++ b/core/connectors/sinks/iceberg_sink/Cargo.toml @@ -30,26 +30,26 @@ readme = "../../README.md" [package.metadata.cargo-machete] ignored = ["dashmap", "once_cell"] +[lib] +crate-type = ["cdylib", "lib"] + [dependencies] +arrow-array = { workspace = true } +arrow-json = { workspace = true } async-trait = { workspace = true } +chrono = { workspace = true } dashmap = { workspace = true } +iceberg = { workspace = true } +iceberg-catalog-glue = { workspace = true } +iceberg-catalog-rest = { workspace = true } iggy_connector_sdk = { workspace = true } once_cell = { workspace = true } +parquet = { workspace = true } reqwest = { workspace = true } +rust-s3 = { workspace = true } serde = { workspace = true } -serde_yml = { workspace = true } serde_json = { workspace = true } +serde_yml = { workspace = true } simd-json = { workspace = true } tracing = { workspace = true } -arrow-json = { workspace = true } -parquet = { workspace = true } -arrow-array = { workspace = true } uuid = { workspace = true } -rust-s3 = { workspace = true } -iceberg = { workspace = true } -iceberg-catalog-rest = 
{ workspace = true } -iceberg-catalog-glue = { workspace = true } -chrono = { workspace = true } - -[lib] -crate-type = ["cdylib", "lib"] From 988864eb153956b026ba6da94526f9b63f932285 Mon Sep 17 00:00:00 2001 From: Edgar Modesto Date: Mon, 22 Sep 2025 17:30:51 -0700 Subject: [PATCH 20/28] Clean dependencies with cargo machete --- core/connectors/sinks/iceberg_sink/Cargo.toml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/core/connectors/sinks/iceberg_sink/Cargo.toml b/core/connectors/sinks/iceberg_sink/Cargo.toml index 54870946f..ebc8850d3 100644 --- a/core/connectors/sinks/iceberg_sink/Cargo.toml +++ b/core/connectors/sinks/iceberg_sink/Cargo.toml @@ -34,10 +34,8 @@ ignored = ["dashmap", "once_cell"] crate-type = ["cdylib", "lib"] [dependencies] -arrow-array = { workspace = true } arrow-json = { workspace = true } async-trait = { workspace = true } -chrono = { workspace = true } dashmap = { workspace = true } iceberg = { workspace = true } iceberg-catalog-glue = { workspace = true } @@ -45,11 +43,7 @@ iceberg-catalog-rest = { workspace = true } iggy_connector_sdk = { workspace = true } once_cell = { workspace = true } parquet = { workspace = true } -reqwest = { workspace = true } -rust-s3 = { workspace = true } serde = { workspace = true } -serde_json = { workspace = true } -serde_yml = { workspace = true } simd-json = { workspace = true } tracing = { workspace = true } uuid = { workspace = true } From 5f773074a8e6e8c92e76fb8788e9a54531e88031 Mon Sep 17 00:00:00 2001 From: Edgar Modesto Date: Mon, 22 Sep 2025 17:34:47 -0700 Subject: [PATCH 21/28] update cargo lock --- Cargo.lock | 6 ------ 1 file changed, 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3ca523ff3..3a5f777ed 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4788,10 +4788,8 @@ dependencies = [ name = "iggy_connector_iceberg_sink" version = "0.1.0" dependencies = [ - "arrow-array", "arrow-json", "async-trait", - "chrono", "dashmap", "iceberg", "iceberg-catalog-glue", @@ -4799,11 +4797,7 
@@ dependencies = [ "iggy_connector_sdk", "once_cell", "parquet", - "reqwest", - "rust-s3", "serde", - "serde_json", - "serde_yml", "simd-json", "tracing", "uuid", From efc0ea820ce9e0988eee2b8a2d4bc96f4c4e4f83 Mon Sep 17 00:00:00 2001 From: Edgar Modesto Date: Mon, 22 Sep 2025 17:36:30 -0700 Subject: [PATCH 22/28] Fix cargo.toml --- Cargo.toml | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 1119a0dab..6bb3b2755 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,3 +1,23 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +[profile.release] +lto = true +codegen-units = 1 [workspace] members = [ @@ -168,23 +188,3 @@ uuid = { version = "1.18.0", features = [ ] } webpki-roots = "1.0.2" zip = "4.3.0" -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -[profile.release] -lto = true -codegen-units = 1 From 0ae4154947592a4f00aaa90ea530a04d7e2990c2 Mon Sep 17 00:00:00 2001 From: Edgar Modesto Date: Wed, 24 Sep 2025 16:45:30 -0700 Subject: [PATCH 23/28] remove hms commented code --- core/connectors/sinks/iceberg_sink/src/lib.rs | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/core/connectors/sinks/iceberg_sink/src/lib.rs b/core/connectors/sinks/iceberg_sink/src/lib.rs index abf0ab0b6..443134fce 100644 --- a/core/connectors/sinks/iceberg_sink/src/lib.rs +++ b/core/connectors/sinks/iceberg_sink/src/lib.rs @@ -143,18 +143,6 @@ impl IcebergSink { RestCatalog::new(catalog_config) } - //#[inline(always)] - //fn get_hms_catalog(&self) -> HmsCatalog { - // let config = HmsCatalogConfig::builder() - // .props(self.props) - // .warehouse(self.config.bucket_name.clone()) - // .address(self.config.uri.clone()) - // .thrift_transport(HmsThriftTransport::Buffered) - // .build(); - // - // HmsCatalog::new(config) - //} - #[inline(always)] async fn get_glue_catalog(&self) -> Result { let config = GlueCatalogConfig::builder() From 9bcb23674b4cbacaafb7292e4b17c02b077be8ad Mon Sep 17 00:00:00 2001 From: Edgar Modesto Date: Wed, 24 Sep 2025 17:35:19 -0700 Subject: [PATCH 24/28] fix cargo lock --- Cargo.lock | 398 +++++++++++++++++++++++++++++++---------------------- 1 file changed, 230 insertions(+), 168 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 65ef9642d..a69fc5aa0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -457,6 +457,12 @@ dependencies = [ "password-hash", ] +[[package]] +name = "array-init" 
+version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d62b7694a562cdf5a74227903507c56ab2cc8bdd1f781ed5cb4cf9c9f810bfc" + [[package]] name = "arrayref" version = "0.3.9" @@ -568,7 +574,7 @@ dependencies = [ "arrow-schema", "chrono", "half", - "indexmap 2.10.0", + "indexmap 2.11.4", "lexical-core", "memchr", "num", @@ -624,7 +630,7 @@ dependencies = [ "memchr", "num", "regex", - "regex-syntax 0.8.5", + "regex-syntax 0.8.6", ] [[package]] @@ -825,7 +831,7 @@ dependencies = [ "base64 0.22.1", "http 1.3.1", "log", - "rustls 0.23.31", + "rustls 0.23.32", "serde", "serde_json", "url", @@ -889,7 +895,7 @@ dependencies = [ "attohttpc", "home", "log", - "quick-xml", + "quick-xml 0.38.3", "rust-ini", "serde", "thiserror 2.0.16", @@ -899,9 +905,9 @@ dependencies = [ [[package]] name = "aws-lc-rs" -version = "1.14.0" +version = "1.14.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94b8ff6c09cd57b16da53641caa860168b88c172a5ee163b0288d3d6eea12786" +checksum = "879b6c89592deb404ba4dc0ae6b58ffd1795c78991cbb5b8bc441c48a070440d" dependencies = [ "aws-lc-sys", "zeroize", @@ -909,15 +915,16 @@ dependencies = [ [[package]] name = "aws-lc-sys" -version = "0.31.0" +version = "0.32.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e44d16778acaf6a9ec9899b92cebd65580b83f685446bf2e1f5d3d732f99dcd" +checksum = "ee74396bee4da70c2e27cf94762714c911725efe69d9e2672f998512a67a4ce4" dependencies = [ "bindgen", "cc", "cmake", "dunce", "fs_extra", + "libloading", ] [[package]] @@ -955,9 +962,9 @@ dependencies = [ [[package]] name = "aws-sdk-glue" -version = "1.119.0" +version = "1.120.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9170d379508a356718ca6c27ffa5c00b5077ecdf7ab5cb27a70834a58e00dd13" +checksum = "fb3d94a26ea645b950bea42532091e39c642693af43343aa0da7ebaa06b6f3bc" dependencies = [ "aws-credential-types", "aws-runtime", @@ -977,9 +984,9 @@ dependencies 
= [ [[package]] name = "aws-sdk-sso" -version = "1.83.0" +version = "1.84.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "643cd43af212d2a1c4dedff6f044d7e1961e5d9e7cfe773d70f31d9842413886" +checksum = "357a841807f6b52cb26123878b3326921e2a25faca412fabdd32bd35b7edd5d3" dependencies = [ "aws-credential-types", "aws-runtime", @@ -999,9 +1006,9 @@ dependencies = [ [[package]] name = "aws-sdk-ssooidc" -version = "1.84.0" +version = "1.86.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "20ec4a95bd48e0db7a424356a161f8d87bd6a4f0af37204775f0da03d9e39fc3" +checksum = "9d1cc7fb324aa12eb4404210e6381195c5b5e9d52c2682384f295f38716dd3c7" dependencies = [ "aws-credential-types", "aws-runtime", @@ -1021,9 +1028,9 @@ dependencies = [ [[package]] name = "aws-sdk-sts" -version = "1.85.0" +version = "1.86.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "410309ad0df4606bc721aff0d89c3407682845453247213a0ccc5ff8801ee107" +checksum = "e7d835f123f307cafffca7b9027c14979f1d403b417d8541d67cf252e8a21e35" dependencies = [ "aws-credential-types", "aws-runtime", @@ -1110,17 +1117,17 @@ dependencies = [ "http 1.3.1", "http-body 0.4.6", "hyper 0.14.32", - "hyper 1.6.0", + "hyper 1.7.0", "hyper-rustls 0.24.2", "hyper-rustls 0.27.7", "hyper-util", "pin-project-lite", "rustls 0.21.12", - "rustls 0.23.31", + "rustls 0.23.32", "rustls-native-certs 0.8.1", "rustls-pki-types", "tokio", - "tokio-rustls 0.26.2", + "tokio-rustls 0.26.3", "tower 0.5.2", "tracing", ] @@ -1283,7 +1290,7 @@ dependencies = [ "http 1.3.1", "http-body 1.0.1", "http-body-util", - "hyper 1.6.0", + "hyper 1.7.0", "hyper-util", "itoa", "matchit 0.8.4", @@ -1355,14 +1362,14 @@ dependencies = [ "fs-err", "http 1.3.1", "http-body 1.0.1", - "hyper 1.6.0", + "hyper 1.7.0", "hyper-util", "pin-project-lite", - "rustls 0.23.31", + "rustls 0.23.32", "rustls-pemfile 2.2.0", "rustls-pki-types", "tokio", - "tokio-rustls 0.26.2", + "tokio-rustls 0.26.3", 
"tower-service", ] @@ -1645,18 +1652,18 @@ dependencies = [ "home", "http 1.3.1", "http-body-util", - "hyper", + "hyper 1.7.0", "hyper-named-pipe", - "hyper-rustls", + "hyper-rustls 0.27.7", "hyper-util", "hyperlocal", "log", "num", "pin-project-lite", "rand 0.9.2", - "rustls", - "rustls-native-certs", - "rustls-pemfile", + "rustls 0.23.32", + "rustls-native-certs 0.8.1", + "rustls-pemfile 2.2.0", "rustls-pki-types", "serde", "serde_derive", @@ -2626,9 +2633,9 @@ dependencies = [ [[package]] name = "dary_heap" -version = "0.3.7" +version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04d2cd9c18b9f454ed67da600630b021a8a80bf33f8c95896ab33aaf1c26b728" +checksum = "06d2e3287df1c007e74221c49ca10a95d557349e54b3a75dc2fb14712c751f04" [[package]] name = "dashmap" @@ -2685,12 +2692,12 @@ dependencies = [ [[package]] name = "deranged" -version = "0.5.3" +version = "0.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d630bccd429a5bb5a64b5e94f693bfc48c9f8566418fda4c494cc94f911f87cc" +checksum = "a41953f86f8a05768a6cda24def994fd2f424b04ec5c719cf89989779f199071" dependencies = [ "powerfmt", - "serde", + "serde_core", ] [[package]] @@ -3231,9 +3238,9 @@ checksum = "1ced73b1dacfc750a6db6c0a0c3a3853c8b41997e2e2c563dc90804ae6867959" [[package]] name = "flatbuffers" -version = "25.2.10" +version = "25.9.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1045398c1bfd89168b5fd3f1fc11f6e70b34f6f66300c87d44d3de849463abf1" +checksum = "09b6620799e7340ebd9968d2e0708eb82cf1971e9a16821e2091b6d6e475eed5" dependencies = [ "bitflags 2.9.4", "rustc_version", @@ -3463,20 +3470,6 @@ dependencies = [ "slab", ] -[[package]] -name = "generator" -version = "0.8.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "605183a538e3e2a9c1038635cc5c2d194e2ee8fd0d1b66b8349fad7dbacce5a2" -dependencies = [ - "cc", - "cfg-if", - "libc", - "log", - "rustversion", - "windows 0.61.3", 
-] - [[package]] name = "generic-array" version = "0.14.7" @@ -4326,6 +4319,30 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "135b12329e5e3ce057a9f972339ea52bc954fe1e9358ef27f95e89716fbc5424" +[[package]] +name = "hyper" +version = "0.14.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41dfc780fdec9373c01bae43289ea34c972e40ee3c9f6b3c8801a35f35586ce7" +dependencies = [ + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "h2 0.3.27", + "http 0.2.12", + "http-body 0.4.6", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "socket2 0.5.10", + "tokio", + "tower-service", + "tracing", + "want", +] + [[package]] name = "hyper" version = "1.7.0" @@ -4356,7 +4373,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "73b7d8abf35697b81a825e386fc151e0d503e8cb5fcb93cc8669c376dfd6f278" dependencies = [ "hex", - "hyper", + "hyper 1.7.0", "hyper-util", "pin-project-lite", "tokio", @@ -4387,14 +4404,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58" dependencies = [ "http 1.3.1", - "hyper 1.6.0", + "hyper 1.7.0", "hyper-util", "log", - "rustls 0.23.31", + "rustls 0.23.32", "rustls-native-certs 0.8.1", "rustls-pki-types", "tokio", - "tokio-rustls 0.26.2", + "tokio-rustls 0.26.3", "tower-service", "webpki-roots 1.0.2", ] @@ -4405,7 +4422,7 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b90d566bffbce6a75bd8b09a05aa8c2cb1fabb6cb348f8840c9e4c90a0d83b0" dependencies = [ - "hyper 1.6.0", + "hyper 1.7.0", "hyper-util", "pin-project-lite", "tokio", @@ -4425,7 +4442,7 @@ dependencies = [ "futures-util", "http 1.3.1", "http-body 1.0.1", - "hyper 1.6.0", + "hyper 1.7.0", "ipnet", "libc", "percent-encoding", @@ -4444,7 +4461,7 @@ checksum = "986c5ce3b994526b3cd75578e62554abd09f0899d6206de48b3e96ab34ccc8c7" 
dependencies = [ "hex", "http-body-util", - "hyper", + "hyper 1.7.0", "hyper-util", "pin-project-lite", "tokio", @@ -4703,10 +4720,10 @@ dependencies = [ "reqwest", "reqwest-middleware", "reqwest-retry", - "rustls 0.23.31", + "rustls 0.23.32", "serde", "tokio", - "tokio-rustls 0.26.2", + "tokio-rustls 0.26.3", "tracing", "trait-variant", "webpki-roots 1.0.2", @@ -4811,7 +4828,7 @@ dependencies = [ "serde", "serde_json", "serde_yaml_ng", - "strum", + "strum 0.27.2", "thiserror 2.0.16", "tokio", "toml 0.9.7", @@ -4833,9 +4850,8 @@ dependencies = [ "iggy_common", "rmcp", "serde", - "serde_json", - "strum", + "strum 0.27.2", "thiserror 2.0.16", "tokio", "tower-http", @@ -4883,11 +4899,11 @@ dependencies = [ "figment", "humantime", "rcgen", - "rustls 0.23.31", + "rustls 0.23.32", "serde", "serde_json", "serde_with", - "strum", + "strum 0.27.2", "thiserror 2.0.16", "tokio", "toml 0.9.7", @@ -5002,7 +5018,7 @@ dependencies = [ "serde", "serde_json", "simd-json", - "strum_macros", + "strum_macros 0.27.2", "thiserror 2.0.16", "tokio", "tracing", @@ -5334,9 +5350,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.80" +version = "0.3.81" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "852f13bec5eba4ba9afbeb93fd7c13fe56147f055939ae21c43a29a0ecb2702e" +checksum = "ec48937a97411dcb524a265206ccd4c90bb711fca92b2792c407f268825b9305" dependencies = [ "once_cell", "wasm-bindgen", @@ -5464,9 +5480,9 @@ dependencies = [ [[package]] name = "lexical-core" -version = "1.0.5" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b765c31809609075565a70b4b71402281283aeda7ecaf4818ac14a7b2ade8958" +checksum = "7d8d125a277f807e55a77304455eb7b1cb52f2b18c143b60e766c120bd64a594" dependencies = [ "lexical-parse-float", "lexical-parse-integer", @@ -5477,53 +5493,46 @@ dependencies = [ [[package]] name = "lexical-parse-float" -version = "1.0.5" +version = "1.0.6" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "de6f9cb01fb0b08060209a057c048fcbab8717b4c1ecd2eac66ebfe39a65b0f2" +checksum = "52a9f232fbd6f550bc0137dcb5f99ab674071ac2d690ac69704593cb4abbea56" dependencies = [ "lexical-parse-integer", "lexical-util", - "static_assertions", ] [[package]] name = "lexical-parse-integer" -version = "1.0.5" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72207aae22fc0a121ba7b6d479e42cbfea549af1479c3f3a4f12c70dd66df12e" +checksum = "9a7a039f8fb9c19c996cd7b2fcce303c1b2874fe1aca544edc85c4a5f8489b34" dependencies = [ "lexical-util", - "static_assertions", ] [[package]] name = "lexical-util" -version = "1.0.6" +version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a82e24bf537fd24c177ffbbdc6ebcc8d54732c35b50a3f28cc3f4e4c949a0b3" -dependencies = [ - "static_assertions", -] +checksum = "2604dd126bb14f13fb5d1bd6a66155079cb9fa655b37f875b3a742c705dbed17" [[package]] name = "lexical-write-float" -version = "1.0.5" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5afc668a27f460fb45a81a757b6bf2f43c2d7e30cb5a2dcd3abf294c78d62bd" +checksum = "50c438c87c013188d415fbabbb1dceb44249ab81664efbd31b14ae55dabb6361" dependencies = [ "lexical-util", "lexical-write-integer", - "static_assertions", ] [[package]] name = "lexical-write-integer" -version = "1.0.5" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "629ddff1a914a836fb245616a7888b62903aae58fa771e1d83943035efa0f978" +checksum = "409851a618475d2d5796377cad353802345cba92c867d9fbcde9cf4eac4e14df" dependencies = [ "lexical-util", - "static_assertions", ] [[package]] @@ -5534,9 +5543,9 @@ checksum = "2c4a545a15244c7d945065b5d392b2d2d7f21526fba56ce51467b06ed445e8f7" [[package]] name = "libc" -version = "0.2.175" +version = "0.2.176" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum 
= "6a82ae493e598baaea5209805c49bbf2ea7de956d50d7da0da1164f9c6d28543" +checksum = "58f929b4d672ea937a23a1ab494143d968337a5f47e56d0815df1e0890ddf174" [[package]] name = "libdbus-sys" @@ -5586,12 +5595,12 @@ dependencies = [ [[package]] name = "libloading" -version = "0.8.9" +version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55" +checksum = "07033963ba89ebaf1584d767badaa2e8fcec21aedea6b8c0346d487d49c28667" dependencies = [ "cfg-if", - "windows-link 0.2.0", + "windows-targets 0.53.3", ] [[package]] @@ -5763,25 +5772,21 @@ dependencies = [ "logos-codegen", ] -[[package]] -name = "loom" -version = "0.7.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "419e0dc8046cb947daa77eb95ae174acfbddb7673b4151f56d1eed8e93fbfaca" -dependencies = [ - "cfg-if", - "generator", - "scoped-tls", - "tracing", - "tracing-subscriber", -] - [[package]] name = "lru-slab" version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" +[[package]] +name = "lz4_flex" +version = "0.11.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08ab2867e3eeeca90e844d1940eab391c9dc5228783db2ed999acbc0a9ed375a" +dependencies = [ + "twox-hash", +] + [[package]] name = "lzma-rust2" version = "0.13.0" @@ -5973,23 +5978,22 @@ dependencies = [ [[package]] name = "moka" -version = "0.12.10" +version = "0.12.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9321642ca94a4282428e6ea4af8cc2ca4eac48ac7a6a4ea8f33f76d0ce70926" +checksum = "8261cd88c312e0004c1d51baad2980c66528dfdb2bee62003e643a4d8f86b077" dependencies = [ "async-lock", "crossbeam-channel", "crossbeam-epoch", "crossbeam-utils", + "equivalent", "event-listener", "futures-util", - "loom", "parking_lot 0.12.4", "portable-atomic", "rustc_version", "smallvec", 
"tagptr", - "thiserror 1.0.69", "uuid", ] @@ -6306,7 +6310,7 @@ dependencies = [ "http 1.3.1", "http-body 1.0.1", "http-body-util", - "hyper 1.6.0", + "hyper 1.7.0", "hyper-rustls 0.27.7", "hyper-timeout", "hyper-util", @@ -6557,6 +6561,12 @@ dependencies = [ "hashbrown 0.14.5", ] +[[package]] +name = "outref" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a80800c0488c3a21695ea981a54918fbb37abf04f4d0720c453632255e2ff0e" + [[package]] name = "parking" version = "2.2.1" @@ -7264,9 +7274,9 @@ dependencies = [ [[package]] name = "quick-xml" -version = "0.38.3" +version = "0.37.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42a232e7487fc2ef313d96dde7948e7a3c05101870d8985e4fd8d26aedd27b89" +checksum = "331e97a1af0bf59823e6eadffe373d7b27f485be8748f71471c662c1f269b7fb" dependencies = [ "memchr", "serde", @@ -7274,9 +7284,9 @@ dependencies = [ [[package]] name = "quick-xml" -version = "0.37.5" +version = "0.38.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "331e97a1af0bf59823e6eadffe373d7b27f485be8748f71471c662c1f269b7fb" +checksum = "42a232e7487fc2ef313d96dde7948e7a3c05101870d8985e4fd8d26aedd27b89" dependencies = [ "memchr", "serde", @@ -7294,7 +7304,7 @@ dependencies = [ "quinn-proto", "quinn-udp", "rustc-hash", - "rustls", + "rustls 0.23.32", "socket2 0.6.0", "thiserror 2.0.16", "tokio", @@ -7315,7 +7325,7 @@ dependencies = [ "rand 0.9.2", "ring", "rustc-hash", - "rustls", + "rustls 0.23.32", "rustls-pki-types", "rustls-platform-verifier", "slab", @@ -7612,7 +7622,7 @@ dependencies = [ "http 1.3.1", "http-body 1.0.1", "http-body-util", - "hyper 1.6.0", + "hyper 1.7.0", "hyper-rustls 0.27.7", "hyper-util", "js-sys", @@ -7620,14 +7630,14 @@ dependencies = [ "percent-encoding", "pin-project-lite", "quinn", - "rustls 0.23.31", + "rustls 0.23.32", "rustls-pki-types", "serde", "serde_json", "serde_urlencoded", "sync_wrapper", "tokio", - "tokio-rustls 0.26.2", 
+ "tokio-rustls 0.26.3", "tokio-util", "tower 0.5.2", "tower-http", @@ -7666,7 +7676,7 @@ dependencies = [ "futures", "getrandom 0.2.16", "http 1.3.1", - "hyper 1.6.0", + "hyper 1.7.0", "parking_lot 0.11.2", "reqwest", "reqwest-middleware", @@ -7780,6 +7790,16 @@ dependencies = [ "syn 2.0.106", ] +[[package]] +name = "roaring" +version = "0.10.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19e8d2cfa184d94d0726d650a9f4a1be7f9b76ac9fdb954219878dc00c1c1e7b" +dependencies = [ + "bytemuck", + "byteorder", +] + [[package]] name = "route-recognizer" version = "0.3.1" @@ -7837,7 +7857,7 @@ dependencies = [ "md5", "minidom", "percent-encoding", - "quick-xml", + "quick-xml 0.38.3", "reqwest", "serde", "serde_derive", @@ -7901,6 +7921,18 @@ dependencies = [ "windows-sys 0.61.0", ] +[[package]] +name = "rustls" +version = "0.21.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f56a14d1f48b391359b22f731fd4bd7e43c97f3c50eee276f3aa09c94784d3e" +dependencies = [ + "log", + "ring", + "rustls-webpki 0.101.7", + "sct", +] + [[package]] name = "rustls" version = "0.23.32" @@ -7912,7 +7944,7 @@ dependencies = [ "once_cell", "ring", "rustls-pki-types", - "rustls-webpki 0.103.4", + "rustls-webpki 0.103.6", "subtle", "zeroize", ] @@ -7938,7 +7970,7 @@ dependencies = [ "openssl-probe", "rustls-pki-types", "schannel", - "security-framework 3.3.0", + "security-framework 3.5.0", ] [[package]] @@ -7980,11 +8012,11 @@ dependencies = [ "jni", "log", "once_cell", - "rustls 0.23.31", + "rustls 0.23.32", "rustls-native-certs 0.8.1", "rustls-platform-verifier-android", - "rustls-webpki 0.103.4", - "security-framework 3.3.0", + "rustls-webpki 0.103.6", + "security-framework 3.5.0", "security-framework-sys", "webpki-root-certs", "windows-sys 0.59.0", @@ -7996,6 +8028,16 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f87165f0995f63a9fbeea62b64d10b4d9d8e78ec6d7d51fb2125fda7bb36788f" 
+[[package]] +name = "rustls-webpki" +version = "0.101.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765" +dependencies = [ + "ring", + "untrusted", +] + [[package]] name = "rustls-webpki" version = "0.103.6" @@ -8104,12 +8146,6 @@ dependencies = [ "syn 2.0.106", ] -[[package]] -name = "scoped-tls" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294" - [[package]] name = "scopeguard" version = "1.2.0" @@ -8159,6 +8195,19 @@ dependencies = [ "zeroize", ] +[[package]] +name = "security-framework" +version = "2.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" +dependencies = [ + "bitflags 2.9.4", + "core-foundation 0.9.4", + "core-foundation-sys", + "libc", + "security-framework-sys", +] + [[package]] name = "security-framework" version = "3.5.0" @@ -8166,7 +8215,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cc198e42d9b7510827939c9a15f5062a0c913f3371d765977e586d2fe6c16f4a" dependencies = [ "bitflags 2.9.4", - "core-foundation", + "core-foundation 0.10.1", "core-foundation-sys", "libc", "security-framework-sys", @@ -8230,6 +8279,16 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "serde_bytes" +version = "0.11.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5d440709e79d88e51ac01c4b72fc6cb7314017bb7da9eeff678aa94c10e3ea8" +dependencies = [ + "serde", + "serde_core", +] + [[package]] name = "serde_core" version = "1.0.226" @@ -8296,17 +8355,6 @@ dependencies = [ "syn 2.0.106", ] -[[package]] -name = "serde_repr" -version = "0.1.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "175ee3e80ae9982737ca543e96133087cbd9a485eecc3bc4de9c1a37b47ea59c" 
-dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.104", -] - [[package]] name = "serde_spanned" version = "0.6.9" @@ -8453,7 +8501,7 @@ dependencies = [ "reqwest", "ring", "rust-s3", - "rustls 0.23.31", + "rustls 0.23.32", "rustls-pemfile 2.2.0", "serde", "serde_with", @@ -8464,7 +8512,7 @@ dependencies = [ "tempfile", "thiserror 2.0.16", "tokio", - "tokio-rustls 0.26.2", + "tokio-rustls 0.26.3", "tokio-util", "toml 0.9.7", "tower-http", @@ -8722,7 +8770,7 @@ dependencies = [ "memchr", "once_cell", "percent-encoding", - "rustls 0.23.31", + "rustls 0.23.32", "serde", "serde_json", "sha2", @@ -8987,7 +9035,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -9404,13 +9452,23 @@ dependencies = [ "syn 2.0.106", ] +[[package]] +name = "tokio-rustls" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081" +dependencies = [ + "rustls 0.21.12", + "tokio", +] + [[package]] name = "tokio-rustls" version = "0.26.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "05f63835928ca123f1bef57abbcd23bb2ba0ac9ae1235f1e65bda0d06e7786bd" dependencies = [ - "rustls 0.23.31", + "rustls 0.23.32", "tokio", ] @@ -9572,7 +9630,7 @@ dependencies = [ "http 1.3.1", "http-body 1.0.1", "http-body-util", - "hyper 1.6.0", + "hyper 1.7.0", "hyper-timeout", "hyper-util", "percent-encoding", @@ -9601,7 +9659,7 @@ dependencies = [ "http 1.3.1", "http-body 1.0.1", "http-body-util", - "hyper 1.6.0", + "hyper 1.7.0", "hyper-timeout", "hyper-util", "percent-encoding", @@ -9784,11 +9842,9 @@ dependencies = [ "once_cell", "regex-automata", "sharded-slab", - "smallvec", "thread_local", "tracing", "tracing-core", - "tracing-log", ] [[package]] @@ -9863,7 +9919,7 @@ checksum = "f9534daa9fd3ed0bd911d462a37f172228077e7abf18c18a5f67199d959205f8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 
2.0.106", ] [[package]] @@ -9874,7 +9930,7 @@ checksum = "3c36781cc0e46a83726d9879608e4cf6c2505237e263a8eb8c24502989cfdb28" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -10008,7 +10064,7 @@ dependencies = [ "base64 0.22.1", "log", "once_cell", - "rustls", + "rustls 0.23.32", "rustls-pki-types", "url", "webpki-roots 0.26.11", @@ -10215,9 +10271,9 @@ checksum = "b8dad83b4f25e74f184f64c43b150b91efe7647395b42289f38e50566d82855b" [[package]] name = "wasm-bindgen" -version = "0.2.103" +version = "0.2.104" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab10a69fbd0a177f5f649ad4d8d3305499c42bab9aef2f7ff592d0ec8f833819" +checksum = "c1da10c01ae9f1ae40cbfac0bac3b1e724b320abfcf52229f80b547c0d250e2d" dependencies = [ "cfg-if", "once_cell", @@ -10228,9 +10284,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.103" +version = "0.2.104" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0bb702423545a6007bbc368fde243ba47ca275e549c8a28617f56f6ba53b1d1c" +checksum = "671c9a5a66f49d8a47345ab942e2cb93c7d1d0339065d4f8139c486121b43b19" dependencies = [ "bumpalo", "log", @@ -10242,9 +10298,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.53" +version = "0.4.54" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a0b221ff421256839509adbb55998214a70d829d3a28c69b4a6672e9d2a42f67" +checksum = "7e038d41e478cc73bae0ff9b36c60cff1c98b8f38f8d7e8061e79ee63608ac5c" dependencies = [ "cfg-if", "js-sys", @@ -10255,9 +10311,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.103" +version = "0.2.104" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc65f4f411d91494355917b605e1480033152658d71f722a90647f56a70c88a0" +checksum = "7ca60477e4c59f5f2986c50191cd972e3a50d8a95603bc9434501cf156a9a119" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -10265,9 
+10321,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.103" +version = "0.2.104" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ffc003a991398a8ee604a401e194b6b3a39677b3173d6e74495eb51b82e99a32" +checksum = "9f07d2f20d4da7b26400c9f4a0511e6e0345b040694e8a75bd41d578fa4421d7" dependencies = [ "proc-macro2", "quote", @@ -10278,9 +10334,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.103" +version = "0.2.104" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "293c37f4efa430ca14db3721dfbe48d8c33308096bd44d80ebaa775ab71ba1cf" +checksum = "bad67dc8b2a1a6e5448428adec4c3e84c43e561d8c9ee8a9e5aabeb193ec41d1" dependencies = [ "unicode-ident", ] @@ -10315,9 +10371,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.80" +version = "0.3.81" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fbe734895e869dc429d78c4b433f8d17d95f8d05317440b4fad5ab2d33e596dc" +checksum = "9367c417a924a74cae129e6a2ae3b47fabb1f8995595ab474029da749a8be120" dependencies = [ "js-sys", "wasm-bindgen", @@ -10949,6 +11005,12 @@ dependencies = [ "rustix", ] +[[package]] +name = "xmlparser" +version = "0.13.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "66fee0b777b0f5ac1c69bb06d361268faafa61cd4682ae064a171c16c433e9e4" + [[package]] name = "yansi" version = "1.0.1" From 09682d931872001a272de90d013b2faadf4ef8fb Mon Sep 17 00:00:00 2001 From: Edgar Modesto Date: Wed, 24 Sep 2025 18:59:46 -0700 Subject: [PATCH 25/28] Split iceberg sink into modules, code review --- Cargo.lock | 1 + core/connectors/sinks/iceberg_sink/Cargo.toml | 1 + ...fig.toml => iggy_iceberg_sink_config.toml} | 2 +- .../sinks/iceberg_sink/src/catalog.rs | 60 +++++ core/connectors/sinks/iceberg_sink/src/lib.rs | 193 ++------------ .../sinks/iceberg_sink/src/props.rs | 43 ++++ .../iceberg_sink/src/router/dynamic_router.rs | 179 +++++++++++++ 
.../src/{router.rs => router/mod.rs} | 239 +----------------- .../iceberg_sink/src/router/static_router.rs | 96 +++++++ .../connectors/sinks/iceberg_sink/src/sink.rs | 91 +++++++ 10 files changed, 499 insertions(+), 406 deletions(-) rename core/connectors/sinks/iceberg_sink/{example_config.toml => iggy_iceberg_sink_config.toml} (97%) create mode 100644 core/connectors/sinks/iceberg_sink/src/catalog.rs create mode 100644 core/connectors/sinks/iceberg_sink/src/props.rs create mode 100644 core/connectors/sinks/iceberg_sink/src/router/dynamic_router.rs rename core/connectors/sinks/iceberg_sink/src/{router.rs => router/mod.rs} (50%) create mode 100644 core/connectors/sinks/iceberg_sink/src/router/static_router.rs create mode 100644 core/connectors/sinks/iceberg_sink/src/sink.rs diff --git a/Cargo.lock b/Cargo.lock index a69fc5aa0..371f5ac95 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4925,6 +4925,7 @@ dependencies = [ "parquet", "serde", "simd-json", + "strum 0.27.2", "tracing", "uuid", ] diff --git a/core/connectors/sinks/iceberg_sink/Cargo.toml b/core/connectors/sinks/iceberg_sink/Cargo.toml index ebc8850d3..9949fbd8a 100644 --- a/core/connectors/sinks/iceberg_sink/Cargo.toml +++ b/core/connectors/sinks/iceberg_sink/Cargo.toml @@ -47,3 +47,4 @@ serde = { workspace = true } simd-json = { workspace = true } tracing = { workspace = true } uuid = { workspace = true } +strum = { workspace = true } diff --git a/core/connectors/sinks/iceberg_sink/example_config.toml b/core/connectors/sinks/iceberg_sink/iggy_iceberg_sink_config.toml similarity index 97% rename from core/connectors/sinks/iceberg_sink/example_config.toml rename to core/connectors/sinks/iceberg_sink/iggy_iceberg_sink_config.toml index c0e7e80a9..35c370c8b 100644 --- a/core/connectors/sinks/iceberg_sink/example_config.toml +++ b/core/connectors/sinks/iceberg_sink/iggy_iceberg_sink_config.toml @@ -39,7 +39,7 @@ key_file = "core/certs/iggy_key.pem" address = "localhost:8090" username = "iggy" password = "iggy" -# 
token = "secret" # Personal Access Token (PAT) can be used instead of username and password +token = "" # Personal Access Token (PAT) can be used instead of username and password [state] path = "local_state" diff --git a/core/connectors/sinks/iceberg_sink/src/catalog.rs b/core/connectors/sinks/iceberg_sink/src/catalog.rs new file mode 100644 index 000000000..bc951e96c --- /dev/null +++ b/core/connectors/sinks/iceberg_sink/src/catalog.rs @@ -0,0 +1,60 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +use super::{Error, IcebergSinkConfig, IcebergSinkTypes}; +use crate::props::init_props; +use iceberg::Catalog; +use iceberg_catalog_glue::{GlueCatalog, GlueCatalogConfig}; +use iceberg_catalog_rest::{RestCatalog, RestCatalogConfig}; +use std::collections::HashMap; +use tracing::error; + +pub async fn init_catalog(config: &IcebergSinkConfig) -> Result, Error> { + let props = init_props(config)?; + match config.catalog_type { + IcebergSinkTypes::REST => Ok(Box::new(get_rest_catalog(config, props))), + IcebergSinkTypes::GLUE => Ok(Box::new(get_glue_catalog(config, props).await?)), + } +} + +#[inline(always)] +fn get_rest_catalog(config: &IcebergSinkConfig, props: HashMap) -> RestCatalog { + let catalog_config = RestCatalogConfig::builder() + .uri(config.uri.clone()) + .props(props.clone()) + .warehouse(config.warehouse.clone()) + .build(); + + RestCatalog::new(catalog_config) +} + +#[inline(always)] +async fn get_glue_catalog( + config: &IcebergSinkConfig, + props: HashMap, +) -> Result { + let config = GlueCatalogConfig::builder() + .props(props.clone()) + .warehouse(config.warehouse.clone()) + .build(); + + GlueCatalog::new(config).await.map_err(|err| { + error!("Failed to get glue catalog with error: {}. Make sure the catalog is correctly declared on the config file", err); + Error::InitError(err.to_string()) + }) +} diff --git a/core/connectors/sinks/iceberg_sink/src/lib.rs b/core/connectors/sinks/iceberg_sink/src/lib.rs index 443134fce..c8b02c325 100644 --- a/core/connectors/sinks/iceberg_sink/src/lib.rs +++ b/core/connectors/sinks/iceberg_sink/src/lib.rs @@ -16,62 +16,31 @@ * under the License. 
*/ -use core::fmt; -use std::collections::HashMap; - -use async_trait::async_trait; - -use iceberg::Catalog; -use iceberg_catalog_glue::{GlueCatalog, GlueCatalogConfig}; -use iceberg_catalog_rest::{RestCatalog, RestCatalogConfig}; -use iggy_connector_sdk::{ - ConsumedMessage, Error, MessagesMetadata, Sink, TopicMetadata, sink_connector, -}; +use crate::router::Router; +use iggy_connector_sdk::{Error, sink_connector}; use serde::{Deserialize, Serialize}; -use tracing::{error, info}; - -use crate::router::{DynamicRouter, Router, StaticRouter}; +use strum::Display as StrumDisplay; +mod catalog; +mod props; mod router; +mod sink; -#[derive(Debug, Serialize, Deserialize)] -#[allow(non_camel_case_types)] +#[derive(Debug, Serialize, Deserialize, StrumDisplay)] +#[serde(rename_all = "lowercase")] pub enum IcebergSinkTypes { - rest, - glue, + REST, + GLUE, } -#[derive(Debug, Serialize, Deserialize)] -#[allow(non_camel_case_types)] +#[derive(Debug, Serialize, Deserialize, StrumDisplay)] +#[serde(rename_all = "lowercase")] pub enum IcebergSinkStoreClass { - s3, - fs, - gcs, - azdls, - oss, -} - -impl fmt::Display for IcebergSinkStoreClass { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let s = match self { - IcebergSinkStoreClass::s3 => "s3", - IcebergSinkStoreClass::fs => "fs", - IcebergSinkStoreClass::gcs => "gcs", - IcebergSinkStoreClass::oss => "oss", - IcebergSinkStoreClass::azdls => "azdls", - }; - write!(f, "{}", s) - } -} - -impl fmt::Display for IcebergSinkTypes { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let s = match self { - IcebergSinkTypes::rest => "rest", - IcebergSinkTypes::glue => "glue", - }; - write!(f, "{}", s) - } + S3, + FS, + GCS, + AZDLS, + OSS, } sink_connector!(IcebergSink); @@ -80,7 +49,6 @@ sink_connector!(IcebergSink); pub struct IcebergSink { id: u32, config: IcebergSinkConfig, - props: HashMap, router: Option>, } @@ -104,134 +72,9 @@ fn slice_user_table(table: &str) -> Vec { } impl IcebergSink { - 
#[inline(always)] - fn get_props_s3(&self) -> Result, Error> { - let mut props: HashMap = HashMap::new(); - props.insert("s3.region".to_string(), self.config.store_region.clone()); - props.insert( - "s3.access-key-id".to_string(), - self.config.store_access_key_id.clone(), - ); - props.insert( - "s3.secret-access-key".to_string(), - self.config.store_secret_access_key.clone(), - ); - props.insert("s3.endpoint".to_string(), self.config.store_url.clone()); - Ok(props) - } - pub fn new(id: u32, config: IcebergSinkConfig) -> Self { - let props = HashMap::new(); let router = None; - IcebergSink { - id, - config, - router, - props, - } - } - - #[inline(always)] - fn get_rest_catalog(&self) -> RestCatalog { - let catalog_config = RestCatalogConfig::builder() - .uri(self.config.uri.clone()) - .props(self.props.clone()) - .warehouse(self.config.warehouse.clone()) - .build(); - - RestCatalog::new(catalog_config) - } - - #[inline(always)] - async fn get_glue_catalog(&self) -> Result { - let config = GlueCatalogConfig::builder() - .props(self.props.clone()) - .warehouse(self.config.warehouse.clone()) - .build(); - - GlueCatalog::new(config).await.map_err(|err| { - error!("Failed to get glue catalog with error: {}. 
Make sure the catalog is correctly declared on the config file", err); - Error::InitError(err.to_string()) - }) - } -} - -#[async_trait] -impl Sink for IcebergSink { - async fn open(&mut self) -> Result<(), Error> { - info!( - "Opened Iceberg sink connector with ID: {} for URL: {}", - self.id, self.config.uri - ); - - info!( - "Configuring Iceberg catalog with the following config:\n-region: {}\n-url: {}\n-store class: {}\n-catalog type: {}\n", - self.config.store_region, - self.config.store_url, - self.config.store_class, - self.config.catalog_type - ); - - // Insert adequate props for initializing file IO, else fail to open - self.props = match self.config.store_class { - IcebergSinkStoreClass::s3 => self.get_props_s3()?, - _ => { - error!( - "Store class {} is not supported yet", - self.config.store_class - ); - return Err(Error::InvalidConfig); - } - }; - - let catalog: Box = match self.config.catalog_type { - IcebergSinkTypes::rest => Box::new(self.get_rest_catalog()), - IcebergSinkTypes::glue => Box::new(self.get_glue_catalog().await?), - }; - - if self.config.dynamic_routing { - self.router = Some(Box::new(DynamicRouter::new( - catalog, - self.config.dynamic_route_field.clone(), - ))) - } else { - self.router = Some(Box::new( - StaticRouter::new(catalog, &self.config.tables).await?, - )); - } - - Ok(()) - } - - async fn consume( - &self, - _topic_metadata: &TopicMetadata, - messages_metadata: MessagesMetadata, - messages: Vec, - ) -> Result<(), Error> { - info!( - "Iceberg sink with ID: {} received: {} messages, format: {}", - self.id, - messages.len(), - messages_metadata.schema - ); - - match &self.router { - Some(router) => router.route_data(messages_metadata, messages).await?, - None => { - error!("Iceberg connector has no router configured"); - return Err(Error::InvalidConfig); - } - }; - - info!("Finished successfully"); - - Ok(()) - } - - async fn close(&mut self) -> Result<(), Error> { - info!("Iceberg sink connector with ID: {} is closed.", 
self.id); - Ok(()) + IcebergSink { id, config, router } } } diff --git a/core/connectors/sinks/iceberg_sink/src/props.rs b/core/connectors/sinks/iceberg_sink/src/props.rs new file mode 100644 index 000000000..aa01d6d00 --- /dev/null +++ b/core/connectors/sinks/iceberg_sink/src/props.rs @@ -0,0 +1,43 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +use super::{Error, IcebergSinkConfig, IcebergSinkStoreClass}; +use std::collections::HashMap; + +pub fn init_props(config: &IcebergSinkConfig) -> Result, Error> { + match config.store_class { + IcebergSinkStoreClass::S3 => Ok(get_props_s3(config)?), + _ => Err(Error::InvalidConfig), + } +} + +#[inline(always)] +fn get_props_s3(config: &IcebergSinkConfig) -> Result, Error> { + let mut props: HashMap = HashMap::new(); + props.insert("s3.region".to_string(), config.store_region.clone()); + props.insert( + "s3.access-key-id".to_string(), + config.store_access_key_id.clone(), + ); + props.insert( + "s3.secret-access-key".to_string(), + config.store_secret_access_key.clone(), + ); + props.insert("s3.endpoint".to_string(), config.store_url.clone()); + Ok(props) +} diff --git a/core/connectors/sinks/iceberg_sink/src/router/dynamic_router.rs b/core/connectors/sinks/iceberg_sink/src/router/dynamic_router.rs new file mode 100644 index 000000000..331b7dab2 --- /dev/null +++ b/core/connectors/sinks/iceberg_sink/src/router/dynamic_router.rs @@ -0,0 +1,179 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +use crate::router::{Router, write_data}; +use crate::slice_user_table; +use async_trait::async_trait; +use iceberg::Catalog; +use iceberg::TableIdent; +use iceberg::table::Table; +use iggy_connector_sdk::{ConsumedMessage, Error, MessagesMetadata, Payload}; +use simd_json::base::ValueAsObject; +use std::collections::HashMap; +use std::sync::Arc; +use tracing::{error, info, warn}; + +#[derive(Debug)] +pub struct DynamicRouter { + catalog: Box, + route_field: String, +} + +pub struct DynamicWriter { + pub tables_to_write: HashMap, + pub table_to_message: HashMap>>, +} + +impl DynamicWriter { + pub fn new() -> Self { + let tables_to_write = HashMap::new(); + let table_to_message = HashMap::new(); + Self { + tables_to_write, + table_to_message, + } + } + + fn push_to_existing(&mut self, route_field_val: &str, message: &Arc) -> bool { + if let Some(message_vec) = self.table_to_message.get_mut(route_field_val) { + message_vec.push(Arc::clone(message)); + true + } else { + false + } + } + + // This will: + // - Check if the table declared on the route field exists in the iceberg catalog. + // - If it does, it will try to load it to memory and map the name with the Table object so + // that we can dynamically send messages to it's correct destination. + async fn ensure_table_exists( + &mut self, + route_field_val: &str, + catalog: &dyn Catalog, + ) -> Result { + let sliced_table = slice_user_table(route_field_val); + let table_ident = &TableIdent::from_strs(&sliced_table).map_err(|err| { + error!("Failed to load table from catalog: {}. ", err); + Error::InitError(err.to_string()) + })?; + + if !catalog.table_exists(table_ident).await.map_err(|err| { + error!("Failed to load table from catalog: {}. ", err); + Error::InitError(err.to_string()) + })? 
{ + return Ok(false); + } + + let table_ident = TableIdent::from_strs(&sliced_table).map_err(|err| { + error!("Failed to load table from catalog: {}.", err); + Error::InitError(err.to_string()) + })?; + + let table = catalog.load_table(&table_ident).await.map_err(|err| { + error!("Failed to load table from catalog: {}", err); + Error::InitError(err.to_string()) + })?; + + self.tables_to_write + .insert(route_field_val.to_string(), table); + + Ok(true) + } +} + +impl DynamicRouter { + pub fn new(catalog: Box, route_field: String) -> Self { + Self { + catalog, + route_field, + } + } + + fn extract_route_field(&self, message: &ConsumedMessage) -> Option { + match &message.payload { + Payload::Json(payload) => payload + .as_object() + .and_then(|obj| obj.get(&self.route_field)) + .map(|val| val.to_string()), + _ => { + warn!("Unsupported format for iceberg connector"); + None + } + } + } +} + +#[async_trait] +impl Router for DynamicRouter { + async fn route_data( + &self, + messages_metadata: MessagesMetadata, + messages: Vec, + ) -> Result<(), crate::Error> { + let mut writer = DynamicWriter::new(); + for message in messages { + let message = Arc::new(message); + let route_field_val = match self.extract_route_field(&message) { + Some(val) => val, + None => continue, + }; + + if writer.push_to_existing(&route_field_val, &message) { + continue; + } + + let route_field_val_cloned = route_field_val.clone(); + + if writer + .ensure_table_exists(&route_field_val_cloned, self.catalog.as_ref()) + .await? 
+ { + if let Some(msgs) = writer.table_to_message.get_mut(&route_field_val_cloned) { + msgs.push(message); + } else { + let message_vec: Vec> = vec![message]; + writer + .table_to_message + .insert(route_field_val_cloned, message_vec); + } + } + } + + for (table_name, table_obj) in &writer.tables_to_write { + let batch_messages = match writer.table_to_message.get(table_name) { + Some(m) => m, + None => continue, + }; + write_data( + batch_messages.iter().map(Arc::clone), + table_obj, + self.catalog.as_ref(), + messages_metadata.schema, + ) + .await?; + info!( + "Dynamically routed {} messages to {} iceberg table", + batch_messages.len(), + table_name + ); + } + + Ok(()) + } +} diff --git a/core/connectors/sinks/iceberg_sink/src/router.rs b/core/connectors/sinks/iceberg_sink/src/router/mod.rs similarity index 50% rename from core/connectors/sinks/iceberg_sink/src/router.rs rename to core/connectors/sinks/iceberg_sink/src/router/mod.rs index 2c1ebd054..e18d22390 100644 --- a/core/connectors/sinks/iceberg_sink/src/router.rs +++ b/core/connectors/sinks/iceberg_sink/src/router/mod.rs @@ -16,14 +16,8 @@ * under the License. 
*/ -use std::collections::HashMap; -use std::io::Cursor; -use std::sync::Arc; - use arrow_json::ReaderBuilder; - use async_trait::async_trait; -use iceberg::TableIdent; use iceberg::arrow::schema_to_arrow_schema; use iceberg::spec::{Literal, PrimitiveLiteral, PrimitiveType, Struct, StructType}; use iceberg::table::Table; @@ -37,11 +31,13 @@ use iceberg::{ }; use iggy_connector_sdk::{ConsumedMessage, Error, MessagesMetadata, Payload, Schema}; use parquet::file::properties::WriterProperties; -use simd_json::base::ValueAsObject; -use tracing::{error, info, warn}; +use std::io::Cursor; +use std::sync::Arc; +use tracing::{error, warn}; use uuid::Uuid; -use crate::slice_user_table; +pub mod dynamic_router; +pub mod static_router; pub fn primitive_type_to_literal(pt: &PrimitiveType) -> Result { match pt { @@ -74,7 +70,10 @@ fn get_partition_type_value(default_partition_type: &StructType) -> Result, - route_field: String, -} - -struct DynamicWriter { - pub tables_to_write: HashMap, - pub table_to_message: HashMap>>, -} - -impl DynamicWriter { - pub fn new() -> Self { - let tables_to_write = HashMap::new(); - let table_to_message = HashMap::new(); - Self { - tables_to_write, - table_to_message, - } - } - - fn push_to_existing(&mut self, route_field_val: &str, message: &Arc) -> bool { - if let Some(message_vec) = self.table_to_message.get_mut(route_field_val) { - message_vec.push(Arc::clone(message)); - true - } else { - false - } - } - - // This will: - // - Check if the table declared on the route field exists in the iceberg catalog. - // - If it does, it will try to load it to memory and map the name with the Table object so - // that we can dynamically send messages to it's correct destination. 
- async fn ensure_table_exists( - &mut self, - route_field_val: &str, - catalog: &dyn Catalog, - ) -> Result { - let sliced_table = slice_user_table(route_field_val); - let table_ident = &TableIdent::from_strs(&sliced_table).map_err(|err| { - error!("Failed to load table from catalog: {}. ", err); - Error::InitError(err.to_string()) - })?; - - if !catalog.table_exists(table_ident).await.map_err(|err| { - error!("Failed to load table from catalog: {}. ", err); - Error::InitError(err.to_string()) - })? { - return Ok(false); - } - - let table_ident = TableIdent::from_strs(&sliced_table).map_err(|err| { - error!("Failed to load table from catalog: {}.", err); - Error::InitError(err.to_string()) - })?; - - let table = catalog.load_table(&table_ident).await.map_err(|err| { - error!("Failed to load table from catalog: {}", err); - Error::InitError(err.to_string()) - })?; - - self.tables_to_write - .insert(route_field_val.to_string(), table); - - Ok(true) - } -} - -impl DynamicRouter { - pub fn new(catalog: Box, route_field: String) -> Self { - Self { - catalog, - route_field, - } - } - - fn extract_route_field(&self, message: &ConsumedMessage) -> Option { - match &message.payload { - Payload::Json(payload) => payload - .as_object() - .and_then(|obj| obj.get(&self.route_field)) - .map(|val| val.to_string()), - _ => { - warn!("Unsupported format for iceberg connector"); - None - } - } - } -} - -#[async_trait] -impl Router for DynamicRouter { - async fn route_data( - &self, - messages_metadata: MessagesMetadata, - messages: Vec, - ) -> Result<(), crate::Error> { - let mut writer = DynamicWriter::new(); - for message in messages { - let message = Arc::new(message); - let route_field_val = match self.extract_route_field(&message) { - Some(val) => val, - None => continue, - }; - - if writer.push_to_existing(&route_field_val, &message) { - continue; - } - - let route_field_val_cloned = route_field_val.clone(); - - if writer - .ensure_table_exists(&route_field_val_cloned, 
self.catalog.as_ref()) - .await? - { - if let Some(msgs) = writer.table_to_message.get_mut(&route_field_val_cloned) { - msgs.push(message); - } else { - let message_vec: Vec> = vec![message]; - writer - .table_to_message - .insert(route_field_val_cloned, message_vec); - } - } - } - - for (table_name, table_obj) in &writer.tables_to_write { - let batch_messages = match writer.table_to_message.get(table_name) { - Some(m) => m, - None => continue, - }; - write_data( - batch_messages.iter().map(Arc::clone), - table_obj, - self.catalog.as_ref(), - messages_metadata.schema, - ) - .await?; - info!( - "Dynamically routed {} messages to {} iceberg table", - batch_messages.len(), - table_name - ); - } - - Ok(()) - } -} - -#[derive(Debug)] -pub(crate) struct StaticRouter { - tables: Vec
, - catalog: Box, -} - -impl StaticRouter { - pub async fn new( - catalog: Box, - declared_tables: &Vec, - ) -> Result { - let mut tables: Vec
= Vec::with_capacity(declared_tables.len()); - let mut tables_found = 0; - for declared_table in declared_tables { - let sliced_table = slice_user_table(declared_table); - let table_ident = &TableIdent::from_strs(sliced_table.clone()).map_err(|err| { - error!("Failed to load table from catalog: {}. ", err); - Error::InitError(err.to_string()) - })?; - let exists = catalog.table_exists(table_ident).await.map_err(|err| { - error!("Failed to load table from catalog: {}", err); - Error::InitError(err.to_string()) - })?; - - if !exists { - continue; - }; - - tables_found += 1; - let table = catalog.load_table(table_ident).await.map_err(|err| { - error!("Failed to load table from catalog: {}", err); - Error::InitError(err.to_string()) - })?; - tables.push(table); - } - info!( - "Static router found {} tables on iceberg catalog from {} tables declared", - tables_found, - declared_tables.len() - ); - Ok(StaticRouter { tables, catalog }) - } -} - -#[async_trait] -impl Router for StaticRouter { - async fn route_data( - &self, - messages_metadata: MessagesMetadata, - messages: Vec, - ) -> Result<(), crate::Error> { - for table in &self.tables { - write_data( - &messages, - table, - self.catalog.as_ref(), - messages_metadata.schema, - ) - .await?; - info!( - "Routed {} messages to iceberg table {} successfully", - messages.len(), - table.identifier().name() - ); - } - - Ok(()) - } -} - #[async_trait] pub trait Router: std::fmt::Debug + Sync + Send { async fn route_data( diff --git a/core/connectors/sinks/iceberg_sink/src/router/static_router.rs b/core/connectors/sinks/iceberg_sink/src/router/static_router.rs new file mode 100644 index 000000000..ce875c9de --- /dev/null +++ b/core/connectors/sinks/iceberg_sink/src/router/static_router.rs @@ -0,0 +1,96 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +use crate::router::{Router, write_data}; +use crate::slice_user_table; +use async_trait::async_trait; +use iceberg::Catalog; +use iceberg::TableIdent; +use iceberg::table::Table; +use iggy_connector_sdk::{ConsumedMessage, Error, MessagesMetadata}; +use tracing::{error, info}; + +#[derive(Debug)] +pub(crate) struct StaticRouter { + tables: Vec
, + catalog: Box, +} + +impl StaticRouter { + pub async fn new( + catalog: Box, + declared_tables: &Vec, + ) -> Result { + let mut tables: Vec
= Vec::with_capacity(declared_tables.len()); + let mut tables_found = 0; + for declared_table in declared_tables { + let sliced_table = slice_user_table(declared_table); + let table_ident = &TableIdent::from_strs(sliced_table.clone()).map_err(|err| { + error!("Failed to load table from catalog: {}. ", err); + Error::InitError(err.to_string()) + })?; + let exists = catalog.table_exists(table_ident).await.map_err(|err| { + error!("Failed to load table from catalog: {}", err); + Error::InitError(err.to_string()) + })?; + + if !exists { + continue; + }; + + tables_found += 1; + let table = catalog.load_table(table_ident).await.map_err(|err| { + error!("Failed to load table from catalog: {}", err); + Error::InitError(err.to_string()) + })?; + tables.push(table); + } + info!( + "Static router found {} tables on iceberg catalog from {} tables declared", + tables_found, + declared_tables.len() + ); + Ok(StaticRouter { tables, catalog }) + } +} + +#[async_trait] +impl Router for StaticRouter { + async fn route_data( + &self, + messages_metadata: MessagesMetadata, + messages: Vec, + ) -> Result<(), crate::Error> { + for table in &self.tables { + write_data( + &messages, + table, + self.catalog.as_ref(), + messages_metadata.schema, + ) + .await?; + info!( + "Routed {} messages to iceberg table {} successfully", + messages.len(), + table.identifier().name() + ); + } + + Ok(()) + } +} diff --git a/core/connectors/sinks/iceberg_sink/src/sink.rs b/core/connectors/sinks/iceberg_sink/src/sink.rs new file mode 100644 index 000000000..47184566e --- /dev/null +++ b/core/connectors/sinks/iceberg_sink/src/sink.rs @@ -0,0 +1,91 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +use crate::{ + IcebergSink, + catalog::init_catalog, + router::{dynamic_router::DynamicRouter, static_router::StaticRouter}, +}; +use async_trait::async_trait; +use iceberg::Catalog; +use iggy_connector_sdk::{ConsumedMessage, Error, MessagesMetadata, Sink, TopicMetadata}; +use tracing::{debug, error, info}; + +#[async_trait] +impl Sink for IcebergSink { + async fn open(&mut self) -> Result<(), Error> { + info!( + "Opened Iceberg sink connector with ID: {} for URL: {}", + self.id, self.config.uri + ); + + info!( + "Configuring Iceberg catalog with the following config:\n-region: {}\n-url: {}\n-store class: {}\n-catalog type: {}\n", + self.config.store_region, + self.config.store_url, + self.config.store_class, + self.config.catalog_type + ); + + let catalog: Box = init_catalog(&self.config).await?; + + if self.config.dynamic_routing { + self.router = Some(Box::new(DynamicRouter::new( + catalog, + self.config.dynamic_route_field.clone(), + ))) + } else { + self.router = Some(Box::new( + StaticRouter::new(catalog, &self.config.tables).await?, + )); + } + + Ok(()) + } + + async fn consume( + &self, + _topic_metadata: &TopicMetadata, + messages_metadata: MessagesMetadata, + messages: Vec, + ) -> Result<(), Error> { + debug!( + "Iceberg sink with ID: {} received: {} messages, format: {}", + self.id, + messages.len(), + messages_metadata.schema + ); + + match &self.router { + 
Some(router) => router.route_data(messages_metadata, messages).await?, + None => { + error!("Iceberg connector has no router configured"); + return Err(Error::InvalidConfig); + } + }; + + debug!("Finished successfully"); + + Ok(()) + } + + async fn close(&mut self) -> Result<(), Error> { + info!("Iceberg sink connector with ID: {} is closed.", self.id); + Ok(()) + } +} From c7b6a2f2661f6f3b5f4a5035ababd28cf4eab087 Mon Sep 17 00:00:00 2001 From: Edgar Modesto Date: Thu, 25 Sep 2025 20:45:53 -0700 Subject: [PATCH 26/28] Reduce allocations for arrow_json reader --- .../iceberg_sink/src/router/arrow_streamer.rs | 79 +++++++++++++++++ .../iceberg_sink/src/router/dynamic_router.rs | 84 ++++++++----------- .../sinks/iceberg_sink/src/router/mod.rs | 54 ++++++------ .../iceberg_sink/src/router/static_router.rs | 57 +++++++------ 4 files changed, 176 insertions(+), 98 deletions(-) create mode 100644 core/connectors/sinks/iceberg_sink/src/router/arrow_streamer.rs diff --git a/core/connectors/sinks/iceberg_sink/src/router/arrow_streamer.rs b/core/connectors/sinks/iceberg_sink/src/router/arrow_streamer.rs new file mode 100644 index 000000000..793cd6988 --- /dev/null +++ b/core/connectors/sinks/iceberg_sink/src/router/arrow_streamer.rs @@ -0,0 +1,79 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +use simd_json::OwnedValue; +use std::io::{self, BufRead, Cursor, Read}; +use std::slice::Iter; + +pub struct JsonArrowReader<'a> { + values: Iter<'a, &'a OwnedValue>, + cursor: Cursor>, +} + +impl<'a> JsonArrowReader<'a> { + pub fn new(values: &'a [&OwnedValue]) -> Self { + Self { + values: values.iter(), + cursor: Cursor::new(Vec::new()), + } + } + + fn load_next(&mut self) -> io::Result { + if let Some(val) = self.values.next() { + let mut buf = Vec::new(); + simd_json::to_writer(&mut buf, val).map_err(io::Error::other)?; + buf.push(b'\n'); + self.cursor = Cursor::new(buf); + Ok(true) + } else { + Ok(false) + } + } +} + +impl<'a> Read for JsonArrowReader<'a> { + fn read(&mut self, out: &mut [u8]) -> io::Result { + loop { + let n = self.cursor.read(out)?; + if n > 0 { + return Ok(n); + } + if !self.load_next()? { + return Ok(0); + } + } + } +} + +impl<'a> BufRead for JsonArrowReader<'a> { + fn fill_buf(&mut self) -> io::Result<&[u8]> { + loop { + if self.cursor.position() < self.cursor.get_ref().len() as u64 { + return Ok(&self.cursor.get_ref()[self.cursor.position() as usize..]); + } + + if !self.load_next()? { + return Ok(&[]); + } + } + } + + fn consume(&mut self, amt: usize) { + self.cursor.consume(amt) + } +} diff --git a/core/connectors/sinks/iceberg_sink/src/router/dynamic_router.rs b/core/connectors/sinks/iceberg_sink/src/router/dynamic_router.rs index 331b7dab2..ee204fe26 100644 --- a/core/connectors/sinks/iceberg_sink/src/router/dynamic_router.rs +++ b/core/connectors/sinks/iceberg_sink/src/router/dynamic_router.rs @@ -16,17 +16,14 @@ * under the License. 
*/ -use crate::router::{Router, write_data}; -use crate::slice_user_table; +use crate::router::{Router, is_valid_namespaced_table, table_exists, write_data}; use async_trait::async_trait; use iceberg::Catalog; -use iceberg::TableIdent; use iceberg::table::Table; use iggy_connector_sdk::{ConsumedMessage, Error, MessagesMetadata, Payload}; use simd_json::base::ValueAsObject; use std::collections::HashMap; -use std::sync::Arc; -use tracing::{error, info, warn}; +use tracing::{info, warn}; #[derive(Debug)] pub struct DynamicRouter { @@ -36,7 +33,7 @@ pub struct DynamicRouter { pub struct DynamicWriter { pub tables_to_write: HashMap, - pub table_to_message: HashMap>>, + pub table_to_message: HashMap>, } impl DynamicWriter { @@ -49,51 +46,32 @@ impl DynamicWriter { } } - fn push_to_existing(&mut self, route_field_val: &str, message: &Arc) -> bool { + fn push_to_existing( + &mut self, + route_field_val: &str, + message: ConsumedMessage, + ) -> Option { if let Some(message_vec) = self.table_to_message.get_mut(route_field_val) { - message_vec.push(Arc::clone(message)); - true + message_vec.push(message); + None } else { - false + Some(message) } } - // This will: - // - Check if the table declared on the route field exists in the iceberg catalog. - // - If it does, it will try to load it to memory and map the name with the Table object so - // that we can dynamically send messages to it's correct destination. - async fn ensure_table_exists( + async fn load_table_if_exists( &mut self, route_field_val: &str, catalog: &dyn Catalog, - ) -> Result { - let sliced_table = slice_user_table(route_field_val); - let table_ident = &TableIdent::from_strs(&sliced_table).map_err(|err| { - error!("Failed to load table from catalog: {}. ", err); - Error::InitError(err.to_string()) - })?; - - if !catalog.table_exists(table_ident).await.map_err(|err| { - error!("Failed to load table from catalog: {}. ", err); - Error::InitError(err.to_string()) - })? 
{ - return Ok(false); - } - - let table_ident = TableIdent::from_strs(&sliced_table).map_err(|err| { - error!("Failed to load table from catalog: {}.", err); - Error::InitError(err.to_string()) - })?; - - let table = catalog.load_table(&table_ident).await.map_err(|err| { - error!("Failed to load table from catalog: {}", err); - Error::InitError(err.to_string()) - })?; + ) -> Result<(), Error> { + let table = table_exists(route_field_val, catalog) + .await + .ok_or(Error::InvalidState)?; self.tables_to_write .insert(route_field_val.to_string(), table); - Ok(true) + Ok(()) } } @@ -128,26 +106,35 @@ impl Router for DynamicRouter { ) -> Result<(), crate::Error> { let mut writer = DynamicWriter::new(); for message in messages { - let message = Arc::new(message); let route_field_val = match self.extract_route_field(&message) { Some(val) => val, None => continue, }; - if writer.push_to_existing(&route_field_val, &message) { + let message = match writer.push_to_existing(&route_field_val, message) { + Some(msg) => msg, + None => continue, + }; + + if !is_valid_namespaced_table(&route_field_val) { + warn!( + "Found invalid route field name on message: {}. Route fields should have at least 1 namespace separated by '.' character before the table", + route_field_val + ); continue; } let route_field_val_cloned = route_field_val.clone(); if writer - .ensure_table_exists(&route_field_val_cloned, self.catalog.as_ref()) - .await? 
+ .load_table_if_exists(&route_field_val_cloned, self.catalog.as_ref()) + .await + .is_ok() { if let Some(msgs) = writer.table_to_message.get_mut(&route_field_val_cloned) { msgs.push(message); } else { - let message_vec: Vec> = vec![message]; + let message_vec: Vec = vec![message]; writer .table_to_message .insert(route_field_val_cloned, message_vec); @@ -156,12 +143,15 @@ impl Router for DynamicRouter { } for (table_name, table_obj) in &writer.tables_to_write { - let batch_messages = match writer.table_to_message.get(table_name) { + let batch_messages = match writer.table_to_message.remove(table_name) { Some(m) => m, None => continue, }; + + let data: Vec = batch_messages.into_iter().map(|m| m.payload).collect(); + write_data( - batch_messages.iter().map(Arc::clone), + &data, table_obj, self.catalog.as_ref(), messages_metadata.schema, @@ -169,7 +159,7 @@ impl Router for DynamicRouter { .await?; info!( "Dynamically routed {} messages to {} iceberg table", - batch_messages.len(), + data.len(), table_name ); } diff --git a/core/connectors/sinks/iceberg_sink/src/router/mod.rs b/core/connectors/sinks/iceberg_sink/src/router/mod.rs index e18d22390..b29ebba1a 100644 --- a/core/connectors/sinks/iceberg_sink/src/router/mod.rs +++ b/core/connectors/sinks/iceberg_sink/src/router/mod.rs @@ -16,8 +16,11 @@ * under the License. 
*/ +use crate::router::arrow_streamer::JsonArrowReader; +use crate::slice_user_table; use arrow_json::ReaderBuilder; use async_trait::async_trait; +use iceberg::TableIdent; use iceberg::arrow::schema_to_arrow_schema; use iceberg::spec::{Literal, PrimitiveLiteral, PrimitiveType, Struct, StructType}; use iceberg::table::Table; @@ -31,14 +34,26 @@ use iceberg::{ }; use iggy_connector_sdk::{ConsumedMessage, Error, MessagesMetadata, Payload, Schema}; use parquet::file::properties::WriterProperties; -use std::io::Cursor; use std::sync::Arc; use tracing::{error, warn}; use uuid::Uuid; +mod arrow_streamer; pub mod dynamic_router; pub mod static_router; +pub fn is_valid_namespaced_table(input: &str) -> bool { + let parts: Vec<&str> = input.split('.').collect(); + parts.len() >= 2 && parts.iter().all(|part| !part.is_empty()) +} + +async fn table_exists(route_field_val: &str, catalog: &dyn Catalog) -> Option
{ + let sliced_table = slice_user_table(route_field_val); + let table_ident = TableIdent::from_strs(&sliced_table).ok()?; + + catalog.load_table(&table_ident).await.ok() +} + pub fn primitive_type_to_literal(pt: &PrimitiveType) -> Result { match pt { PrimitiveType::Boolean => Ok(PrimitiveLiteral::Boolean(false)), @@ -82,16 +97,12 @@ fn get_partition_type_value(default_partition_type: &StructType) -> Result( - messages: I, +async fn write_data( + messages: &[Payload], table: &Table, catalog: &dyn Catalog, messages_schema: Schema, -) -> Result<(), Error> -where - I: IntoIterator, - M: std::ops::Deref, -{ +) -> Result<(), Error> { let location = DefaultLocationGenerator::new(table.metadata().clone()).map_err(|err| { error!( "Failed to get location on table: {}. Error: {}", @@ -126,28 +137,21 @@ where Error::InitError(err.to_string()) })?; - let json_messages = messages - .into_iter() - .filter_map(|record| match &record.payload { - Payload::Json(record) => simd_json::to_string(&record).ok(), + let msgs: Vec<&simd_json::OwnedValue> = messages + .iter() + .filter_map(|payload| match payload { + Payload::Json(value) => Some(value), _ => { - warn!("Unsupported payload format: {}", messages_schema); + warn!( + "Unsupported type of payload, expected JSON, got {}", + messages_schema.to_string() + ); None } }) - .collect::>() - .join("\n"); - - if json_messages.is_empty() { - error!( - "Could not serialize payload, expected JSON format, got {} instead", - messages_schema - ); - return Err(Error::InvalidPayloadType); - } - - let cursor = Cursor::new(json_messages); + .collect(); + let cursor = JsonArrowReader::new(msgs.as_slice()); let reader = ReaderBuilder::new(Arc::new( schema_to_arrow_schema(&table.metadata().current_schema().clone()).map_err(|err| { error!( diff --git a/core/connectors/sinks/iceberg_sink/src/router/static_router.rs b/core/connectors/sinks/iceberg_sink/src/router/static_router.rs index ce875c9de..bc344eb5c 100644 --- 
a/core/connectors/sinks/iceberg_sink/src/router/static_router.rs +++ b/core/connectors/sinks/iceberg_sink/src/router/static_router.rs @@ -16,14 +16,12 @@ * under the License. */ -use crate::router::{Router, write_data}; -use crate::slice_user_table; +use crate::router::{Router, is_valid_namespaced_table, table_exists, write_data}; use async_trait::async_trait; use iceberg::Catalog; -use iceberg::TableIdent; use iceberg::table::Table; -use iggy_connector_sdk::{ConsumedMessage, Error, MessagesMetadata}; -use tracing::{error, info}; +use iggy_connector_sdk::{ConsumedMessage, Error, MessagesMetadata, Payload}; +use tracing::{error, info, warn}; #[derive(Debug)] pub(crate) struct StaticRouter { @@ -37,34 +35,36 @@ impl StaticRouter { declared_tables: &Vec, ) -> Result { let mut tables: Vec
= Vec::with_capacity(declared_tables.len()); - let mut tables_found = 0; for declared_table in declared_tables { - let sliced_table = slice_user_table(declared_table); - let table_ident = &TableIdent::from_strs(sliced_table.clone()).map_err(|err| { - error!("Failed to load table from catalog: {}. ", err); - Error::InitError(err.to_string()) - })?; - let exists = catalog.table_exists(table_ident).await.map_err(|err| { - error!("Failed to load table from catalog: {}", err); - Error::InitError(err.to_string()) - })?; - - if !exists { + if !is_valid_namespaced_table(declared_table) { + error!( + "Declared table {} is not valid. It has to include at least one namespace before the table name separated by '.' character", + declared_table + ); continue; - }; + } - tables_found += 1; - let table = catalog.load_table(table_ident).await.map_err(|err| { - error!("Failed to load table from catalog: {}", err); - Error::InitError(err.to_string()) - })?; + let table = match table_exists(declared_table, catalog.as_ref()).await { + Some(table) => table, + None => { + warn!( + "Declared table {} doesn't exist in the configured catalog. Skipping...", + declared_table + ); + continue; + } + }; tables.push(table); } info!( "Static router found {} tables on iceberg catalog from {} tables declared", - tables_found, + tables.len(), declared_tables.len() ); + if tables.is_empty() { + error!("No valid tables found. 
Can't initiate Iceberg connector"); + return Err(Error::InvalidConfig); + } Ok(StaticRouter { tables, catalog }) } } @@ -76,9 +76,14 @@ impl Router for StaticRouter { messages_metadata: MessagesMetadata, messages: Vec, ) -> Result<(), crate::Error> { + let data: Vec = messages + .into_iter() + .map(|m: ConsumedMessage| m.payload) + .collect(); + for table in &self.tables { write_data( - &messages, + &data, table, self.catalog.as_ref(), messages_metadata.schema, @@ -86,7 +91,7 @@ impl Router for StaticRouter { .await?; info!( "Routed {} messages to iceberg table {} successfully", - messages.len(), + data.len(), table.identifier().name() ); } From 7f0039b8565f149113497366a8e52898e5fd608d Mon Sep 17 00:00:00 2001 From: Edgar Modesto Date: Sat, 27 Sep 2025 15:01:07 -0700 Subject: [PATCH 27/28] markdownlint --- core/connectors/sinks/iceberg_sink/README.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/core/connectors/sinks/iceberg_sink/README.md b/core/connectors/sinks/iceberg_sink/README.md index 035ed1659..52887329b 100644 --- a/core/connectors/sinks/iceberg_sink/README.md +++ b/core/connectors/sinks/iceberg_sink/README.md @@ -1,6 +1,6 @@ # Iceberg Sink Connector -The Iceberg Sink Connector allows you to consume messages from Iggy topics and store them in Iceberg tables. +The Iceberg Sink Connector allows you to consume messages from Iggy topics and store them in Iceberg tables. ## Features @@ -26,6 +26,7 @@ store_secret_access_key = "password" store_region = "us-east-1" store_class = "s3" ``` + # Configuration Options - **tables**: The names of the Iceberg tables you want to statically route Iggy messages to. The name should include the table’s namespace, separated by a dot (`.`). @@ -46,10 +47,10 @@ If you don't know the names of the Iceberg tables you want to route data to in a Insert a field in your Iggy messages with the name of the Iceberg table the message should be routed to. 
The Iggy connector will parse this field at runtime and route the message to the correct table. The Iggy Iceberg Connector will skip messages in the following cases: + - The table declared in the message field does not exist. - The message does not contain the field specified in the `dynamic_route_field` configuration option. - ### Dynamic routing configuration example ```toml @@ -76,6 +77,6 @@ value.static = "nyc.users" **Note:** The value in the message field **must** contain both the namespace and the table name, separated by a dot (`.`). Example: + - Namespace: `nyc` - Table name: `users` - From 9bd6322a307d539aab8367f8d834d490b71a1277 Mon Sep 17 00:00:00 2001 From: Edgar Modesto Date: Sun, 28 Sep 2025 12:53:32 -0700 Subject: [PATCH 28/28] Fix CI --- Cargo.toml | 6 +- DEPENDENCIES.md | 127 +++++++++++++++--- core/connectors/sinks/iceberg_sink/Cargo.toml | 2 +- core/connectors/sinks/iceberg_sink/README.md | 46 +++---- .../iggy_iceberg_sink_config.toml | 2 +- 5 files changed, 137 insertions(+), 46 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 8227a2f84..b5ebd4496 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -48,11 +48,11 @@ resolver = "2" [workspace.dependencies] aes-gcm = "0.10.3" ahash = { version = "0.8.12", features = ["serde"] } +anyhow = "1.0.100" +argon2 = "0.5.3" arrow = "55.2.0" arrow-array = "55.2.0" arrow-json = "55.2.0" -anyhow = "1.0.100" -argon2 = "0.5.3" async-broadcast = "0.7.2" async-dropper = { version = "0.3.1", features = ["tokio", "simple"] } async-trait = "0.1.89" @@ -104,10 +104,10 @@ flume = "0.11.1" futures = "0.3.31" futures-util = "0.3.31" human-repr = "1.1.0" +humantime = "2.3.0" iceberg = "0.6.0" iceberg-catalog-glue = "0.6.0" iceberg-catalog-rest = "0.6.0" -humantime = "2.3.0" iggy = { path = "core/sdk", version = "0.7.0" } iggy_binary_protocol = { path = "core/binary_protocol", version = "0.7.0" } iggy_common = { path = "core/common", version = "0.7.0" } diff --git a/DEPENDENCIES.md b/DEPENDENCIES.md index 
916411130..2432a3637 100644 --- a/DEPENDENCIES.md +++ b/DEPENDENCIES.md @@ -14,6 +14,7 @@ actix-web: 4.11.0, "Apache-2.0 OR MIT", actix-web-codegen: 4.3.0, "Apache-2.0 OR MIT", addr2line: 0.24.2, "Apache-2.0 OR MIT", adler2: 2.0.1, "0BSD OR Apache-2.0 OR MIT", +adler32: 1.2.0, "Zlib", aead: 0.5.2, "Apache-2.0 OR MIT", aes: 0.8.4, "Apache-2.0 OR MIT", aes-gcm: 0.10.3, "Apache-2.0 OR MIT", @@ -31,11 +32,25 @@ anstyle-query: 1.1.4, "Apache-2.0 OR MIT", anstyle-wincon: 3.0.10, "Apache-2.0 OR MIT", anyhow: 1.0.100, "Apache-2.0 OR MIT", anymap2: 0.13.0, "Apache-2.0 OR MIT", +apache-avro: 0.17.0, "Apache-2.0", arbitrary: 1.4.2, "Apache-2.0 OR MIT", arc-swap: 1.7.1, "Apache-2.0 OR MIT", argon2: 0.5.3, "Apache-2.0 OR MIT", +array-init: 2.1.0, "Apache-2.0 OR MIT", arrayref: 0.3.9, "BSD-2-Clause", arrayvec: 0.7.6, "Apache-2.0 OR MIT", +arrow-arith: 55.2.0, "Apache-2.0", +arrow-array: 55.2.0, "Apache-2.0", +arrow-buffer: 55.2.0, "Apache-2.0", +arrow-cast: 55.2.0, "Apache-2.0", +arrow-data: 55.2.0, "Apache-2.0", +arrow-ipc: 55.2.0, "Apache-2.0", +arrow-json: 55.2.0, "Apache-2.0", +arrow-ord: 55.2.0, "Apache-2.0", +arrow-schema: 55.2.0, "Apache-2.0", +arrow-select: 55.2.0, "Apache-2.0", +arrow-string: 55.2.0, "Apache-2.0", +as-any: 0.3.2, "Apache-2.0 OR MIT", assert_cmd: 2.0.17, "Apache-2.0 OR MIT", async-broadcast: 0.7.2, "Apache-2.0 OR MIT", async-compression: 0.4.30, "Apache-2.0 OR MIT", @@ -54,18 +69,39 @@ atomic-polyfill: 1.0.3, "Apache-2.0 OR MIT", atomic-waker: 1.1.2, "Apache-2.0 OR MIT", attohttpc: 0.30.1, "MPL-2.0", autocfg: 1.5.0, "Apache-2.0 OR MIT", +aws-config: 1.8.6, "Apache-2.0", +aws-credential-types: 1.2.6, "Apache-2.0", aws-creds: 0.39.0, "MIT", -aws-lc-rs: 1.14.0, "(Apache-2.0 OR ISC) AND ISC", -aws-lc-sys: 0.31.0, "(Apache-2.0 OR ISC) AND ISC AND OpenSSL", +aws-lc-rs: 1.14.1, "(Apache-2.0 OR ISC) AND ISC", +aws-lc-sys: 0.32.0, "(Apache-2.0 OR ISC) AND ISC AND OpenSSL", aws-region: 0.28.0, "MIT", +aws-runtime: 1.5.10, "Apache-2.0", +aws-sdk-glue: 1.120.0, 
"Apache-2.0", +aws-sdk-sso: 1.84.0, "Apache-2.0", +aws-sdk-ssooidc: 1.86.0, "Apache-2.0", +aws-sdk-sts: 1.86.0, "Apache-2.0", +aws-sigv4: 1.3.4, "Apache-2.0", +aws-smithy-async: 1.2.5, "Apache-2.0", +aws-smithy-http: 0.62.3, "Apache-2.0", +aws-smithy-http-client: 1.1.1, "Apache-2.0", +aws-smithy-json: 0.61.5, "Apache-2.0", +aws-smithy-observability: 0.1.3, "Apache-2.0", +aws-smithy-query: 0.60.7, "Apache-2.0", +aws-smithy-runtime: 1.9.2, "Apache-2.0", +aws-smithy-runtime-api: 1.9.0, "Apache-2.0", +aws-smithy-types: 1.3.2, "Apache-2.0", +aws-smithy-xml: 0.60.10, "Apache-2.0", +aws-types: 1.3.8, "Apache-2.0", axum: 0.7.9, "MIT", axum: 0.8.4, "MIT", axum-core: 0.4.5, "MIT", axum-core: 0.5.2, "MIT", axum-server: 0.7.2, "MIT", +backon: 1.5.2, "Apache-2.0", backtrace: 0.3.75, "Apache-2.0 OR MIT", base64: 0.21.7, "Apache-2.0 OR MIT", base64: 0.22.1, "Apache-2.0 OR MIT", +base64-simd: 0.8.0, "MIT", base64ct: 1.8.0, "Apache-2.0 OR MIT", bdd: 0.0.1, "Apache-2.0", beef: 0.5.2, "Apache-2.0 OR MIT", @@ -73,6 +109,8 @@ bench-dashboard-frontend: 0.4.1, "Apache-2.0", bench-dashboard-shared: 0.1.0, "Apache-2.0", bench-report: 0.2.2, "Apache-2.0", bench-runner: 0.1.0, "Apache-2.0", +bigdecimal: 0.4.8, "Apache-2.0 OR MIT", +bimap: 0.6.3, "Apache-2.0 OR MIT", bincode: 1.3.3, "MIT", bincode: 2.0.1, "MIT", bincode_derive: 2.0.1, "MIT", @@ -102,6 +140,7 @@ bytecount: 0.6.9, "Apache-2.0 OR MIT", bytemuck: 1.23.2, "Apache-2.0 OR MIT OR Zlib", byteorder: 1.5.0, "MIT OR Unlicense", bytes: 1.10.1, "MIT", +bytes-utils: 0.1.4, "Apache-2.0 OR MIT", bytestring: 1.5.0, "Apache-2.0 OR MIT", bzip2: 0.6.0, "Apache-2.0 OR MIT", camino: 1.2.0, "Apache-2.0 OR MIT", @@ -144,11 +183,14 @@ constant_time_eq: 0.3.1, "Apache-2.0 OR CC0-1.0 OR MIT-0", convert_case: 0.6.0, "MIT", convert_case: 0.7.1, "MIT", cookie: 0.16.2, "Apache-2.0 OR MIT", +core-foundation: 0.9.4, "Apache-2.0 OR MIT", core-foundation: 0.10.1, "Apache-2.0 OR MIT", core-foundation-sys: 0.8.7, "Apache-2.0 OR MIT", +core2: 0.4.0, "Apache-2.0 OR 
MIT", cpufeatures: 0.2.17, "Apache-2.0 OR MIT", crc: 3.3.0, "Apache-2.0 OR MIT", crc-catalog: 2.4.0, "Apache-2.0 OR MIT", +crc32c: 0.6.8, "Apache-2.0 OR MIT", crc32fast: 1.5.0, "Apache-2.0 OR MIT", critical-section: 1.2.0, "Apache-2.0 OR MIT", crossbeam: 0.8.4, "Apache-2.0 OR MIT", @@ -173,12 +215,13 @@ darling_core: 0.20.11, "MIT", darling_core: 0.21.3, "MIT", darling_macro: 0.20.11, "MIT", darling_macro: 0.21.3, "MIT", +dary_heap: 0.3.8, "Apache-2.0 OR MIT", dashmap: 6.1.0, "MIT", dbus: 0.9.9, "Apache-2.0 OR MIT", dbus-secret-service: 4.1.0, "Apache-2.0 OR MIT", deflate64: 0.1.9, "MIT", der: 0.7.10, "Apache-2.0 OR MIT", -deranged: 0.5.3, "Apache-2.0 OR MIT", +deranged: 0.5.4, "Apache-2.0 OR MIT", derive-new: 0.7.0, "MIT", derive_arbitrary: 1.4.2, "Apache-2.0 OR MIT", derive_builder: 0.20.2, "Apache-2.0 OR MIT", @@ -193,6 +236,7 @@ dircpy: 0.3.19, "MIT", dirs: 6.0.0, "Apache-2.0 OR MIT", dirs-sys: 0.5.0, "Apache-2.0 OR MIT", displaydoc: 0.2.5, "Apache-2.0 OR MIT", +dissimilar: 1.0.10, "Apache-2.0", dlopen2: 0.8.0, "Custom License File", dlopen2_derive: 0.4.1, "Custom License File", dlv-list: 0.5.2, "Apache-2.0 OR MIT", @@ -224,6 +268,7 @@ etcetera: 0.8.0, "Apache-2.0 OR MIT", etcetera: 0.10.0, "Apache-2.0 OR MIT", event-listener: 5.4.1, "Apache-2.0 OR MIT", event-listener-strategy: 0.5.4, "Apache-2.0 OR MIT", +expect-test: 1.5.1, "Apache-2.0 OR MIT", ext-trait: 1.0.1, "Apache-2.0 OR MIT OR Zlib", ext-trait-proc_macros: 1.0.1, "Apache-2.0 OR MIT OR Zlib", extension-traits: 1.0.1, "Apache-2.0 OR MIT OR Zlib", @@ -235,7 +280,7 @@ figment: 0.10.19, "Apache-2.0 OR MIT", file-operation: 0.8.4, "MIT", filetime: 0.2.26, "Apache-2.0 OR MIT", find-msvc-tools: 0.1.2, "Apache-2.0 OR MIT", -flatbuffers: 25.2.10, "Apache-2.0", +flatbuffers: 25.9.23, "Apache-2.0", flate2: 1.1.2, "Apache-2.0 OR MIT", float-cmp: 0.10.0, "MIT", flume: 0.11.1, "Apache-2.0 OR MIT", @@ -261,7 +306,6 @@ futures-sink: 0.3.31, "Apache-2.0 OR MIT", futures-task: 0.3.31, "Apache-2.0 OR MIT", futures-timer: 
3.0.3, "Apache-2.0 OR MIT", futures-util: 0.3.31, "Apache-2.0 OR MIT", -generator: 0.8.7, "Apache-2.0 OR MIT", generic-array: 0.14.7, "MIT", getrandom: 0.2.16, "Apache-2.0 OR MIT", getrandom: 0.3.3, "Apache-2.0 OR MIT", @@ -303,6 +347,7 @@ gloo-worker-macros: 0.1.0, "Apache-2.0 OR MIT", governor: 0.10.1, "MIT", h2: 0.3.27, "MIT", h2: 0.4.12, "MIT", +half: 2.6.0, "Apache-2.0 OR MIT", halfbrown: 0.3.0, "Apache-2.0 OR MIT", handlebars: 6.3.2, "MIT", hash32: 0.2.1, "Apache-2.0 OR MIT", @@ -323,6 +368,7 @@ home: 0.5.11, "Apache-2.0 OR MIT", hostname: 0.4.1, "MIT", http: 0.2.12, "Apache-2.0 OR MIT", http: 1.3.1, "Apache-2.0 OR MIT", +http-body: 0.4.6, "MIT", http-body: 1.0.1, "MIT", http-body-util: 0.1.3, "MIT", http-range: 0.1.5, "MIT", @@ -330,14 +376,19 @@ httparse: 1.10.1, "Apache-2.0 OR MIT", httpdate: 1.0.3, "Apache-2.0 OR MIT", human-repr: 1.1.0, "MIT", humantime: 2.3.0, "Apache-2.0 OR MIT", +hyper: 0.14.32, "MIT", hyper: 1.7.0, "MIT", hyper-named-pipe: 0.1.0, "Apache-2.0", +hyper-rustls: 0.24.2, "Apache-2.0 OR ISC OR MIT", hyper-rustls: 0.27.7, "Apache-2.0 OR ISC OR MIT", hyper-timeout: 0.5.2, "Apache-2.0 OR MIT", hyper-util: 0.1.17, "MIT", hyperlocal: 0.9.1, "MIT", iana-time-zone: 0.1.64, "Apache-2.0 OR MIT", iana-time-zone-haiku: 0.1.2, "Apache-2.0 OR MIT", +iceberg: 0.6.0, "Apache-2.0", +iceberg-catalog-glue: 0.6.0, "Apache-2.0", +iceberg-catalog-rest: 0.6.0, "Apache-2.0", icu_collections: 2.0.0, "Unicode-3.0", icu_locale_core: 2.0.0, "Unicode-3.0", icu_normalizer: 2.0.0, "Unicode-3.0", @@ -356,6 +407,7 @@ iggy-connectors: 0.1.1, "Apache-2.0", iggy-mcp: 0.1.1, "Apache-2.0", iggy_binary_protocol: 0.7.0, "Apache-2.0", iggy_common: 0.7.0, "Apache-2.0", +iggy_connector_iceberg_sink: 0.1.0, "Apache-2.0", iggy_connector_postgres_sink: 0.1.0, "Apache-2.0", iggy_connector_postgres_source: 0.1.0, "Apache-2.0", iggy_connector_quickwit_sink: 0.1.0, "Apache-2.0", @@ -376,6 +428,7 @@ inotify: 0.11.0, "ISC", inotify-sys: 0.1.5, "ISC", inout: 0.1.4, "Apache-2.0 OR MIT", 
instant: 0.1.13, "BSD-3-Clause", +integer-encoding: 3.0.4, "MIT", integration: 0.0.1, "Apache-2.0", inventory: 0.3.21, "Apache-2.0 OR MIT", io-uring: 0.7.10, "Apache-2.0 OR MIT", @@ -390,7 +443,7 @@ jiff-static: 0.2.15, "MIT OR Unlicense", jni: 0.21.1, "Apache-2.0 OR MIT", jni-sys: 0.3.0, "Apache-2.0 OR MIT", jobserver: 0.1.34, "Apache-2.0 OR MIT", -js-sys: 0.3.80, "Apache-2.0 OR MIT", +js-sys: 0.3.81, "Apache-2.0 OR MIT", jsonwebtoken: 9.3.1, "MIT", jwalk: 0.8.1, "MIT", keyring: 3.6.3, "Apache-2.0 OR MIT", @@ -402,11 +455,19 @@ lazy-regex-proc_macros: 3.4.1, "MIT", lazy_static: 1.5.0, "Apache-2.0 OR MIT", lending-iterator: 0.1.7, "Apache-2.0 OR MIT OR Zlib", lending-iterator-proc_macros: 0.1.7, "Apache-2.0 OR MIT OR Zlib", +lexical-core: 1.0.6, "Apache-2.0 OR MIT", +lexical-parse-float: 1.0.6, "Apache-2.0 OR MIT", +lexical-parse-integer: 1.0.6, "Apache-2.0 OR MIT", +lexical-util: 1.0.7, "Apache-2.0 OR MIT", +lexical-write-float: 1.0.6, "Apache-2.0 OR MIT", +lexical-write-integer: 1.0.6, "Apache-2.0 OR MIT", libbz2-rs-sys: 0.2.2, "bzip2-1.0.6", -libc: 0.2.175, "Apache-2.0 OR MIT", +libc: 0.2.176, "Apache-2.0 OR MIT", libdbus-sys: 0.2.6, "Apache-2.0 OR MIT", +libflate: 2.1.0, "MIT", +libflate_lz77: 2.1.0, "MIT", libgit2-sys: 0.18.2+1.9.1, "Apache-2.0 OR MIT", -libloading: 0.8.9, "ISC", +libloading: 0.8.8, "ISC", liblzma: 0.4.4, "Apache-2.0 OR MIT", liblzma-sys: 0.4.4, "Apache-2.0 OR MIT", libm: 0.2.15, "MIT", @@ -426,8 +487,8 @@ log: 0.4.28, "Apache-2.0 OR MIT", logos: 0.15.1, "Apache-2.0 OR MIT", logos-codegen: 0.15.1, "Apache-2.0 OR MIT", logos-derive: 0.15.1, "Apache-2.0 OR MIT", -loom: 0.7.2, "MIT", lru-slab: 0.1.2, "Apache-2.0 OR MIT OR Zlib", +lz4_flex: 0.11.5, "MIT", lzma-rust2: 0.13.0, "Apache-2.0", macro_rules_attribute: 0.1.3, "MIT", macro_rules_attribute-proc_macro: 0.1.3, "MIT", @@ -449,7 +510,8 @@ miniz_oxide: 0.8.9, "Apache-2.0 OR MIT OR Zlib", mio: 1.0.4, "MIT", mockall: 0.13.1, "Apache-2.0 OR MIT", mockall_derive: 0.13.1, "Apache-2.0 OR MIT", -moka: 
0.12.10, "Apache-2.0 OR MIT", +moka: 0.12.11, "(Apache-2.0 OR MIT) AND Apache-2.0", +murmur3: 0.5.2, "Apache-2.0 OR MIT", nanorand: 0.7.0, "Zlib", never-say-never: 6.6.666, "Apache-2.0 OR MIT OR Zlib", nix: 0.30.1, "MIT", @@ -484,6 +546,7 @@ octocrab: 0.45.0, "Apache-2.0 OR MIT", once_cell: 1.21.3, "Apache-2.0 OR MIT", once_cell_polyfill: 1.70.1, "Apache-2.0 OR MIT", opaque-debug: 0.3.1, "Apache-2.0 OR MIT", +opendal: 0.54.0, "Apache-2.0", openssl: 0.10.73, "Apache-2.0", openssl-macros: 0.1.1, "Apache-2.0 OR MIT", openssl-probe: 0.1.6, "Apache-2.0 OR MIT", @@ -497,12 +560,16 @@ opentelemetry-proto: 0.30.0, "Apache-2.0", opentelemetry-semantic-conventions: 0.30.0, "Apache-2.0", opentelemetry_sdk: 0.30.0, "Apache-2.0", option-ext: 0.2.0, "MPL-2.0", +ordered-float: 2.10.1, "MIT", +ordered-float: 4.6.0, "MIT", ordered-multimap: 0.7.3, "MIT", +outref: 0.5.2, "MIT", parking: 2.2.1, "Apache-2.0 OR MIT", parking_lot: 0.11.2, "Apache-2.0 OR MIT", parking_lot: 0.12.4, "Apache-2.0 OR MIT", parking_lot_core: 0.8.6, "Apache-2.0 OR MIT", parking_lot_core: 0.9.11, "Apache-2.0 OR MIT", +parquet: 55.2.0, "Apache-2.0", parse-display: 0.9.1, "Apache-2.0 OR MIT", parse-display-derive: 0.9.1, "Apache-2.0 OR MIT", passterm: 2.0.1, "BSD-3-Clause", @@ -562,7 +629,9 @@ protox: 0.9.0, "Apache-2.0 OR MIT", protox-parse: 0.9.0, "Apache-2.0 OR MIT", ptr_meta: 0.1.4, "MIT", ptr_meta_derive: 0.1.4, "MIT", +quad-rand: 0.2.3, "MIT", quanta: 0.12.6, "MIT", +quick-xml: 0.37.5, "MIT", quick-xml: 0.38.3, "MIT", quinn: 0.11.9, "Apache-2.0 OR MIT", quinn-proto: 0.11.13, "Apache-2.0 OR MIT", @@ -592,6 +661,7 @@ regex-lite: 0.1.7, "Apache-2.0 OR MIT", regex-syntax: 0.7.5, "Apache-2.0 OR MIT", regex-syntax: 0.8.6, "Apache-2.0 OR MIT", rend: 0.4.2, "MIT", +reqsign: 0.16.5, "Apache-2.0", reqwest: 0.12.23, "Apache-2.0 OR MIT", reqwest-middleware: 0.4.2, "Apache-2.0 OR MIT", reqwest-retry: 0.7.0, "Apache-2.0 OR MIT", @@ -599,8 +669,10 @@ retry-policies: 0.4.0, "Apache-2.0 OR MIT", ring: 0.17.14, "Apache-2.0 
AND ISC", rkyv: 0.7.45, "MIT", rkyv_derive: 0.7.45, "MIT", +rle-decode-fast: 1.0.3, "Apache-2.0 OR MIT", rmcp: 0.6.4, "MIT", rmcp-macros: 0.6.4, "MIT", +roaring: 0.10.12, "Apache-2.0 OR MIT", route-recognizer: 0.3.1, "MIT", rsa: 0.9.8, "Apache-2.0 OR MIT", rust-ini: 0.21.3, "MIT", @@ -610,12 +682,16 @@ rustc-demangle: 0.1.26, "Apache-2.0 OR MIT", rustc-hash: 2.1.1, "Apache-2.0 OR MIT", rustc_version: 0.4.1, "Apache-2.0 OR MIT", rustix: 1.1.2, "Apache-2.0 OR Apache-2.0 WITH LLVM-exception OR MIT", +rustls: 0.21.12, "Apache-2.0 OR ISC OR MIT", rustls: 0.23.32, "Apache-2.0 OR ISC OR MIT", +rustls-native-certs: 0.6.3, "Apache-2.0 OR ISC OR MIT", rustls-native-certs: 0.8.1, "Apache-2.0 OR ISC OR MIT", +rustls-pemfile: 1.0.4, "Apache-2.0 OR ISC OR MIT", rustls-pemfile: 2.2.0, "Apache-2.0 OR ISC OR MIT", rustls-pki-types: 1.12.0, "Apache-2.0 OR MIT", rustls-platform-verifier: 0.6.1, "Apache-2.0 OR MIT", rustls-platform-verifier-android: 0.1.1, "Apache-2.0 OR MIT", +rustls-webpki: 0.101.7, "ISC", rustls-webpki: 0.103.6, "ISC", rustversion: 1.0.22, "Apache-2.0 OR MIT", rxml: 0.11.1, "MIT", @@ -627,18 +703,21 @@ schannel: 0.1.28, "MIT", schemars: 0.9.0, "MIT", schemars: 1.0.4, "MIT", schemars_derive: 1.0.4, "MIT", -scoped-tls: 1.0.1, "Apache-2.0 OR MIT", scopeguard: 1.2.0, "Apache-2.0 OR MIT", +sct: 0.7.1, "Apache-2.0 OR ISC OR MIT", sdd: 3.0.10, "Apache-2.0", seahash: 4.1.0, "MIT", sealed: 0.5.0, "Apache-2.0 OR MIT", secrecy: 0.10.3, "Apache-2.0 OR MIT", +security-framework: 2.11.1, "Apache-2.0 OR MIT", security-framework: 3.5.0, "Apache-2.0 OR MIT", security-framework-sys: 2.15.0, "Apache-2.0 OR MIT", semver: 1.0.27, "Apache-2.0 OR MIT", +seq-macro: 0.3.6, "Apache-2.0 OR MIT", serde: 1.0.226, "Apache-2.0 OR MIT", serde-wasm-bindgen: 0.5.0, "MIT", serde-wasm-bindgen: 0.6.5, "MIT", +serde_bytes: 0.11.19, "Apache-2.0 OR MIT", serde_core: 1.0.226, "Apache-2.0 OR MIT", serde_derive: 1.0.226, "Apache-2.0 OR MIT", serde_derive_internals: 0.29.1, "Apache-2.0 OR MIT", @@ -671,6 
+750,7 @@ smart-default: 0.7.1, "MIT", smawk: 0.3.2, "MIT", snafu: 0.8.9, "Apache-2.0 OR MIT", snafu-derive: 0.8.9, "Apache-2.0 OR MIT", +snap: 1.1.1, "BSD-3-Clause", socket2: 0.5.10, "Apache-2.0 OR MIT", socket2: 0.6.0, "Apache-2.0 OR MIT", spin: 0.9.8, "MIT", @@ -691,7 +771,9 @@ stringprep: 0.1.5, "Apache-2.0 OR MIT", strsim: 0.11.1, "MIT", structmeta: 0.3.0, "Apache-2.0 OR MIT", structmeta-derive: 0.3.0, "Apache-2.0 OR MIT", +strum: 0.26.3, "MIT", strum: 0.27.2, "MIT", +strum_macros: 0.26.4, "MIT", strum_macros: 0.27.2, "MIT", subtle: 2.6.1, "BSD-3-Clause", syn: 1.0.109, "Apache-2.0 OR MIT", @@ -719,6 +801,8 @@ thiserror: 2.0.16, "Apache-2.0 OR MIT", thiserror-impl: 1.0.69, "Apache-2.0 OR MIT", thiserror-impl: 2.0.16, "Apache-2.0 OR MIT", thread_local: 1.1.9, "Apache-2.0 OR MIT", +threadpool: 1.8.1, "Apache-2.0 OR MIT", +thrift: 0.17.0, "Apache-2.0", time: 0.3.44, "Apache-2.0 OR MIT", time-core: 0.1.6, "Apache-2.0 OR MIT", time-macros: 0.2.24, "Apache-2.0 OR MIT", @@ -728,6 +812,7 @@ tinyvec: 1.10.0, "Apache-2.0 OR MIT OR Zlib", tinyvec_macros: 0.1.1, "Apache-2.0 OR MIT OR Zlib", tokio: 1.47.1, "MIT", tokio-macros: 2.5.0, "MIT", +tokio-rustls: 0.24.1, "Apache-2.0 OR MIT", tokio-rustls: 0.26.3, "Apache-2.0 OR MIT", tokio-stream: 0.1.17, "MIT", tokio-tar: 0.3.1, "Apache-2.0 OR MIT", @@ -761,7 +846,11 @@ trait-variant: 0.1.2, "Apache-2.0 OR MIT", try-lock: 0.2.5, "MIT", twox-hash: 2.1.2, "MIT", typed-builder: 0.15.2, "Apache-2.0 OR MIT", +typed-builder: 0.19.1, "Apache-2.0 OR MIT", +typed-builder: 0.20.1, "Apache-2.0 OR MIT", typed-builder-macro: 0.15.2, "Apache-2.0 OR MIT", +typed-builder-macro: 0.19.1, "Apache-2.0 OR MIT", +typed-builder-macro: 0.20.1, "Apache-2.0 OR MIT", typenum: 1.18.0, "Apache-2.0 OR MIT", ucd-trie: 0.1.7, "Apache-2.0 OR MIT", ulid: 1.2.1, "MIT", @@ -796,6 +885,7 @@ vergen-git2: 1.0.7, "Apache-2.0 OR MIT", vergen-lib: 0.1.6, "Apache-2.0 OR MIT", version_check: 0.9.5, "Apache-2.0 OR MIT", virtue: 0.0.18, "MIT", +vsimd: 0.8.0, "MIT", 
wait-timeout: 0.2.1, "Apache-2.0 OR MIT", walkdir: 2.5.0, "MIT OR Unlicense", want: 0.3.1, "MIT", @@ -803,15 +893,15 @@ wasi: 0.11.1+wasi-snapshot-preview1, "Apache-2.0 OR Apache-2.0 WITH LLVM-excepti wasi: 0.14.7+wasi-0.2.4, "Apache-2.0 OR Apache-2.0 WITH LLVM-exception OR MIT", wasip2: 1.0.1+wasi-0.2.4, "Apache-2.0 OR Apache-2.0 WITH LLVM-exception OR MIT", wasite: 0.1.0, "Apache-2.0 OR BSL-1.0 OR MIT", -wasm-bindgen: 0.2.103, "Apache-2.0 OR MIT", -wasm-bindgen-backend: 0.2.103, "Apache-2.0 OR MIT", -wasm-bindgen-futures: 0.4.53, "Apache-2.0 OR MIT", -wasm-bindgen-macro: 0.2.103, "Apache-2.0 OR MIT", -wasm-bindgen-macro-support: 0.2.103, "Apache-2.0 OR MIT", -wasm-bindgen-shared: 0.2.103, "Apache-2.0 OR MIT", +wasm-bindgen: 0.2.104, "Apache-2.0 OR MIT", +wasm-bindgen-backend: 0.2.104, "Apache-2.0 OR MIT", +wasm-bindgen-futures: 0.4.54, "Apache-2.0 OR MIT", +wasm-bindgen-macro: 0.2.104, "Apache-2.0 OR MIT", +wasm-bindgen-macro-support: 0.2.104, "Apache-2.0 OR MIT", +wasm-bindgen-shared: 0.2.104, "Apache-2.0 OR MIT", wasm-streams: 0.4.2, "Apache-2.0 OR MIT", wasm-timer: 0.2.5, "MIT", -web-sys: 0.3.80, "Apache-2.0 OR MIT", +web-sys: 0.3.81, "Apache-2.0 OR MIT", web-time: 1.1.0, "Apache-2.0 OR MIT", webpki-root-certs: 1.0.2, "CDLA-Permissive-2.0", webpki-roots: 0.26.11, "CDLA-Permissive-2.0", @@ -887,6 +977,7 @@ wit-bindgen: 0.46.0, "Apache-2.0 OR Apache-2.0 WITH LLVM-exception OR MIT", writeable: 0.6.1, "Unicode-3.0", wyz: 0.5.1, "MIT", xattr: 1.6.1, "Apache-2.0 OR MIT", +xmlparser: 0.13.6, "Apache-2.0 OR MIT", yansi: 1.0.1, "Apache-2.0 OR MIT", yasna: 0.5.2, "Apache-2.0 OR MIT", yew: 0.21.0, "Apache-2.0 OR MIT", diff --git a/core/connectors/sinks/iceberg_sink/Cargo.toml b/core/connectors/sinks/iceberg_sink/Cargo.toml index 9949fbd8a..556570052 100644 --- a/core/connectors/sinks/iceberg_sink/Cargo.toml +++ b/core/connectors/sinks/iceberg_sink/Cargo.toml @@ -45,6 +45,6 @@ once_cell = { workspace = true } parquet = { workspace = true } serde = { workspace = true } 
simd-json = { workspace = true } +strum = { workspace = true } tracing = { workspace = true } uuid = { workspace = true } -strum = { workspace = true } diff --git a/core/connectors/sinks/iceberg_sink/README.md b/core/connectors/sinks/iceberg_sink/README.md index 52887329b..e7e0a3808 100644 --- a/core/connectors/sinks/iceberg_sink/README.md +++ b/core/connectors/sinks/iceberg_sink/README.md @@ -21,35 +21,35 @@ uri = "http://localhost:8181" dynamic_routing = true dynamic_route_field = "db_table" store_url = "http://localhost:9000" -store_access_key_id = "admin" +store_access_key_id = "admin" store_secret_access_key = "password" store_region = "us-east-1" store_class = "s3" ``` -# Configuration Options +## Configuration Options -- **tables**: The names of the Iceberg tables you want to statically route Iggy messages to. The name should include the table’s namespace, separated by a dot (`.`). -- **catalog_type**: The type of catalog you are routing data to. **Currently, only REST catalogs are fully supported.** -- **warehouse**: The name of the bucket or warehouse where Iggy will upload data files. -- **URI**: The URI of the Iceberg catalog. -- **dynamic_routing**: Enables dynamic routing. See more details later in this document. -- **dynamic_route_field**: The name of the message field that specifies the Iceberg table to route data to. See more details below. -- **store_url**: The URL of the object storage for data uploads. -- **store_access_key_id**: The access key ID of the object storage. -- **store_secret_access_key**: The secret key used to upload data to the object storage. -- **store_region**: The region of the object storage, if applicable. -- **store_class**: The storage class to use. **Currently, only S3-compatible storage is supported.** +- **tables**: The names of the Iceberg tables you want to statically route Iggy messages to. The name should include the table’s namespace, separated by a dot (`.`). 
+- **catalog_type**: The type of catalog you are routing data to. **Currently, only REST catalogs are fully supported.** +- **warehouse**: The name of the bucket or warehouse where Iggy will upload data files. +- **URI**: The URI of the Iceberg catalog. +- **dynamic_routing**: Enables dynamic routing. See more details later in this document. +- **dynamic_route_field**: The name of the message field that specifies the Iceberg table to route data to. See more details below. +- **store_url**: The URL of the object storage for data uploads. +- **store_access_key_id**: The access key ID of the object storage. +- **store_secret_access_key**: The secret key used to upload data to the object storage. +- **store_region**: The region of the object storage, if applicable. +- **store_class**: The storage class to use. **Currently, only S3-compatible storage is supported.** ## Dynamic Routing -If you don't know the names of the Iceberg tables you want to route data to in advance, you can use the dynamic routing feature. -Insert a field in your Iggy messages with the name of the Iceberg table the message should be routed to. The Iggy connector will parse this field at runtime and route the message to the correct table. +If you don't know the names of the Iceberg tables you want to route data to in advance, you can use the dynamic routing feature. +Insert a field in your Iggy messages with the name of the Iceberg table the message should be routed to. The Iggy connector will parse this field at runtime and route the message to the correct table. -The Iggy Iceberg Connector will skip messages in the following cases: +The Iggy Iceberg Connector will skip messages in the following cases: -- The table declared in the message field does not exist. -- The message does not contain the field specified in the `dynamic_route_field` configuration option. +- The table declared in the message field does not exist. 
+- The message does not contain the field specified in the `dynamic_route_field` configuration option. ### Dynamic routing configuration example @@ -62,7 +62,7 @@ uri = "http://localhost:8181" dynamic_routing = true dynamic_route_field = "db_table" store_url = "http://localhost:9000" -store_access_key_id = "admin" +store_access_key_id = "admin" store_secret_access_key = "password" store_region = "us-east-1" store_class = "s3" @@ -75,8 +75,8 @@ key = "db_table" value.static = "nyc.users" ``` -**Note:** The value in the message field **must** contain both the namespace and the table name, separated by a dot (`.`). -Example: +**Note:** The value in the message field **must** contain both the namespace and the table name, separated by a dot (`.`). +Example: -- Namespace: `nyc` -- Table name: `users` +- Namespace: `nyc` +- Table name: `users` diff --git a/core/connectors/sinks/iceberg_sink/iggy_iceberg_sink_config.toml b/core/connectors/sinks/iceberg_sink/iggy_iceberg_sink_config.toml index 35c370c8b..143f908d1 100644 --- a/core/connectors/sinks/iceberg_sink/iggy_iceberg_sink_config.toml +++ b/core/connectors/sinks/iceberg_sink/iggy_iceberg_sink_config.toml @@ -66,7 +66,7 @@ uri = "http://localhost:8181" dynamic_routing = true dynamic_route_field = "db_table" store_url = "http://localhost:9000" -store_access_key_id = "admin" +store_access_key_id = "admin" store_secret_access_key = "password" store_region = "us-east-1" store_class = "s3"