From 392eb5dcc9decdf6166946da5887d9b211b893b3 Mon Sep 17 00:00:00 2001 From: baishen Date: Tue, 22 Apr 2025 16:26:31 +0800 Subject: [PATCH 01/13] feat: variant type support extension types --- Cargo.toml | 1 + src/query/expression/src/types/variant.rs | 34 ++++++++++++--- src/query/functions/src/scalars/variant.rs | 51 ++++++++++++++++++++++ 3 files changed, 81 insertions(+), 5 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 62d55cd097e3..2db94cfc96dd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -638,6 +638,7 @@ color-eyre = { git = "https://github.com/eyre-rs/eyre.git", rev = "e5d92c3" } deltalake = { git = "https://github.com/delta-io/delta-rs", rev = "c149502" } display-more = { git = "https://github.com/databendlabs/display-more", tag = "v0.1.2" } ethnum = { git = "https://github.com/datafuse-extras/ethnum-rs", rev = "4cb05f1" } +jsonb = { git = "https://github.com/b41sh/jsonb", rev = "4e9315efa67c46777fd3097f0d189ff5990ac24c" } map-api = { git = "https://github.com/databendlabs/map-api", tag = "v0.2.3" } openai_api_rust = { git = "https://github.com/datafuse-extras/openai-api", rev = "819a0ed" } openraft = { git = "https://github.com/databendlabs/openraft", tag = "v0.10.0-alpha.9" } diff --git a/src/query/expression/src/types/variant.rs b/src/query/expression/src/types/variant.rs index 41af0f351881..e1165f5c86c9 100644 --- a/src/query/expression/src/types/variant.rs +++ b/src/query/expression/src/types/variant.rs @@ -230,13 +230,37 @@ pub fn cast_scalar_to_variant(scalar: ScalarRef, tz: &TimeZone, buf: &mut Vec n.0.into(), NumberScalar::Float64(n) => n.0.into(), }, - ScalarRef::Decimal(x) => x.to_float64().into(), + ScalarRef::Decimal(x) => { + match x { + DecimalScalar::Decimal128(value, size) => { + let dec = jsonb::Decimal128 { + scale: size.scale, + value, + }; + jsonb::Value::Number(jsonb::Number::Decimal128(dec)) + } + DecimalScalar::Decimal256(value, size) => { + let dec = jsonb::Decimal256 { + scale: size.scale, + value, + }; + 
jsonb::Value::Number(jsonb::Number::Decimal256(dec)) + } + } + } ScalarRef::Boolean(b) => jsonb::Value::Bool(b), - ScalarRef::Binary(s) => jsonb::Value::String(hex::encode_upper(s).into()), + ScalarRef::Binary(s) => jsonb::Value::Binary(s), ScalarRef::String(s) => jsonb::Value::String(s.into()), - ScalarRef::Timestamp(ts) => timestamp_to_string(ts, tz).to_string().into(), - ScalarRef::Date(d) => date_to_string(d, tz).to_string().into(), - ScalarRef::Interval(i) => interval_to_string(&i).to_string().into(), + ScalarRef::Timestamp(ts) => jsonb::Value::Timestamp(jsonb::Timestamp {offset:0, value: ts}), + ScalarRef::Date(d) => jsonb::Value::Timestamp(jsonb::Date {offset:0, value: d}),, + ScalarRef::Interval(i) => { + let interval = jsonb::Interval { + months: i.months(), + days: i.days(), + microseconds: i.microseconds(), + }; + jsonb::Value::Interval(interval) + } ScalarRef::Array(col) => { let items = cast_scalars_to_variants(col.iter(), tz); let owned_jsonb = OwnedJsonb::build_array(items.iter().map(RawJsonb::new)) diff --git a/src/query/functions/src/scalars/variant.rs b/src/query/functions/src/scalars/variant.rs index 88590faad7f2..7b31c548a632 100644 --- a/src/query/functions/src/scalars/variant.rs +++ b/src/query/functions/src/scalars/variant.rs @@ -729,6 +729,57 @@ pub fn register(registry: &mut FunctionRegistry) { }), ); + registry.register_combine_nullable_1_arg::( + "as_binary", + |_, _| FunctionDomain::Full, + vectorize_with_builder_1_arg::>(|v, output, ctx| { + if let Some(validity) = &ctx.validity { + if !validity.get_bit(output.len()) { + output.push_null(); + return; + } + } + match RawJsonb::new(v).as_binary() { + Ok(Some(res)) => output.push(&res), + _ => output.push_null(), + } + }), + ); + + registry.register_combine_nullable_1_arg::( + "as_date", + |_, _| FunctionDomain::Full, + vectorize_with_builder_1_arg::>(|v, output, ctx| { + if let Some(validity) = &ctx.validity { + if !validity.get_bit(output.len()) { + output.push_null(); + return; + } + } 
+ match RawJsonb::new(v).as_date() { + Ok(Some(res)) => output.push(res.value), + _ => output.push_null(), + } + }), + ); + + registry.register_combine_nullable_1_arg::( + "as_timestamp", + |_, _| FunctionDomain::Full, + vectorize_with_builder_1_arg::>(|v, output, ctx| { + if let Some(validity) = &ctx.validity { + if !validity.get_bit(output.len()) { + output.push_null(); + return; + } + } + match RawJsonb::new(v).as_timestamp() { + Ok(Some(res)) => output.push(res.value), + _ => output.push_null(), + } + }), + ); + registry.register_combine_nullable_1_arg::( "as_array", |_, _| FunctionDomain::Full, From 5977d7f39910e6539140cf8da0365d5b2c6de452 Mon Sep 17 00:00:00 2001 From: baishen Date: Tue, 22 Apr 2025 17:48:55 +0800 Subject: [PATCH 02/13] fix --- Cargo.lock | 237 +++++++++++---------- Cargo.toml | 4 +- src/query/expression/src/types/variant.rs | 7 +- src/query/functions/src/scalars/variant.rs | 20 ++ 4 files changed, 155 insertions(+), 113 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3f87339ae67a..99e70c297b67 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -611,7 +611,7 @@ source = "git+https://github.com/datafuse-extras/async-backtrace.git?rev=dea4553 dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ -712,7 +712,7 @@ source = "git+https://github.com/datafuse-extras/async-recursion.git?rev=a353334 dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ -734,7 +734,7 @@ checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ -751,7 +751,7 @@ checksum = "d556ec1359574147ec0c4fc5eb525f3f23263a592b1a9c07e0a75b427de55c97" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ -1322,7 +1322,7 @@ dependencies = [ "regex", "rustc-hash 1.1.0", "shlex", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ -1510,7 +1510,7 @@ dependencies = [ 
"proc-macro-crate 3.1.0", "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.100", "syn_derive", ] @@ -1652,7 +1652,7 @@ checksum = "1ee891b04274a59bd38b412188e24b849617b2e45a0fd8d057deb63e7403761b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ -2081,7 +2081,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ -2725,7 +2725,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "edb49164822f3ee45b17acd4a208cfc1251410cf0cad9a833234c9890774dd9f" dependencies = [ "quote", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ -2768,7 +2768,7 @@ dependencies = [ "proc-macro2", "quote", "strsim 0.11.1", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ -2779,7 +2779,7 @@ checksum = "d336a2a514f6ccccaa3e09b02d41d35330c07ddf03a62165fcec10bb561c7806" dependencies = [ "darling_core", "quote", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ -3203,7 +3203,7 @@ dependencies = [ "hex", "hyper-util", "itertools 0.13.0", - "jiff 0.2.1", + "jiff 0.2.10", "jsonb", "lexical-core", "log", @@ -3250,7 +3250,7 @@ dependencies = [ "geozero", "goldenfile", "hex", - "jiff 0.2.1", + "jiff 0.2.10", "jsonb", "lexical-core", "match-template", @@ -3305,7 +3305,7 @@ dependencies = [ "jaq-interpret", "jaq-parse", "jaq-std", - "jiff 0.2.1", + "jiff 0.2.10", "jsonb", "lexical-core", "libm", @@ -3398,7 +3398,7 @@ dependencies = [ "geo", "geozero", "hex", - "jiff 0.2.1", + "jiff 0.2.10", "lexical-core", "micromarshal", "rmp-serde", @@ -4527,7 +4527,7 @@ dependencies = [ "databend-storages-common-cache", "futures", "itertools 0.13.0", - "jiff 0.2.1", + "jiff 0.2.10", "jsonb", "log", "once_cell", @@ -4873,7 +4873,7 @@ dependencies = [ "databend-common-exception", "databend-common-expression", "dtparse", - "jiff 0.2.1", + "jiff 0.2.10", "num-traits", ] @@ -5146,7 +5146,7 @@ dependencies = [ "hyper-util", "indicatif", "itertools 0.13.0", - "jiff 0.2.1", + "jiff 0.2.10", "jsonb", 
"jwt-simple", "log", @@ -5250,7 +5250,7 @@ dependencies = [ "derive-visitor", "ethnum", "itertools 0.13.0", - "jiff 0.2.1", + "jiff 0.2.10", "jsonb", "rand 0.8.5", "reqwest", @@ -5433,7 +5433,7 @@ dependencies = [ "enum-ordinalize", "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ -5535,7 +5535,7 @@ checksum = "0c8e41236d5a9f04da3072d7186a76aba734e7bfd2cd05f7877fde172b65fb11" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ -5673,7 +5673,7 @@ dependencies = [ "darling", "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ -5683,7 +5683,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "206868b8242f27cecce124c19fd88157fbd0dd334df2587f36417bafbc85097b" dependencies = [ "derive_builder_core", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ -5704,7 +5704,7 @@ dependencies = [ "convert_case 0.6.0", "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.100", "unicode-xid", ] @@ -5790,7 +5790,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ -5815,7 +5815,7 @@ checksum = "9556bc800956545d6420a640173e5ba7dfa82f38d3ea5a167eb555bc69ac3323" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ -5928,7 +5928,7 @@ dependencies = [ "enum-ordinalize", "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ -6012,7 +6012,7 @@ dependencies = [ "heck 0.4.1", "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ -6032,7 +6032,7 @@ checksum = "0d28318a75d4aead5c4db25382e8ef717932d0346600cacae6357eb5941bc5ff" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ -6044,7 +6044,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ -6065,7 +6065,7 @@ checksum = 
"de0d48a183585823424a4ce1aa132d174a6a81bd540895822eb4c8373a8e49e8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ -6131,7 +6131,7 @@ checksum = "3bf679796c0322556351f287a51b49e48f7c4986e727b5dd78c972d30e2e16cc" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ -6283,7 +6283,7 @@ dependencies = [ "proc-macro-error2", "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ -6529,7 +6529,7 @@ checksum = "b0fa992f1656e1707946bbba340ad244f0814009ef8c0118eb7b658395f19a2e" dependencies = [ "frunk_proc_macro_helpers", "quote", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ -6541,7 +6541,7 @@ dependencies = [ "frunk_core", "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ -6553,7 +6553,7 @@ dependencies = [ "frunk_core", "frunk_proc_macro_helpers", "quote", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ -6651,7 +6651,7 @@ checksum = "5ac45ed0bddbd110eb68862768a194f88700f5b91c39931d2f432fab67a16d08" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ -6716,7 +6716,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ -7388,7 +7388,7 @@ checksum = "999ce923619f88194171a67fb3e6d613653b8d4d6078b529b15a765da0edcc17" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ -8437,6 +8437,12 @@ dependencies = [ "tower-service", ] +[[package]] +name = "i256" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d33e2469f814f46f104fc4641cba20f3ba3b2b902e7d3c12db5bab37b33f6ac9" + [[package]] name = "iana-time-zone" version = "0.1.60" @@ -8680,7 +8686,7 @@ checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ 
-9128,10 +9134,11 @@ dependencies = [ [[package]] name = "jiff" -version = "0.2.1" +version = "0.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3590fea8e9e22d449600c9bbd481a8163bef223e4ff938e5f55899f8cf1adb93" +checksum = "5a064218214dc6a10fbae5ec5fa888d80c45d611aba169222fc272072bf7aef6" dependencies = [ + "jiff-static", "jiff-tzdb", "jiff-tzdb-platform", "log", @@ -9141,17 +9148,28 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "jiff-static" +version = "0.2.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "199b7932d97e325aff3a7030e141eafe7f2c6268e1d1b24859b753a627f45254" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.100", +] + [[package]] name = "jiff-tzdb" -version = "0.1.2" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf2cec2f5d266af45a071ece48b1fb89f3b00b2421ac3a5fe10285a6caaa60d3" +checksum = "c1283705eb0a21404d2bfd6eef2a7593d240bc42a0bdb39db0ad6fa2ec026524" [[package]] name = "jiff-tzdb-platform" -version = "0.1.2" +version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a63c62e404e7b92979d2792352d885a7f8f83fd1d0d31eea582d77b2ceca697e" +checksum = "875a5a69ac2bab1a891711cf5eccbec1ce0341ea805560dcd90b7a2e925132e8" dependencies = [ "jiff-tzdb", ] @@ -9177,12 +9195,13 @@ dependencies = [ [[package]] name = "jsonb" version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7e7eadf220028b2482ef22189ee7ce946665874dcc1040d82c827ff06963fbe" +source = "git+https://github.com/b41sh/jsonb?rev=4c8c2feedbfcfcf04db6e76a887b59f63ff4417d#4c8c2feedbfcfcf04db6e76a887b59f63ff4417d" dependencies = [ "byteorder", "fast-float2", + "i256", "itoa", + "jiff 0.2.10", "nom 8.0.0", "num-traits", "ordered-float 5.0.0", @@ -10081,7 +10100,7 @@ dependencies = [ "proc-macro-error 1.0.4", "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.100", "termcolor", 
"thiserror 1.0.65", ] @@ -10268,7 +10287,7 @@ dependencies = [ "proc-macro-error 1.0.4", "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ -10381,7 +10400,7 @@ checksum = "ed3955f1a9c7c0c15e092f9c887db08b1fc683305fdf6eb6684f22555355e202" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ -10463,7 +10482,7 @@ dependencies = [ "proc-macro-crate 3.1.0", "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ -10679,7 +10698,7 @@ dependencies = [ "proc-macro2", "quote", "semver", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ -10721,7 +10740,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ -11082,7 +11101,7 @@ dependencies = [ "regex", "regex-syntax 0.8.4", "structmeta", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ -11234,7 +11253,7 @@ checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ -11353,7 +11372,7 @@ dependencies = [ "proc-macro-crate 3.1.0", "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ -11487,7 +11506,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8d3928fb5db768cb86f891ff014f0144589297e3c6a1aba6ed7cecfdace270c7" dependencies = [ "proc-macro2", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ -11587,7 +11606,7 @@ dependencies = [ "proc-macro-error-attr2", "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ -11598,9 +11617,9 @@ checksum = "dc375e1527247fe1a97d8b7156678dfe7c1af2fc075c9a4db3690ecd2a148068" [[package]] name = "proc-macro2" -version = "1.0.92" +version = "1.0.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0" +checksum = 
"02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" dependencies = [ "unicode-ident", ] @@ -11688,7 +11707,7 @@ checksum = "440f724eba9f6996b75d63681b0a92b06947f1457076d503a4d2e2c8f56442b8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ -11745,7 +11764,7 @@ dependencies = [ "prost", "prost-types", "regex", - "syn 2.0.95", + "syn 2.0.100", "tempfile", ] @@ -11759,7 +11778,7 @@ dependencies = [ "itertools 0.14.0", "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ -11964,7 +11983,7 @@ dependencies = [ "proc-macro2", "pyo3-macros-backend", "quote", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ -11977,7 +11996,7 @@ dependencies = [ "proc-macro2", "pyo3-build-config", "quote", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ -12082,9 +12101,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.36" +version = "1.0.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" dependencies = [ "proc-macro2", ] @@ -12228,7 +12247,7 @@ checksum = "8b86292cf41ccfc96c5de7165c1c53d5b4ac540c5bab9d1857acbe9eba5f1a0b" dependencies = [ "proc-macro-hack", "quote", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ -12281,7 +12300,7 @@ version = "0.1.1" source = "git+https://github.com/datafuse-extras/recursive.git?rev=6af35a1#6af35a1e59e7050f86ee19fbd0a79535d016c87d" dependencies = [ "quote", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ -12726,7 +12745,7 @@ dependencies = [ "proc-macro2", "quote", "rquickjs-core", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ -13219,7 +13238,7 @@ checksum = "243902eda00fad750862fc144cea25caca5e20d615af0a81bee94ca738f1df1f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ -13261,7 +13280,7 @@ checksum = 
"6c64451ba24fc7a6a2d60fc75dd9c83c90903b19028d4eff35e88fc1e86564e9" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ -13331,7 +13350,7 @@ dependencies = [ "darling", "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ -13589,7 +13608,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ -13801,7 +13820,7 @@ dependencies = [ "quote", "sqlx-core", "sqlx-macros-core", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ -13824,7 +13843,7 @@ dependencies = [ "sqlx-mysql", "sqlx-postgres", "sqlx-sqlite", - "syn 2.0.95", + "syn 2.0.100", "tempfile", "tokio", "url", @@ -14028,7 +14047,7 @@ dependencies = [ "proc-macro2", "quote", "structmeta-derive", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ -14039,7 +14058,7 @@ checksum = "152a0b65a590ff6c3da95cabe2353ee04e6167c896b28e3b14478c2636c922fc" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ -14080,7 +14099,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ -14236,9 +14255,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.95" +version = "2.0.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46f71c0377baf4ef1cc3e3402ded576dccc315800fbc62dfc7fe04b009773b4a" +checksum = "b09a44accad81e1ba1cd74a32461ba89dee89095ba17b32f5d03683b1b1fc2a0" dependencies = [ "proc-macro2", "quote", @@ -14265,7 +14284,7 @@ dependencies = [ "proc-macro-error 1.0.4", "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ -14291,7 +14310,7 @@ checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ -14576,7 +14595,7 @@ checksum = "ae861f7d521762a2e5524ceeb3a518fab2c06c25e217a1d7270b8c5e158c141b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 
2.0.100", ] [[package]] @@ -14643,7 +14662,7 @@ checksum = "ae71770322cbd277e69d762a16c444af02aa0575ac0d174f0b9562d3b37f8602" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ -14654,7 +14673,7 @@ checksum = "7f7cf42b4507d8ea322120659672cf1b9dbb93f8f2d4ecfd6e51350ff5b17a1d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ -14804,7 +14823,7 @@ checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ -15014,7 +15033,7 @@ dependencies = [ "prost-build", "prost-types", "quote", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ -15096,7 +15115,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ -15230,7 +15249,7 @@ checksum = "f9534daa9fd3ed0bd911d462a37f172228077e7abf18c18a5f67199d959205f8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ -15241,7 +15260,7 @@ checksum = "560b82d656506509d43abe30e0ba64c56b1953ab3d4fe7ba5902747a7a3cedd5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ -15277,7 +15296,7 @@ checksum = "70b20a22c42c8f1cd23ce5e34f165d4d37038f5b663ad20fb6adbdf029172483" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ -15598,7 +15617,7 @@ checksum = "d674d135b4a8c1d7e813e2f8d1c9a58308aee4a680323066025e53132218bd91" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ -15761,7 +15780,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.100", "wasm-bindgen-shared", ] @@ -15795,7 +15814,7 @@ checksum = "afc340c74d9005395cf9dd098506f7f44e38f2b4a21c6aaacf9a105ea5e1e836" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.100", 
"wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -15970,7 +15989,7 @@ dependencies = [ "anyhow", "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.100", "wasmtime-component-util", "wasmtime-wit-bindgen", "wit-parser", @@ -16086,7 +16105,7 @@ checksum = "df09be00c38f49172ca9936998938476e3f2df782673a39ae2ef9fb0838341b6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ -16232,7 +16251,7 @@ dependencies = [ "proc-macro2", "quote", "shellexpand", - "syn 2.0.95", + "syn 2.0.100", "witx", ] @@ -16244,7 +16263,7 @@ checksum = "9b8eb1a5783540696c59cefbfc9e52570c2d5e62bd47bdf0bdcef29231879db2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.100", "wiggle-generate", ] @@ -16373,7 +16392,7 @@ checksum = "9107ddc059d5b6fbfbffdfa7a7fe3e22a226def0b2608f72e9d552763d3e1ad7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ -16384,7 +16403,7 @@ checksum = "2bbd5b46c938e506ecbce286b6628a02171d56153ba733b6c741fc627ec9579b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ -16395,7 +16414,7 @@ checksum = "29bee4b38ea3cde66011baa44dba677c432a78593e202392d1e9070cf2a7fca7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ -16406,7 +16425,7 @@ checksum = "053c4c462dc91d3b1504c6fe5a726dd15e216ba718e84a0e46a88fbe5ded3515" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ -16862,7 +16881,7 @@ checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.100", "synstructure", ] @@ -16899,7 +16918,7 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ -16910,7 +16929,7 @@ checksum = "6352c01d0edd5db859a63e2605f4ea3183ddbd15e2c4a9e7d32184df75e4f154" dependencies = [ 
"proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] @@ -16930,7 +16949,7 @@ checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.100", "synstructure", ] @@ -16959,7 +16978,7 @@ checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.100", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 2db94cfc96dd..0abee0867fc5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -350,7 +350,7 @@ jaq-core = "1.5.1" jaq-interpret = "1.5.0" jaq-parse = "1.0.3" jaq-std = "1.6.0" -jiff = { version = "0.2.1", features = ["serde", "tzdb-bundle-always"] } +jiff = { version = "0.2.8", features = ["serde", "tzdb-bundle-always"] } jsonb = "0.5.1" jwt-simple = { version = "0.12.10", default-features = false, features = ["pure-rust"] } lenient_semver = "0.4.2" @@ -638,7 +638,7 @@ color-eyre = { git = "https://github.com/eyre-rs/eyre.git", rev = "e5d92c3" } deltalake = { git = "https://github.com/delta-io/delta-rs", rev = "c149502" } display-more = { git = "https://github.com/databendlabs/display-more", tag = "v0.1.2" } ethnum = { git = "https://github.com/datafuse-extras/ethnum-rs", rev = "4cb05f1" } -jsonb = { git = "https://github.com/b41sh/jsonb", rev = "4e9315efa67c46777fd3097f0d189ff5990ac24c" } +jsonb = { git = "https://github.com/b41sh/jsonb", rev = "4c8c2feedbfcfcf04db6e76a887b59f63ff4417d" } map-api = { git = "https://github.com/databendlabs/map-api", tag = "v0.2.3" } openai_api_rust = { git = "https://github.com/datafuse-extras/openai-api", rev = "819a0ed" } openraft = { git = "https://github.com/databendlabs/openraft", tag = "v0.10.0-alpha.9" } diff --git a/src/query/expression/src/types/variant.rs b/src/query/expression/src/types/variant.rs index e1165f5c86c9..315a917d4f00 100644 --- a/src/query/expression/src/types/variant.rs +++ b/src/query/expression/src/types/variant.rs @@ 
-231,6 +231,7 @@ pub fn cast_scalar_to_variant(scalar: ScalarRef, tz: &TimeZone, buf: &mut Vec n.0.into(), }, ScalarRef::Decimal(x) => { + /** match x { DecimalScalar::Decimal128(value, size) => { let dec = jsonb::Decimal128 { @@ -247,17 +248,19 @@ pub fn cast_scalar_to_variant(scalar: ScalarRef, tz: &TimeZone, buf: &mut Vec jsonb::Value::Bool(b), ScalarRef::Binary(s) => jsonb::Value::Binary(s), ScalarRef::String(s) => jsonb::Value::String(s.into()), ScalarRef::Timestamp(ts) => jsonb::Value::Timestamp(jsonb::Timestamp {offset:0, value: ts}), - ScalarRef::Date(d) => jsonb::Value::Timestamp(jsonb::Date {offset:0, value: d}),, + ScalarRef::Date(d) => jsonb::Value::Date(jsonb::Date {offset:0, value: d}), ScalarRef::Interval(i) => { let interval = jsonb::Interval { months: i.months(), days: i.days(), - microseconds: i.microseconds(), + micros: i.microseconds(), }; jsonb::Value::Interval(interval) } diff --git a/src/query/functions/src/scalars/variant.rs b/src/query/functions/src/scalars/variant.rs index 7b31c548a632..3a17bcc577db 100644 --- a/src/query/functions/src/scalars/variant.rs +++ b/src/query/functions/src/scalars/variant.rs @@ -18,6 +18,7 @@ use std::collections::HashSet; use std::iter::once; use std::sync::Arc; +use databend_common_expression::types::BinaryType; use bstr::ByteSlice; use databend_common_expression::types::binary::BinaryColumnBuilder; use databend_common_expression::types::date::string_to_date; @@ -780,6 +781,25 @@ pub fn register(registry: &mut FunctionRegistry) { }), ); + registry.register_combine_nullable_1_arg::( + "as_interval", + |_, _| FunctionDomain::Full, + vectorize_with_builder_1_arg::>(|v, output, ctx| { + if let Some(validity) = &ctx.validity { + if !validity.get_bit(output.len()) { + output.push_null(); + return; + } + } + match RawJsonb::new(v).as_interval() { + Ok(Some(res)) => { + todo!() + } + _ => output.push_null(), + } + }), + ); + registry.register_combine_nullable_1_arg::( "as_array", |_, _| FunctionDomain::Full, From 
83fa549782a76b2c42ba75752450ae890e569045 Mon Sep 17 00:00:00 2001 From: baishen Date: Wed, 23 Apr 2025 11:59:58 +0800 Subject: [PATCH 03/13] fix --- Cargo.lock | 2 +- Cargo.toml | 2 +- src/query/expression/src/types/variant.rs | 48 +++++++++-------- src/query/functions/src/scalars/variant.rs | 54 ++++++++++--------- .../it/scalars/testdata/function_list.txt | 8 +++ 5 files changed, 65 insertions(+), 49 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 99e70c297b67..0216da9bae5c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9195,7 +9195,7 @@ dependencies = [ [[package]] name = "jsonb" version = "0.5.1" -source = "git+https://github.com/b41sh/jsonb?rev=4c8c2feedbfcfcf04db6e76a887b59f63ff4417d#4c8c2feedbfcfcf04db6e76a887b59f63ff4417d" +source = "git+https://github.com/b41sh/jsonb?rev=e59567b08e4a3db886764096305b3be74c8e1e13#e59567b08e4a3db886764096305b3be74c8e1e13" dependencies = [ "byteorder", "fast-float2", diff --git a/Cargo.toml b/Cargo.toml index 0abee0867fc5..5998a882ee65 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -638,7 +638,7 @@ color-eyre = { git = "https://github.com/eyre-rs/eyre.git", rev = "e5d92c3" } deltalake = { git = "https://github.com/delta-io/delta-rs", rev = "c149502" } display-more = { git = "https://github.com/databendlabs/display-more", tag = "v0.1.2" } ethnum = { git = "https://github.com/datafuse-extras/ethnum-rs", rev = "4cb05f1" } -jsonb = { git = "https://github.com/b41sh/jsonb", rev = "4c8c2feedbfcfcf04db6e76a887b59f63ff4417d" } +jsonb = { git = "https://github.com/b41sh/jsonb", rev = "e59567b08e4a3db886764096305b3be74c8e1e13" } map-api = { git = "https://github.com/databendlabs/map-api", tag = "v0.2.3" } openai_api_rust = { git = "https://github.com/datafuse-extras/openai-api", rev = "819a0ed" } openraft = { git = "https://github.com/databendlabs/openraft", tag = "v0.10.0-alpha.9" } diff --git a/src/query/expression/src/types/variant.rs b/src/query/expression/src/types/variant.rs index 315a917d4f00..4a2e1c88bcaf 100644 --- 
a/src/query/expression/src/types/variant.rs +++ b/src/query/expression/src/types/variant.rs @@ -31,11 +31,11 @@ use super::date::date_to_string; use super::number::NumberScalar; use super::timestamp::timestamp_to_string; use crate::property::Domain; -use crate::types::interval::interval_to_string; use crate::types::map::KvPair; use crate::types::AnyType; use crate::types::ArgType; use crate::types::DataType; +use crate::types::DecimalScalar; use crate::types::DecimalSize; use crate::types::GenericMap; use crate::types::ValueType; @@ -230,32 +230,34 @@ pub fn cast_scalar_to_variant(scalar: ScalarRef, tz: &TimeZone, buf: &mut Vec n.0.into(), NumberScalar::Float64(n) => n.0.into(), }, - ScalarRef::Decimal(x) => { - /** - match x { - DecimalScalar::Decimal128(value, size) => { - let dec = jsonb::Decimal128 { - scale: size.scale, - value, - }; - jsonb::Value::Number(jsonb::Number::Decimal128(dec)) - } - DecimalScalar::Decimal256(value, size) => { - let dec = jsonb::Decimal256 { - scale: size.scale, - value, - }; - jsonb::Value::Number(jsonb::Number::Decimal256(dec)) - } + ScalarRef::Decimal(x) => match x { + DecimalScalar::Decimal128(value, size) => { + let dec = jsonb::Decimal128 { + scale: size.scale, + value, + }; + jsonb::Value::Number(jsonb::Number::Decimal128(dec)) } - */ - todo!() - } + DecimalScalar::Decimal256(_value, _size) => { + // let dec = jsonb::Decimal256 { + // scale: size.scale, + // value, + //}; + // jsonb::Value::Number(jsonb::Number::Decimal256(dec)) + todo!() + } + }, ScalarRef::Boolean(b) => jsonb::Value::Bool(b), ScalarRef::Binary(s) => jsonb::Value::Binary(s), ScalarRef::String(s) => jsonb::Value::String(s.into()), - ScalarRef::Timestamp(ts) => jsonb::Value::Timestamp(jsonb::Timestamp {offset:0, value: ts}), - ScalarRef::Date(d) => jsonb::Value::Date(jsonb::Date {offset:0, value: d}), + ScalarRef::Timestamp(ts) => jsonb::Value::Timestamp(jsonb::Timestamp { + offset: 0, + value: ts, + }), + ScalarRef::Date(d) => jsonb::Value::Date(jsonb::Date { 
+ offset: 0, + value: d, + }), ScalarRef::Interval(i) => { let interval = jsonb::Interval { months: i.months(), diff --git a/src/query/functions/src/scalars/variant.rs b/src/query/functions/src/scalars/variant.rs index 3a17bcc577db..bd7fe37c7270 100644 --- a/src/query/functions/src/scalars/variant.rs +++ b/src/query/functions/src/scalars/variant.rs @@ -18,8 +18,8 @@ use std::collections::HashSet; use std::iter::once; use std::sync::Arc; -use databend_common_expression::types::BinaryType; use bstr::ByteSlice; +use databend_common_column::types::months_days_micros; use databend_common_expression::types::binary::BinaryColumnBuilder; use databend_common_expression::types::date::string_to_date; use databend_common_expression::types::nullable::NullableColumn; @@ -32,11 +32,13 @@ use databend_common_expression::types::variant::cast_scalar_to_variant; use databend_common_expression::types::variant::cast_scalars_to_variants; use databend_common_expression::types::AnyType; use databend_common_expression::types::ArrayType; +use databend_common_expression::types::BinaryType; use databend_common_expression::types::Bitmap; use databend_common_expression::types::BooleanType; use databend_common_expression::types::DataType; use databend_common_expression::types::DateType; use databend_common_expression::types::GenericType; +use databend_common_expression::types::IntervalType; use databend_common_expression::types::MutableBitmap; use databend_common_expression::types::NullableType; use databend_common_expression::types::NumberDataType; @@ -767,37 +769,41 @@ pub fn register(registry: &mut FunctionRegistry) { registry.register_combine_nullable_1_arg::( "as_timestamp", |_, _| FunctionDomain::Full, - vectorize_with_builder_1_arg::>(|v, output, ctx| { - if let Some(validity) = &ctx.validity { - if !validity.get_bit(output.len()) { - output.push_null(); - return; + vectorize_with_builder_1_arg::>( + |v, output, ctx| { + if let Some(validity) = &ctx.validity { + if 
!validity.get_bit(output.len()) { + output.push_null(); + return; + } } - } - match RawJsonb::new(v).as_timestamp() { - Ok(Some(res)) => output.push(res.value), - _ => output.push_null(), - } - }), + match RawJsonb::new(v).as_timestamp() { + Ok(Some(res)) => output.push(res.value), + _ => output.push_null(), + } + }, + ), ); registry.register_combine_nullable_1_arg::( "as_interval", |_, _| FunctionDomain::Full, - vectorize_with_builder_1_arg::>(|v, output, ctx| { - if let Some(validity) = &ctx.validity { - if !validity.get_bit(output.len()) { - output.push_null(); - return; + vectorize_with_builder_1_arg::>( + |v, output, ctx| { + if let Some(validity) = &ctx.validity { + if !validity.get_bit(output.len()) { + output.push_null(); + return; + } } - } - match RawJsonb::new(v).as_interval() { - Ok(Some(res)) => { - todo!() + match RawJsonb::new(v).as_interval() { + Ok(Some(res)) => { + output.push(months_days_micros::new(res.months, res.days, res.micros)) + } + _ => output.push_null(), } - _ => output.push_null(), - } - }), + }, + ), ); registry.register_combine_nullable_1_arg::( diff --git a/src/query/functions/tests/it/scalars/testdata/function_list.txt b/src/query/functions/tests/it/scalars/testdata/function_list.txt index 8d282ba9081b..9efa774a2f74 100644 --- a/src/query/functions/tests/it/scalars/testdata/function_list.txt +++ b/src/query/functions/tests/it/scalars/testdata/function_list.txt @@ -201,16 +201,24 @@ Functions overloads: 0 arrays_zip FACTORY 0 as_array(Variant) :: Variant NULL 1 as_array(Variant NULL) :: Variant NULL +0 as_binary(Variant) :: Binary NULL +1 as_binary(Variant NULL) :: Binary NULL 0 as_boolean(Variant) :: Boolean NULL 1 as_boolean(Variant NULL) :: Boolean NULL +0 as_date(Variant) :: Date NULL +1 as_date(Variant NULL) :: Date NULL 0 as_float(Variant) :: Float64 NULL 1 as_float(Variant NULL) :: Float64 NULL 0 as_integer(Variant) :: Int64 NULL 1 as_integer(Variant NULL) :: Int64 NULL +0 as_interval(Variant) :: Interval NULL +1 
as_interval(Variant NULL) :: Interval NULL 0 as_object(Variant) :: Variant NULL 1 as_object(Variant NULL) :: Variant NULL 0 as_string(Variant) :: String NULL 1 as_string(Variant NULL) :: String NULL +0 as_timestamp(Variant) :: Timestamp NULL +1 as_timestamp(Variant NULL) :: Timestamp NULL 0 ascii(String) :: UInt8 1 ascii(String NULL) :: UInt8 NULL 0 asin(Float64) :: Float64 From 380dfcdf44fc5bed8ebb4492ad6f064b7cca79d1 Mon Sep 17 00:00:00 2001 From: baishen Date: Thu, 24 Apr 2025 10:47:25 +0800 Subject: [PATCH 04/13] wrap i256 --- Cargo.lock | 16 +- Cargo.toml | 5 +- src/common/native/src/read/batch_read.rs | 3 +- src/common/native/src/read/deserialize.rs | 3 +- src/query/expression/Cargo.toml | 2 +- .../expression/src/aggregate/group_hash.rs | 2 +- .../expression/src/aggregate/payload_flush.rs | 2 +- .../expression/src/aggregate/payload_row.rs | 2 +- src/query/expression/src/filter/select.rs | 2 +- src/query/expression/src/kernels/concat.rs | 2 +- .../src/kernels/group_by_hash/method.rs | 2 +- .../group_by_hash/method_fixed_keys.rs | 2 +- src/query/expression/src/row/fixed.rs | 2 +- src/query/expression/src/row/row_converter.rs | 2 +- src/query/expression/src/types/decimal.rs | 352 +++++++++++++++++- src/query/expression/src/types/variant.rs | 17 +- src/query/expression/src/utils/display.rs | 10 +- src/query/expression/src/utils/mod.rs | 14 +- src/query/expression/src/values.rs | 4 +- .../src/aggregates/aggregate_array_agg.rs | 2 +- .../src/aggregates/aggregate_array_moving.rs | 2 +- .../src/aggregates/aggregate_bitmap.rs | 2 +- .../src/aggregates/aggregate_histogram.rs | 2 +- .../src/aggregates/aggregate_min_max_any.rs | 2 +- .../src/aggregates/aggregate_quantile_cont.rs | 2 +- .../src/aggregates/aggregate_quantile_disc.rs | 2 +- .../src/aggregates/aggregate_range_bound.rs | 2 +- .../src/scalars/arithmetic/src/arithmetic.rs | 3 +- .../src/scalars/decimal/src/arithmetic.rs | 2 +- .../functions/src/scalars/decimal/src/cast.rs | 10 +- 
.../src/scalars/decimal/src/comparison.rs | 2 +- .../functions/src/scalars/decimal/src/math.rs | 2 +- src/query/functions/src/scalars/hash.rs | 4 +- src/query/functions/src/scalars/variant.rs | 19 + src/query/functions/src/test_utils.rs | 3 +- .../sql/src/planner/semantic/type_check.rs | 3 +- .../src/parquet_rs/statistics/column.rs | 11 +- .../parquet/src/parquet_rs/statistics/page.rs | 10 +- .../src/parquet_rs/statistics/utils.rs | 2 +- .../storages/stage/src/read/avro/decoder.rs | 1 + .../stage/src/read/avro/schema_match.rs | 2 +- 41 files changed, 433 insertions(+), 101 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0216da9bae5c..70021d4cd03d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6185,10 +6185,10 @@ dependencies = [ [[package]] name = "ethnum" -version = "1.5.0" -source = "git+https://github.com/datafuse-extras/ethnum-rs?rev=4cb05f1#4cb05f1e407f76b193d81eef71b5dd0b73216856" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0939f82868b77ef93ce3c3c3daf2b3c526b456741da5a1a4559e590965b6026b" dependencies = [ - "borsh", "serde", ] @@ -8437,12 +8437,6 @@ dependencies = [ "tower-service", ] -[[package]] -name = "i256" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d33e2469f814f46f104fc4641cba20f3ba3b2b902e7d3c12db5bab37b33f6ac9" - [[package]] name = "iana-time-zone" version = "0.1.60" @@ -9195,11 +9189,11 @@ dependencies = [ [[package]] name = "jsonb" version = "0.5.1" -source = "git+https://github.com/b41sh/jsonb?rev=e59567b08e4a3db886764096305b3be74c8e1e13#e59567b08e4a3db886764096305b3be74c8e1e13" +source = "git+https://github.com/b41sh/jsonb?rev=df93358108160ff772b60f2337957b0886b5639a#df93358108160ff772b60f2337957b0886b5639a" dependencies = [ "byteorder", + "ethnum", "fast-float2", - "i256", "itoa", "jiff 0.2.10", "nom 8.0.0", diff --git a/Cargo.toml b/Cargo.toml index 5998a882ee65..e74bbb780170 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -306,7 +306,7 
@@ enquote = "1.1.0" enum-as-inner = "0.6" enum_dispatch = "0.3.13" enumflags2 = { version = "0.7.7", features = ["serde"] } -ethnum = { version = "1.5.0" } +ethnum = { version = "1.5.1" } faststr = "0.2" feature-set = { version = "0.1.1" } flatbuffers = "24" # Must use the same version with arrow-ipc @@ -637,8 +637,7 @@ backtrace = { git = "https://github.com/rust-lang/backtrace-rs.git", rev = "7226 color-eyre = { git = "https://github.com/eyre-rs/eyre.git", rev = "e5d92c3" } deltalake = { git = "https://github.com/delta-io/delta-rs", rev = "c149502" } display-more = { git = "https://github.com/databendlabs/display-more", tag = "v0.1.2" } -ethnum = { git = "https://github.com/datafuse-extras/ethnum-rs", rev = "4cb05f1" } -jsonb = { git = "https://github.com/b41sh/jsonb", rev = "e59567b08e4a3db886764096305b3be74c8e1e13" } +jsonb = { git = "https://github.com/b41sh/jsonb", rev = "df93358108160ff772b60f2337957b0886b5639a" } map-api = { git = "https://github.com/databendlabs/map-api", tag = "v0.2.3" } openai_api_rust = { git = "https://github.com/datafuse-extras/openai-api", rev = "819a0ed" } openraft = { git = "https://github.com/databendlabs/openraft", tag = "v0.10.0-alpha.9" } diff --git a/src/common/native/src/read/batch_read.rs b/src/common/native/src/read/batch_read.rs index 095d9cc735e5..a45825986229 100644 --- a/src/common/native/src/read/batch_read.rs +++ b/src/common/native/src/read/batch_read.rs @@ -73,7 +73,8 @@ pub fn read_nested_column( ), Decimal(decimal) if decimal.precision() > MAX_DECIMAL128_PRECISION => { init.push(InitNested::Primitive(is_nullable)); - read_nested_decimal::( + //read_nested_decimal::( + read_nested_decimal::( &mut readers.pop().unwrap(), data_type.clone(), decimal.size(), diff --git a/src/common/native/src/read/deserialize.rs b/src/common/native/src/read/deserialize.rs index 8e3e982c8f73..2a5a77e7ade3 100644 --- a/src/common/native/src/read/deserialize.rs +++ b/src/common/native/src/read/deserialize.rs @@ -164,7 +164,8 @@ where 
DynIter::new(DecimalNestedIter::< _, databend_common_column::types::i256, - ethnum::i256, + //ethnum::i256, + databend_common_expression::types::i256, >::new( readers.pop().unwrap(), data_type.clone(), t.size(), init )) diff --git a/src/query/expression/Cargo.toml b/src/query/expression/Cargo.toml index 94bcc0e83ee4..9715ad3fef2d 100644 --- a/src/query/expression/Cargo.toml +++ b/src/query/expression/Cargo.toml @@ -31,7 +31,7 @@ databend-common-io = { workspace = true } educe = { workspace = true } either = { workspace = true } enum-as-inner = { workspace = true } -ethnum = { workspace = true, features = ["serde", "macros", "borsh"] } +ethnum = { workspace = true, features = ["serde", "macros"] } futures = { workspace = true } geo = { workspace = true } geozero = { workspace = true } diff --git a/src/query/expression/src/aggregate/group_hash.rs b/src/query/expression/src/aggregate/group_hash.rs index ebf5828b54a5..95378c27705c 100644 --- a/src/query/expression/src/aggregate/group_hash.rs +++ b/src/query/expression/src/aggregate/group_hash.rs @@ -17,8 +17,8 @@ use databend_common_column::bitmap::Bitmap; use databend_common_column::buffer::Buffer; use databend_common_column::types::Index; use databend_common_exception::Result; -use ethnum::i256; +use crate::types::i256; use crate::types::AnyType; use crate::types::ArgType; use crate::types::BinaryType; diff --git a/src/query/expression/src/aggregate/payload_flush.rs b/src/query/expression/src/aggregate/payload_flush.rs index 4fe9f3583022..90690539894a 100644 --- a/src/query/expression/src/aggregate/payload_flush.rs +++ b/src/query/expression/src/aggregate/payload_flush.rs @@ -14,8 +14,8 @@ use databend_common_exception::Result; use databend_common_io::prelude::bincode_deserialize_from_slice; -use ethnum::i256; +use crate::types::i256; use super::partitioned_payload::PartitionedPayload; use super::payload::Payload; use super::probe_state::ProbeState; diff --git a/src/query/expression/src/aggregate/payload_row.rs 
b/src/query/expression/src/aggregate/payload_row.rs index ce8b908e0b5a..5fbd294dc1e5 100644 --- a/src/query/expression/src/aggregate/payload_row.rs +++ b/src/query/expression/src/aggregate/payload_row.rs @@ -16,7 +16,7 @@ use bumpalo::Bump; use databend_common_column::bitmap::Bitmap; use databend_common_io::prelude::bincode_deserialize_from_slice; use databend_common_io::prelude::bincode_serialize_into_buf; -use ethnum::i256; +use crate::types::i256; use crate::read; use crate::store; diff --git a/src/query/expression/src/filter/select.rs b/src/query/expression/src/filter/select.rs index 3d39b2a1ec59..d59003901818 100644 --- a/src/query/expression/src/filter/select.rs +++ b/src/query/expression/src/filter/select.rs @@ -14,7 +14,7 @@ use databend_common_exception::ErrorCode; use databend_common_exception::Result; -use ethnum::i256; +use crate::types::i256; use crate::arrow::and_validities; use crate::filter::SelectOp; diff --git a/src/query/expression/src/kernels/concat.rs b/src/query/expression/src/kernels/concat.rs index 9883daffd886..51d2cbfd2e25 100644 --- a/src/query/expression/src/kernels/concat.rs +++ b/src/query/expression/src/kernels/concat.rs @@ -20,8 +20,8 @@ use databend_common_column::bitmap::Bitmap; use databend_common_column::buffer::Buffer; use databend_common_exception::ErrorCode; use databend_common_exception::Result; -use ethnum::i256; use itertools::Itertools; +use crate::types::i256; use crate::types::array::ArrayColumnBuilder; use crate::types::decimal::Decimal; diff --git a/src/query/expression/src/kernels/group_by_hash/method.rs b/src/query/expression/src/kernels/group_by_hash/method.rs index 2e69c1b73ce3..1c29af966b4d 100644 --- a/src/query/expression/src/kernels/group_by_hash/method.rs +++ b/src/query/expression/src/kernels/group_by_hash/method.rs @@ -21,9 +21,9 @@ use databend_common_exception::Result; use databend_common_hashtable::DictionaryKeys; use databend_common_hashtable::FastHash; use either::Either; -use ethnum::i256; use 
ethnum::u256; +use crate::types::i256; use crate::types::binary::BinaryColumn; use crate::types::decimal::Decimal; use crate::types::DataType; diff --git a/src/query/expression/src/kernels/group_by_hash/method_fixed_keys.rs b/src/query/expression/src/kernels/group_by_hash/method_fixed_keys.rs index 177b542be7d9..35c1018185b6 100644 --- a/src/query/expression/src/kernels/group_by_hash/method_fixed_keys.rs +++ b/src/query/expression/src/kernels/group_by_hash/method_fixed_keys.rs @@ -20,11 +20,11 @@ use databend_common_column::buffer::Buffer; use databend_common_exception::ErrorCode; use databend_common_exception::Result; use databend_common_hashtable::FastHash; -use ethnum::i256; use ethnum::u256; use ethnum::U256; use micromarshal::Marshal; +use crate::types::i256; use crate::types::boolean::BooleanType; use crate::types::decimal::Decimal; use crate::types::decimal::DecimalColumn; diff --git a/src/query/expression/src/row/fixed.rs b/src/query/expression/src/row/fixed.rs index 24f2d712faf2..e52a38c8c524 100644 --- a/src/query/expression/src/row/fixed.rs +++ b/src/query/expression/src/row/fixed.rs @@ -14,7 +14,7 @@ use databend_common_column::bitmap::Bitmap; use databend_common_column::types::months_days_micros; -use ethnum::i256; +use crate::types::i256; use super::row_converter::null_sentinel; use crate::types::binary::BinaryColumnBuilder; diff --git a/src/query/expression/src/row/row_converter.rs b/src/query/expression/src/row/row_converter.rs index f39ad0763e18..510eb01f505f 100644 --- a/src/query/expression/src/row/row_converter.rs +++ b/src/query/expression/src/row/row_converter.rs @@ -17,7 +17,7 @@ use std::sync::Arc; use databend_common_column::types::months_days_micros; use databend_common_exception::ErrorCode; use databend_common_exception::Result; -use ethnum::i256; +use crate::types::i256; use super::fixed; use super::fixed::FixedLengthEncoding; diff --git a/src/query/expression/src/types/decimal.rs b/src/query/expression/src/types/decimal.rs index 
6fdaee2845f6..cdcfe18a21c5 100644 --- a/src/query/expression/src/types/decimal.rs +++ b/src/query/expression/src/types/decimal.rs @@ -17,6 +17,20 @@ use std::fmt::Debug; use std::marker::PhantomData; use std::ops::Range; +use std::num::TryFromIntError; + +use micromarshal::Marshal; +use std::ops::Mul; +use std::ops::Div; +use std::ops::Add; +use std::ops::Sub; + +use std::ops::Neg; +use std::ops::AddAssign; +use std::ops::SubAssign; +use std::ops::MulAssign; +use std::ops::DivAssign; + use arrow_data::ArrayData; use arrow_data::ArrayDataBuilder; use borsh::BorshDeserialize; @@ -27,7 +41,7 @@ use databend_common_exception::Result; use databend_common_io::display_decimal_128; use databend_common_io::display_decimal_256; use enum_as_inner::EnumAsInner; -use ethnum::i256; +//use ethnum::i256; use ethnum::u256; use ethnum::AsI256; use itertools::Itertools; @@ -38,6 +52,8 @@ use num_traits::ToPrimitive; use serde::Deserialize; use serde::Serialize; +//use databend_common_column::types::NativeType; + use super::SimpleDomain; use crate::types::ArgType; use crate::types::DataType; @@ -50,6 +66,302 @@ use crate::Domain; use crate::Scalar; use crate::ScalarRef; +//use databend_common_column::types::i256; + +/// Physical representation of a decimal +#[derive(Clone, Copy, Default, Eq, Hash, PartialEq, PartialOrd, Ord, Serialize, Deserialize)] +#[allow(non_camel_case_types)] +#[repr(C)] +pub struct i256(pub ethnum::I256); + +impl i256 { + /// The additive identity for this integer type, i.e. `0`. + pub const ZERO: Self = Self(ethnum::I256([0; 2])); + + /// The multiplicative identity for this integer type, i.e. `1`. + pub const ONE: Self = Self(ethnum::I256::new(1)); + + /// The multiplicative inverse for this integer type, i.e. `-1`. + pub const MINUS_ONE: Self = Self(ethnum::I256::new(-1)); + + pub fn from(value: i128) -> Self { + Self(value.as_i256()) + } + + /// Returns a new [`i256`] from two `i128`. 
+    pub fn from_words(hi: i128, lo: i128) -> Self {
+        Self(ethnum::I256::from_words(hi, lo))
+    }
+
+    #[inline]
+    pub const fn to_le_bytes(&self) -> [u8; 32] {
+        let (high, low) = self.0.into_words();
+        let low = low.to_le_bytes();
+        let high = high.to_le_bytes();
+        let mut i = 0;
+        let mut bytes = [0u8; 32];
+        while i != 16 {
+            bytes[i] = low[i];
+            bytes[i + 16] = high[i];
+            i += 1;
+        }
+        bytes
+    }
+
+    #[inline]
+    pub const fn to_be_bytes(&self) -> [u8; 32] {
+        let (high, low) = self.0.into_words();
+        let low = low.to_be_bytes();
+        let high = high.to_be_bytes();
+        let mut bytes = [0; 32];
+        let mut i = 0;
+        while i != 16 {
+            bytes[i] = high[i];
+            bytes[i + 16] = low[i];
+            i += 1;
+        }
+        bytes
+    }
+
+    #[inline]
+    pub const fn from_be_bytes(bytes: [u8; 32]) -> Self {
+        let mut low = [0; 16];
+        let mut high = [0; 16];
+        let mut i = 0;
+        while i != 16 {
+            high[i] = bytes[i];
+            low[i] = bytes[i + 16];
+            i += 1;
+        }
+        let high = i128::from_be_bytes(high);
+        let low = i128::from_be_bytes(low);
+        Self(ethnum::I256::from_words(high, low))
+    }
+
+    #[inline]
+    pub const fn from_le_bytes(bytes: [u8; 32]) -> Self {
+        let mut low = [0; 16];
+        let mut high = [0; 16];
+        let mut i = 0;
+        while i != 16 {
+            low[i] = bytes[i];
+            high[i] = bytes[i + 16];
+            i += 1;
+        }
+        let high = i128::from_le_bytes(high); // words are little-endian here; must mirror to_le_bytes
+        let low = i128::from_le_bytes(low); // decoding as big-endian would byte-swap each word
+        Self(ethnum::I256::from_words(high, low))
+    }
+
+    #[inline]
+    pub const fn is_positive(self) -> bool {
+        self.0.is_positive()
+    }
+
+    #[inline]
+    pub const fn is_negative(self) -> bool {
+        self.0.is_negative()
+    }
+
+    /// Cast to a primitive `i128`.
+    #[inline]
+    pub const fn as_i128(self) -> i128 {
+        self.0.as_i128()
+    }
+
+    /// Get the low 128-bit word for this signed integer.
+    #[inline]
+    pub fn low(&self) -> &i128 {
+        self.0.low()
+    }
+
+    /// Get the high 128-bit word for this signed integer.
+    #[inline]
+    pub fn high(&self) -> &i128 {
+        self.0.high()
+    }
+
+    #[allow(unused_attributes)]
+    #[inline]
+    pub fn abs(self) -> Self {
+        Self(self.0.abs())
+    }
+
+    #[inline]
+    pub fn checked_neg(self) -> Option<Self> {
+        self.0.checked_neg().map(Self)
+    }
+
+    /// Raises self to the power of `exp`, using exponentiation by squaring.
+    ///
+    /// # Examples
+    ///
+    /// Basic usage:
+    ///
+    /// ```
+    /// # use ethnum::I256;
+    ///
+    /// assert_eq!(I256::new(2).pow(5), 32);
+    /// ```
+    #[must_use = "this returns the result of the operation, \
+                  without modifying the original"]
+    #[inline]
+    pub fn pow(self, exp: u32) -> Self {
+        Self(self.0.pow(exp))
+    }
+}
+
+impl Neg for i256 {
+    type Output = Self;
+
+    #[inline]
+    fn neg(self) -> Self::Output {
+        // Negate the whole 256-bit value; flipping only the high word is not two's-complement negation.
+        Self(-self.0)
+    }
+}
+
+impl std::fmt::Debug for i256 {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        write!(f, "{:?}", self.0)
+    }
+}
+
+impl std::fmt::Display for i256 {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        write!(f, "{}", self.0)
+    }
+}
+
+impl AddAssign for i256 {
+    fn add_assign(&mut self, rhs: Self) {
+        self.0 += rhs.0;
+    }
+}
+
+impl SubAssign for i256 {
+    fn sub_assign(&mut self, rhs: Self) {
+        self.0 -= rhs.0;
+    }
+}
+
+impl MulAssign for i256 {
+    fn mul_assign(&mut self, rhs: Self) {
+        self.0 *= rhs.0;
+    }
+}
+
+impl DivAssign for i256 {
+    fn div_assign(&mut self, rhs: Self) {
+        self.0 /= rhs.0;
+    }
+}
+
+impl Add for i256 {
+    type Output = Self;
+
+    fn add(self, rhs: Self) -> Self::Output {
+        Self(self.0 + rhs.0)
+    }
+}
+
+impl Sub for i256 {
+    type Output = Self;
+
+    fn sub(self, rhs: Self) -> Self::Output {
+        Self(self.0 - rhs.0)
+    }
+}
+
+impl Mul for i256 {
+    type Output = Self;
+
+    fn mul(self, rhs: Self) -> Self::Output {
+        Self(self.0 * rhs.0)
+    }
+}
+
+impl Div for i256 {
+    type Output = Self;
+
+    fn div(self, rhs: Self) -> Self::Output {
+        Self(self.0 / rhs.0)
+    }
+}
+
+
+
+macro_rules!
impl_from { + ($($t:ty),* $(,)?) => {$( + impl From<$t> for i256 { + #[inline] + fn from(value: $t) -> Self { + i256(value.as_i256()) + } + } + )*}; +} + +impl_from! { + bool, + i8, i16, i32, i64, i128, + u8, u16, u32, u64, u128, +} + +impl TryFrom for i256 { + type Error = TryFromIntError; + + fn try_from(value: u256) -> std::result::Result { + let i256_value = ethnum::i256::try_from(value)?; + Ok(i256(i256_value)) + } +} + + + + +impl BorshSerialize for i256 { + fn serialize(&self, writer: &mut W) -> borsh::io::Result<()> { + BorshSerialize::serialize(&self.0.0, writer) + } +} + +impl BorshDeserialize for i256 { + fn deserialize_reader(reader: &mut R) -> borsh::io::Result { + let value: [i128; 2] = BorshDeserialize::deserialize_reader(reader)?; + Ok(Self(ethnum::I256(value))) + } +} + +impl Marshal for i256 { + fn marshal(&self, scratch: &mut [u8]) { + self.0.marshal(scratch); + } +} + +macro_rules! impl_into_float { + ($($t:ty => $f:ident),* $(,)?) => {$( + impl From for $t { + #[inline] + fn from(x: i256) -> $t { + x.0.$f() + } + } + )*}; +} + +impl_into_float! 
{ + f32 => as_f32, f64 => as_f64, +} + + + + + + + + + #[derive(Debug, Clone, PartialEq, Eq)] pub struct DecimalType(PhantomData); @@ -502,12 +814,12 @@ impl Decimal for i128 { fn do_round_mul(self, rhs: Self, shift_scale: u32) -> Option { let div = i256::e(shift_scale); let res = if self.is_negative() == rhs.is_negative() { - (i256::from(self) * i256::from(rhs) + div / 2) / div + (i256::from(self) * i256::from(rhs) + div / i256::from(2)) / div } else { - (i256::from(self) * i256::from(rhs) - div / 2) / div + (i256::from(self) * i256::from(rhs) - div / i256::from(2)) / div }; - if !(i128::MIN..=i128::MAX).contains(&res) { + if !(i256::from(i128::MIN)..=i256::from(i128::MAX)).contains(&res) { None } else { Some(res.as_i128()) @@ -517,10 +829,10 @@ impl Decimal for i128 { fn do_round_div(self, rhs: Self, mul_scale: u32) -> Option { let mul = i256::e(mul_scale); if self.is_negative() == rhs.is_negative() { - let res = (i256::from(self) * i256::from(mul) + i256::from(rhs) / 2) / i256::from(rhs); + let res = (i256::from(self) * mul + i256::from(rhs) / i256::from(2)) / i256::from(rhs); Some(*res.low()) } else { - let res = (i256::from(self) * i256::from(mul) - i256::from(rhs) / 2) / i256::from(rhs); + let res = (i256::from(self) * mul - i256::from(rhs) / i256::from(2)) / i256::from(rhs); Some(*res.low()) } } @@ -720,7 +1032,7 @@ impl Decimal for i256 { } fn e(n: u32) -> Self { - (i256::ONE * 10).pow(n) + (i256::ONE * i256::from(10)).pow(n) } fn mem_size() -> usize { @@ -750,9 +1062,9 @@ impl Decimal for i256 { fn do_round_mul(self, rhs: Self, shift_scale: u32) -> Option { let div = i256::e(shift_scale); let ret: Option = if self.is_negative() == rhs.is_negative() { - self.checked_mul(rhs).map(|x| (x + div / 2) / div) + self.checked_mul(rhs).map(|x| (x + div / i256::from(2)) / div) } else { - self.checked_mul(rhs).map(|x| (x - div / 2) / div) + self.checked_mul(rhs).map(|x| (x - div / i256::from(2)) / div) }; ret.or_else(|| { @@ -785,9 +1097,9 @@ impl Decimal for i256 { 
let mul = i256::e(mul_scale); let ret: Option = if self.is_negative() == rhs.is_negative() { - self.checked_mul(mul).map(|x| (x + rhs / 2) / rhs) + self.checked_mul(mul).map(|x| (x + rhs / i256::from(2)) / rhs) } else { - self.checked_mul(mul).map(|x| (x - rhs / 2) / rhs) + self.checked_mul(mul).map(|x| (x - rhs / i256::from(2)) / rhs) }; ret.or_else(fallback) @@ -809,7 +1121,7 @@ impl Decimal for i256 { } fn from_float(value: f64) -> Self { - value.as_i256() + i256(value.as_i256()) } fn from_i128>(value: U) -> Self { @@ -855,21 +1167,21 @@ impl Decimal for i256 { } fn display(self, scale: u8) -> String { - display_decimal_256(self, scale) + display_decimal_256(self.0, scale) } fn to_float32(self, scale: u8) -> f32 { let div = 10_f32.powi(scale as i32); - self.as_f32() / div + self.0.as_f32() / div } fn to_float64(self, scale: u8) -> f64 { let div = 10_f64.powi(scale as i32); - self.as_f64() / div + self.0.as_f64() / div } fn to_int(self, scale: u8, rounding_mode: bool) -> Option { - if !(i128::MIN..=i128::MAX).contains(&self) { + if !(i256::from(i128::MIN)..=i256::from(i128::MAX)).contains(&self) { None } else { let val = self.as_i128(); @@ -940,12 +1252,12 @@ impl Decimal for i256 { })) } - const MIN: i256 = ethnum::int!( + const MIN: i256 = i256(ethnum::int!( "-9999999999999999999999999999999999999999999999999999999999999999999999999999" - ); - const MAX: i256 = ethnum::int!( + )); + const MAX: i256 = i256(ethnum::int!( "9999999999999999999999999999999999999999999999999999999999999999999999999999" - ); + )); fn to_column_from_buffer(value: Buffer, size: DecimalSize) -> DecimalColumn { DecimalColumn::Decimal256(value, size) } diff --git a/src/query/expression/src/types/variant.rs b/src/query/expression/src/types/variant.rs index 4a2e1c88bcaf..222ce2de9901 100644 --- a/src/query/expression/src/types/variant.rs +++ b/src/query/expression/src/types/variant.rs @@ -233,29 +233,28 @@ pub fn cast_scalar_to_variant(scalar: ScalarRef, tz: &TimeZone, buf: &mut Vec match x 
{ DecimalScalar::Decimal128(value, size) => { let dec = jsonb::Decimal128 { + precision: size.precision, scale: size.scale, value, }; jsonb::Value::Number(jsonb::Number::Decimal128(dec)) } - DecimalScalar::Decimal256(_value, _size) => { - // let dec = jsonb::Decimal256 { - // scale: size.scale, - // value, - //}; - // jsonb::Value::Number(jsonb::Number::Decimal256(dec)) - todo!() + DecimalScalar::Decimal256(value, size) => { + let dec = jsonb::Decimal256 { + precision: size.precision, + scale: size.scale, + value: value.0, + }; + jsonb::Value::Number(jsonb::Number::Decimal256(dec)) } }, ScalarRef::Boolean(b) => jsonb::Value::Bool(b), ScalarRef::Binary(s) => jsonb::Value::Binary(s), ScalarRef::String(s) => jsonb::Value::String(s.into()), ScalarRef::Timestamp(ts) => jsonb::Value::Timestamp(jsonb::Timestamp { - offset: 0, value: ts, }), ScalarRef::Date(d) => jsonb::Value::Date(jsonb::Date { - offset: 0, value: d, }), ScalarRef::Interval(i) => { diff --git a/src/query/expression/src/utils/display.rs b/src/query/expression/src/utils/display.rs index 2aff4183d539..47e63735e46b 100755 --- a/src/query/expression/src/utils/display.rs +++ b/src/query/expression/src/utils/display.rs @@ -356,7 +356,7 @@ impl Debug for DecimalScalar { write!( f, "{}_d256({},{})", - display_decimal_256(*val, size.scale), + display_decimal_256(val.0, size.scale), size.precision, size.scale ) @@ -372,7 +372,7 @@ impl Display for DecimalScalar { write!(f, "{}", display_decimal_128(*val, size.scale)) } DecimalScalar::Decimal256(val, size) => { - write!(f, "{}", display_decimal_256(*val, size.scale)) + write!(f, "{}", display_decimal_256(val.0, size.scale)) } } } @@ -424,7 +424,7 @@ impl Debug for DecimalColumn { .field(&format_args!( "[{}]", &val.iter() - .map(|x| display_decimal_256(*x, size.scale)) + .map(|x| display_decimal_256(x.0, size.scale)) .join(", ") )) .finish(), @@ -1116,8 +1116,8 @@ impl Display for DecimalDomain { } DecimalDomain::Decimal256(SimpleDomain { min, max }, size) => { 
write!(f, "{}", SimpleDomain { - min: display_decimal_256(*min, size.scale), - max: display_decimal_256(*max, size.scale), + min: display_decimal_256(min.0, size.scale), + max: display_decimal_256(max.0, size.scale), }) } } diff --git a/src/query/expression/src/utils/mod.rs b/src/query/expression/src/utils/mod.rs index f452eb651922..682a24938299 100644 --- a/src/query/expression/src/utils/mod.rs +++ b/src/query/expression/src/utils/mod.rs @@ -28,7 +28,7 @@ pub mod visitor; use databend_common_ast::Span; use databend_common_column::bitmap::Bitmap; use databend_common_exception::Result; -use ethnum::i256; +use crate::types::i256; pub use self::column_from::*; use crate::types::decimal::DecimalScalar; @@ -169,18 +169,18 @@ fn shrink_i64(num: i64) -> Scalar { fn shrink_d256(decimal: i256, size: DecimalSize) -> Scalar { if size.scale == 0 { - if decimal.is_positive() && decimal <= i256::from(u64::MAX) { - return shrink_u64(decimal.as_u64()); - } else if decimal <= i256::from(i64::MAX) && decimal >= i256::from(i64::MIN) { - return shrink_i64(decimal.as_i64()); + if decimal.is_positive() && decimal <= i256::from(u64::MAX.into()) { + return shrink_u64(decimal.0.as_u64()); + } else if decimal <= i256::from(i64::MAX.into()) && decimal >= i256::from(i64::MIN.into()) { + return shrink_i64(decimal.0.as_i64()); } } - let valid_bits = 256 - decimal.saturating_abs().leading_zeros(); + let valid_bits = 256 - decimal.0.saturating_abs().leading_zeros(); let log10_2 = std::f64::consts::LOG10_2; let mut precision = ((valid_bits as f64) * log10_2).floor() as u8; - if decimal.saturating_abs() >= i256::from(10).pow(precision as u32) { + if i256(decimal.0.saturating_abs()) >= i256::from(10).pow(precision as u32) { precision += 1; } diff --git a/src/query/expression/src/values.rs b/src/query/expression/src/values.rs index 13a87d6f8019..73e3a8780b20 100755 --- a/src/query/expression/src/values.rs +++ b/src/query/expression/src/values.rs @@ -33,7 +33,7 @@ use 
databend_common_exception::ErrorCode; use databend_common_exception::Result; use databend_common_io::prelude::BinaryRead; use enum_as_inner::EnumAsInner; -use ethnum::i256; +use crate::types::i256; use geo::Geometry; use geo::Point; use geozero::CoordDimensions; @@ -1406,7 +1406,7 @@ impl Column { } DecimalDataType::Decimal256(size) => { let values = (0..len) - .map(|_| i256::from(rng.gen::())) + .map(|_| i256::from(rng.gen::().into())) .collect::>(); Column::Decimal(DecimalColumn::Decimal256(values.into(), *size)) } diff --git a/src/query/functions/src/aggregates/aggregate_array_agg.rs b/src/query/functions/src/aggregates/aggregate_array_agg.rs index 1d555171bd7a..325afee0399d 100644 --- a/src/query/functions/src/aggregates/aggregate_array_agg.rs +++ b/src/query/functions/src/aggregates/aggregate_array_agg.rs @@ -35,7 +35,7 @@ use databend_common_expression::ColumnBuilder; use databend_common_expression::InputColumns; use databend_common_expression::Scalar; use databend_common_expression::ScalarRef; -use ethnum::i256; +use databend_common_expression::types::i256; use super::aggregate_function_factory::AggregateFunctionDescription; use super::aggregate_scalar_state::ScalarStateFunc; diff --git a/src/query/functions/src/aggregates/aggregate_array_moving.rs b/src/query/functions/src/aggregates/aggregate_array_moving.rs index 9231662bc48e..3a72203768f4 100644 --- a/src/query/functions/src/aggregates/aggregate_array_moving.rs +++ b/src/query/functions/src/aggregates/aggregate_array_moving.rs @@ -43,8 +43,8 @@ use databend_common_expression::ColumnBuilder; use databend_common_expression::InputColumns; use databend_common_expression::Scalar; use databend_common_expression::ScalarRef; -use ethnum::i256; use num_traits::AsPrimitive; +use databend_common_expression::types::i256; use super::aggregate_function::AggregateFunction; use super::aggregate_function::AggregateFunctionRef; diff --git a/src/query/functions/src/aggregates/aggregate_bitmap.rs 
b/src/query/functions/src/aggregates/aggregate_bitmap.rs index a69fa50b9529..fe7530165a45 100644 --- a/src/query/functions/src/aggregates/aggregate_bitmap.rs +++ b/src/query/functions/src/aggregates/aggregate_bitmap.rs @@ -35,8 +35,8 @@ use databend_common_expression::ColumnBuilder; use databend_common_expression::InputColumns; use databend_common_expression::Scalar; use databend_common_io::prelude::BinaryWrite; -use ethnum::i256; use roaring::RoaringTreemap; +use databend_common_expression::types::i256; use super::aggregate_function_factory::AggregateFunctionDescription; use super::aggregate_function_factory::AggregateFunctionSortDesc; diff --git a/src/query/functions/src/aggregates/aggregate_histogram.rs b/src/query/functions/src/aggregates/aggregate_histogram.rs index 778e48e4ee34..22549982cccf 100644 --- a/src/query/functions/src/aggregates/aggregate_histogram.rs +++ b/src/query/functions/src/aggregates/aggregate_histogram.rs @@ -28,7 +28,7 @@ use databend_common_expression::types::*; use databend_common_expression::with_number_mapped_type; use databend_common_expression::AggregateFunctionRef; use databend_common_expression::Scalar; -use ethnum::i256; +use databend_common_expression::types::i256; use serde::Deserialize; use serde::Serialize; use string::StringColumnBuilder; diff --git a/src/query/functions/src/aggregates/aggregate_min_max_any.rs b/src/query/functions/src/aggregates/aggregate_min_max_any.rs index 52d37617cbcc..1bdefae2e15a 100644 --- a/src/query/functions/src/aggregates/aggregate_min_max_any.rs +++ b/src/query/functions/src/aggregates/aggregate_min_max_any.rs @@ -27,7 +27,7 @@ use databend_common_expression::types::*; use databend_common_expression::with_number_mapped_type; use databend_common_expression::Scalar; use databend_common_expression::SELECTIVITY_THRESHOLD; -use ethnum::i256; +use databend_common_expression::types::i256; use super::aggregate_function_factory::AggregateFunctionDescription; use 
super::aggregate_function_factory::AggregateFunctionSortDesc; diff --git a/src/query/functions/src/aggregates/aggregate_quantile_cont.rs b/src/query/functions/src/aggregates/aggregate_quantile_cont.rs index 9c5fcacbef09..b9bf9ff6fc74 100644 --- a/src/query/functions/src/aggregates/aggregate_quantile_cont.rs +++ b/src/query/functions/src/aggregates/aggregate_quantile_cont.rs @@ -29,8 +29,8 @@ use databend_common_expression::with_number_mapped_type; use databend_common_expression::Column; use databend_common_expression::Scalar; use databend_common_expression::ScalarRef; -use ethnum::i256; use num_traits::AsPrimitive; +use databend_common_expression::types::i256; use super::AggregateUnaryFunction; use super::FunctionData; diff --git a/src/query/functions/src/aggregates/aggregate_quantile_disc.rs b/src/query/functions/src/aggregates/aggregate_quantile_disc.rs index 49a35517f0df..affd59cf47be 100644 --- a/src/query/functions/src/aggregates/aggregate_quantile_disc.rs +++ b/src/query/functions/src/aggregates/aggregate_quantile_disc.rs @@ -24,7 +24,7 @@ use databend_common_expression::types::number::*; use databend_common_expression::types::*; use databend_common_expression::with_number_mapped_type; use databend_common_expression::Scalar; -use ethnum::i256; +use databend_common_expression::types::i256; use super::get_levels; use super::AggregateUnaryFunction; diff --git a/src/query/functions/src/aggregates/aggregate_range_bound.rs b/src/query/functions/src/aggregates/aggregate_range_bound.rs index 30e2121a04cf..880703f72442 100644 --- a/src/query/functions/src/aggregates/aggregate_range_bound.rs +++ b/src/query/functions/src/aggregates/aggregate_range_bound.rs @@ -26,12 +26,12 @@ use databend_common_expression::types::*; use databend_common_expression::with_number_mapped_type; use databend_common_expression::AggregateFunctionRef; use databend_common_expression::Scalar; -use ethnum::i256; use rand::prelude::SliceRandom; use rand::rngs::SmallRng; use rand::thread_rng; use 
rand::Rng; use rand::SeedableRng; +use databend_common_expression::types::i256; use super::assert_unary_arguments; use super::AggregateUnaryFunction; diff --git a/src/query/functions/src/scalars/arithmetic/src/arithmetic.rs b/src/query/functions/src/scalars/arithmetic/src/arithmetic.rs index 7647cb606869..427469f25243 100644 --- a/src/query/functions/src/scalars/arithmetic/src/arithmetic.rs +++ b/src/query/functions/src/scalars/arithmetic/src/arithmetic.rs @@ -20,6 +20,7 @@ use std::ops::BitXor; use std::str::FromStr; use std::sync::Arc; +use databend_common_expression::types::Decimal; use databend_common_expression::serialize::read_decimal_with_size; use databend_common_expression::types::decimal::DecimalDomain; use databend_common_expression::types::decimal::DecimalType; @@ -62,9 +63,9 @@ use databend_common_expression::FunctionSignature; use databend_functions_scalar_decimal::register_decimal_to_float; use databend_functions_scalar_decimal::register_decimal_to_int; use databend_functions_scalar_decimal::register_decimal_to_string; -use ethnum::i256; use lexical_core::FormattedSize; use num_traits::AsPrimitive; +use databend_common_expression::types::i256; pub fn register(registry: &mut FunctionRegistry) { registry.register_aliases("plus", &["add"]); diff --git a/src/query/functions/src/scalars/decimal/src/arithmetic.rs b/src/query/functions/src/scalars/decimal/src/arithmetic.rs index 1e4b0a8776ba..5959cf487c72 100644 --- a/src/query/functions/src/scalars/decimal/src/arithmetic.rs +++ b/src/query/functions/src/scalars/decimal/src/arithmetic.rs @@ -26,7 +26,7 @@ use databend_common_expression::FunctionDomain; use databend_common_expression::FunctionEval; use databend_common_expression::FunctionRegistry; use databend_common_expression::FunctionSignature; -use ethnum::i256; +use databend_common_expression::types::i256; use super::convert_to_decimal; use super::convert_to_decimal_domain; diff --git a/src/query/functions/src/scalars/decimal/src/cast.rs 
b/src/query/functions/src/scalars/decimal/src/cast.rs index 910c55ac82bd..00b34a9faab4 100644 --- a/src/query/functions/src/scalars/decimal/src/cast.rs +++ b/src/query/functions/src/scalars/decimal/src/cast.rs @@ -37,8 +37,8 @@ use databend_common_expression::FunctionRegistry; use databend_common_expression::FunctionSignature; use databend_common_expression::Scalar; use databend_common_expression::Value; -use ethnum::i256; use num_traits::AsPrimitive; +use databend_common_expression::types::i256; // int float to decimal pub fn register_to_decimal(registry: &mut FunctionRegistry) { @@ -589,7 +589,7 @@ fn decimal_256_to_128( vectorize_with_builder_1_arg::, DecimalType>( |x: i256, builder: &mut Vec, ctx: &mut EvalContext| match x.checked_mul(factor) { - Some(x) if x <= max && x >= min => builder.push(*x.low()), + Some(x) if x <= max.into() && x >= min.into() => builder.push(*x.low()), _ => { ctx.set_error( builder.len(), @@ -614,7 +614,7 @@ fn decimal_256_to_128( }; match y { - Some(y) if (y <= max && y >= min) && (y != 0 || x / source_factor == 0) => { + Some(y) if (y <= max.into() && y >= min.into()) && (y != i256::from(0) || x / source_factor == i256::from(0)) => { builder.push(*y.low()); } _ => { @@ -635,7 +635,7 @@ macro_rules! m_decimal_to_decimal { ($from_size: expr, $dest_size: expr, $value: expr, $from_type_name: ty, $dest_type_name: ty, $ctx: expr) => { type F = $from_type_name; type T = $dest_type_name; - +/** let buffer: Value> = $value.try_downcast().unwrap(); // faster path let result: Value> = if $from_size.scale == $dest_size.scale @@ -707,6 +707,8 @@ macro_rules! 
m_decimal_to_decimal { }; result.upcast_decimal($dest_size) +*/ + todo!() }; } diff --git a/src/query/functions/src/scalars/decimal/src/comparison.rs b/src/query/functions/src/scalars/decimal/src/comparison.rs index 13f3356e29ea..f34ad415384f 100644 --- a/src/query/functions/src/scalars/decimal/src/comparison.rs +++ b/src/query/functions/src/scalars/decimal/src/comparison.rs @@ -27,7 +27,7 @@ use databend_common_expression::FunctionRegistry; use databend_common_expression::FunctionSignature; use databend_common_expression::SimpleDomainCmp; use databend_common_expression::Value; -use ethnum::i256; +use databend_common_expression::types::i256; use super::convert_to_decimal; use super::convert_to_decimal_domain; diff --git a/src/query/functions/src/scalars/decimal/src/math.rs b/src/query/functions/src/scalars/decimal/src/math.rs index bc9fcc664d5e..b8c15b4acb1a 100644 --- a/src/query/functions/src/scalars/decimal/src/math.rs +++ b/src/query/functions/src/scalars/decimal/src/math.rs @@ -27,7 +27,7 @@ use databend_common_expression::FunctionRegistry; use databend_common_expression::FunctionSignature; use databend_common_expression::Scalar; use databend_common_expression::Value; -use ethnum::i256; +use databend_common_expression::types::i256; pub fn register_decimal_math(registry: &mut FunctionRegistry) { let factory_rounds = |params: &[Scalar], args_type: &[DataType], round_mode: RoundMode| { diff --git a/src/query/functions/src/scalars/hash.rs b/src/query/functions/src/scalars/hash.rs index cc008bd2647b..72e39a92d891 100644 --- a/src/query/functions/src/scalars/hash.rs +++ b/src/query/functions/src/scalars/hash.rs @@ -42,7 +42,7 @@ use databend_common_expression::with_number_mapped_type; use databend_common_expression::FunctionDomain; use databend_common_expression::FunctionRegistry; use databend_common_expression::Scalar; -use ethnum::i256; +use databend_common_expression::types::i256; use md5::Digest; use md5::Md5 as Md5Hasher; use 
naive_cityhash::cityhash64_with_seed; @@ -311,7 +311,7 @@ for_all_integer_types! { integer_impl } impl DFHash for i256 { #[inline] fn hash(&self, state: &mut H) { - Hash::hash(self.0.as_slice(), state); + Hash::hash(self.0.0.as_slice(), state); } } diff --git a/src/query/functions/src/scalars/variant.rs b/src/query/functions/src/scalars/variant.rs index bd7fe37c7270..5897e47b2fed 100644 --- a/src/query/functions/src/scalars/variant.rs +++ b/src/query/functions/src/scalars/variant.rs @@ -732,6 +732,25 @@ pub fn register(registry: &mut FunctionRegistry) { }), ); +/** + registry.register_combine_nullable_1_arg::( + "as_decimal128", + |_, _| FunctionDomain::Full, + vectorize_with_builder_1_arg::>(|v, output, ctx| { + if let Some(validity) = &ctx.validity { + if !validity.get_bit(output.len()) { + output.push_null(); + return; + } + } + match RawJsonb::new(v).as_decimal128() { + Ok(Some(res)) => output.push(&res), + _ => output.push_null(), + } + }), + ); +*/ + registry.register_combine_nullable_1_arg::( "as_binary", |_, _| FunctionDomain::Full, diff --git a/src/query/functions/src/test_utils.rs b/src/query/functions/src/test_utils.rs index c2b47fd701b4..fd12046dbf04 100644 --- a/src/query/functions/src/test_utils.rs +++ b/src/query/functions/src/test_utils.rs @@ -37,6 +37,7 @@ use databend_common_expression::ConstantFolder; use databend_common_expression::FunctionContext; use databend_common_expression::RawExpr; use databend_common_expression::Scalar; +use databend_common_expression::types::i256; use crate::BUILTIN_FUNCTIONS; @@ -662,7 +663,7 @@ fn transform_literal(lit: ASTLiteral) -> Scalar { value, precision, scale, - } => Scalar::Decimal(DecimalScalar::Decimal256(value, DecimalSize { + } => Scalar::Decimal(DecimalScalar::Decimal256(i256(value), DecimalSize { precision, scale, })), diff --git a/src/query/sql/src/planner/semantic/type_check.rs b/src/query/sql/src/planner/semantic/type_check.rs index 762baa4f6908..b2aa323c7668 100644 --- 
a/src/query/sql/src/planner/semantic/type_check.rs +++ b/src/query/sql/src/planner/semantic/type_check.rs @@ -21,6 +21,7 @@ use std::str::FromStr; use std::sync::Arc; use std::vec; +use databend_common_expression::types::i256; use databend_common_ast::ast::BinaryOperator; use databend_common_ast::ast::ColumnID; use databend_common_ast::ast::ColumnRef; @@ -3961,7 +3962,7 @@ impl<'a> TypeChecker<'a> { value, precision, scale, - } => Scalar::Decimal(DecimalScalar::Decimal256(*value, DecimalSize { + } => Scalar::Decimal(DecimalScalar::Decimal256(i256(*value), DecimalSize { precision: *precision, scale: *scale, })), diff --git a/src/query/storages/parquet/src/parquet_rs/statistics/column.rs b/src/query/storages/parquet/src/parquet_rs/statistics/column.rs index ac155c9a4b04..9cda20a43d48 100644 --- a/src/query/storages/parquet/src/parquet_rs/statistics/column.rs +++ b/src/query/storages/parquet/src/parquet_rs/statistics/column.rs @@ -19,7 +19,8 @@ use databend_common_expression::types::NumberDataType; use databend_common_expression::Scalar; use databend_common_expression::TableDataType; use databend_storages_common_table_meta::meta::ColumnStatistics; -use ethnum::I256; +use databend_common_expression::types::i256; + use parquet::data_type::AsBytes; use parquet::file::statistics::Statistics; @@ -58,8 +59,8 @@ pub fn convert_column_statistics(s: &Statistics, typ: &TableDataType) -> Option< Scalar::Decimal(DecimalScalar::Decimal128(i128::from(min), *size)), ), TableDataType::Decimal(DecimalDataType::Decimal256(size)) => ( - Scalar::Decimal(DecimalScalar::Decimal256(I256::from_i128(max), *size)), - Scalar::Decimal(DecimalScalar::Decimal256(I256::from_i128(min), *size)), + Scalar::Decimal(DecimalScalar::Decimal256(i256::from(max.into()), *size)), + Scalar::Decimal(DecimalScalar::Decimal256(i256::from(min.into()), *size)), ), _ => return None, } @@ -89,8 +90,8 @@ pub fn convert_column_statistics(s: &Statistics, typ: &TableDataType) -> Option< 
Scalar::Decimal(DecimalScalar::Decimal128(i128::from(min), *size)), ), TableDataType::Decimal(DecimalDataType::Decimal256(size)) => ( - Scalar::Decimal(DecimalScalar::Decimal256(I256::from_i128(max), *size)), - Scalar::Decimal(DecimalScalar::Decimal256(I256::from_i128(min), *size)), + Scalar::Decimal(DecimalScalar::Decimal256(i256::from(max.into()), *size)), + Scalar::Decimal(DecimalScalar::Decimal256(i256::from(min.into()), *size)), ), _ => return None, } diff --git a/src/query/storages/parquet/src/parquet_rs/statistics/page.rs b/src/query/storages/parquet/src/parquet_rs/statistics/page.rs index 0ae34f62bab0..bbb97ef2162a 100644 --- a/src/query/storages/parquet/src/parquet_rs/statistics/page.rs +++ b/src/query/storages/parquet/src/parquet_rs/statistics/page.rs @@ -18,8 +18,8 @@ use databend_common_expression::types::DecimalDataType; use databend_common_expression::types::NumberDataType; use databend_common_expression::Scalar; use databend_common_expression::TableDataType; +use databend_common_expression::types::i256; use databend_storages_common_table_meta::meta::ColumnStatistics; -use ethnum::I256; use parquet::data_type::AsBytes; use parquet::data_type::ByteArray; use parquet::data_type::FixedLenByteArray; @@ -135,8 +135,8 @@ fn convert_page_index_int32( Scalar::Decimal(DecimalScalar::Decimal128(i128::from(min), *size)), ), TableDataType::Decimal(DecimalDataType::Decimal256(size)) => ( - Scalar::Decimal(DecimalScalar::Decimal256(I256::from_i128(max), *size)), - Scalar::Decimal(DecimalScalar::Decimal256(I256::from_i128(min), *size)), + Scalar::Decimal(DecimalScalar::Decimal256(i256::from(max.into()), *size)), + Scalar::Decimal(DecimalScalar::Decimal256(i256::from(min.into()), *size)), ), _ => unreachable!(), }; @@ -165,8 +165,8 @@ fn convert_page_index_int64( Scalar::Decimal(DecimalScalar::Decimal128(i128::from(min), *size)), ), TableDataType::Decimal(DecimalDataType::Decimal256(size)) => ( - Scalar::Decimal(DecimalScalar::Decimal256(I256::from_i128(max), 
*size)), - Scalar::Decimal(DecimalScalar::Decimal256(I256::from_i128(min), *size)), + Scalar::Decimal(DecimalScalar::Decimal256(i256::from(max.into()), *size)), + Scalar::Decimal(DecimalScalar::Decimal256(i256::from(min.into()), *size)), ), _ => unreachable!(), }; diff --git a/src/query/storages/parquet/src/parquet_rs/statistics/utils.rs b/src/query/storages/parquet/src/parquet_rs/statistics/utils.rs index c155ca5d8930..7e4c6dddd7bd 100644 --- a/src/query/storages/parquet/src/parquet_rs/statistics/utils.rs +++ b/src/query/storages/parquet/src/parquet_rs/statistics/utils.rs @@ -28,7 +28,7 @@ pub fn decode_decimal128_from_bytes(arr: &FixedLenByteArray, size: DecimalSize) pub fn decode_decimal256_from_bytes(arr: &FixedLenByteArray, size: DecimalSize) -> Scalar { let v = i256::from_be_bytes(sign_extend_be(arr.as_bytes())); let (lo, hi) = v.to_parts(); - let v = I256::from_words(hi, lo as i128); + let v = databend_common_expression::types::i256(I256::from_words(hi, lo as i128)); Scalar::Decimal(DecimalScalar::Decimal256(v, size)) } diff --git a/src/query/storages/stage/src/read/avro/decoder.rs b/src/query/storages/stage/src/read/avro/decoder.rs index ba2952cb8ac0..363cc227fe6d 100644 --- a/src/query/storages/stage/src/read/avro/decoder.rs +++ b/src/query/storages/stage/src/read/avro/decoder.rs @@ -42,6 +42,7 @@ use databend_common_meta_app::principal::NullAs; use lexical_core::FromLexical; use num_bigint::BigInt; use num_traits::NumCast; +use databend_common_expression::types::i256; use crate::read::avro::avro_to_jsonb::to_jsonb; use crate::read::avro::schema_match::MatchedField; diff --git a/src/query/storages/stage/src/read/avro/schema_match.rs b/src/query/storages/stage/src/read/avro/schema_match.rs index c2971b819ffe..ee5839c00b5a 100644 --- a/src/query/storages/stage/src/read/avro/schema_match.rs +++ b/src/query/storages/stage/src/read/avro/schema_match.rs @@ -18,7 +18,7 @@ use apache_avro::Schema; use databend_common_expression::types::Decimal; use 
databend_common_expression::types::NumberDataType; use databend_common_expression::TableDataType; -use ethnum::i256; +use databend_common_expression::types::i256; type MatchResult = Result; From 948d4502295edd662be5756e29f6b02f26061f0e Mon Sep 17 00:00:00 2001 From: baishen Date: Thu, 24 Apr 2025 13:55:56 +0800 Subject: [PATCH 05/13] fix --- Cargo.toml | 2 +- src/common/native/src/read/batch_read.rs | 7 +- src/common/native/src/read/deserialize.rs | 1 - .../expression/src/aggregate/payload_flush.rs | 2 +- .../expression/src/aggregate/payload_row.rs | 2 +- src/query/expression/src/filter/select.rs | 2 +- src/query/expression/src/kernels/concat.rs | 2 +- .../src/kernels/group_by_hash/method.rs | 2 +- .../group_by_hash/method_fixed_keys.rs | 2 +- src/query/expression/src/row/fixed.rs | 2 +- src/query/expression/src/row/row_converter.rs | 2 +- src/query/expression/src/types/decimal.rs | 127 ++++++++++++++---- src/query/expression/src/types/variant.rs | 8 +- src/query/expression/src/utils/mod.rs | 14 +- src/query/expression/src/values.rs | 4 +- .../src/aggregates/aggregate_array_agg.rs | 2 +- .../src/aggregates/aggregate_array_moving.rs | 2 +- .../src/aggregates/aggregate_bitmap.rs | 2 +- .../src/aggregates/aggregate_histogram.rs | 2 +- .../src/aggregates/aggregate_min_max_any.rs | 2 +- .../src/aggregates/aggregate_quantile_cont.rs | 2 +- .../src/aggregates/aggregate_quantile_disc.rs | 2 +- .../src/aggregates/aggregate_range_bound.rs | 2 +- .../src/scalars/arithmetic/src/arithmetic.rs | 4 +- .../src/scalars/decimal/src/arithmetic.rs | 2 +- .../functions/src/scalars/decimal/src/cast.rs | 12 +- .../src/scalars/decimal/src/comparison.rs | 2 +- .../functions/src/scalars/decimal/src/math.rs | 2 +- src/query/functions/src/scalars/hash.rs | 4 +- src/query/functions/src/scalars/variant.rs | 64 +++++++-- src/query/functions/src/test_utils.rs | 2 +- .../sql/src/planner/semantic/type_check.rs | 2 +- .../src/parquet_rs/statistics/column.rs | 12 +- 
.../parquet/src/parquet_rs/statistics/page.rs | 11 +- .../src/parquet_rs/statistics/utils.rs | 3 +- .../storages/stage/src/read/avro/decoder.rs | 1 - .../stage/src/read/avro/schema_match.rs | 2 +- 37 files changed, 208 insertions(+), 110 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index e74bbb780170..2ad8162a41bb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -350,7 +350,7 @@ jaq-core = "1.5.1" jaq-interpret = "1.5.0" jaq-parse = "1.0.3" jaq-std = "1.6.0" -jiff = { version = "0.2.8", features = ["serde", "tzdb-bundle-always"] } +jiff = { version = "0.2.10", features = ["serde", "tzdb-bundle-always"] } jsonb = "0.5.1" jwt-simple = { version = "0.12.10", default-features = false, features = ["pure-rust"] } lenient_semver = "0.4.2" diff --git a/src/common/native/src/read/batch_read.rs b/src/common/native/src/read/batch_read.rs index a45825986229..5371ff553041 100644 --- a/src/common/native/src/read/batch_read.rs +++ b/src/common/native/src/read/batch_read.rs @@ -73,8 +73,11 @@ pub fn read_nested_column( ), Decimal(decimal) if decimal.precision() > MAX_DECIMAL128_PRECISION => { init.push(InitNested::Primitive(is_nullable)); - //read_nested_decimal::( - read_nested_decimal::( + read_nested_decimal::< + databend_common_column::types::i256, + databend_common_expression::types::i256, + _, + >( &mut readers.pop().unwrap(), data_type.clone(), decimal.size(), diff --git a/src/common/native/src/read/deserialize.rs b/src/common/native/src/read/deserialize.rs index 2a5a77e7ade3..d6cf0e53033c 100644 --- a/src/common/native/src/read/deserialize.rs +++ b/src/common/native/src/read/deserialize.rs @@ -164,7 +164,6 @@ where DynIter::new(DecimalNestedIter::< _, databend_common_column::types::i256, - //ethnum::i256, databend_common_expression::types::i256, >::new( readers.pop().unwrap(), data_type.clone(), t.size(), init diff --git a/src/query/expression/src/aggregate/payload_flush.rs b/src/query/expression/src/aggregate/payload_flush.rs index 90690539894a..59906620afdf 100644 --- 
a/src/query/expression/src/aggregate/payload_flush.rs +++ b/src/query/expression/src/aggregate/payload_flush.rs @@ -15,7 +15,6 @@ use databend_common_exception::Result; use databend_common_io::prelude::bincode_deserialize_from_slice; -use crate::types::i256; use super::partitioned_payload::PartitionedPayload; use super::payload::Payload; use super::probe_state::ProbeState; @@ -25,6 +24,7 @@ use crate::types::binary::BinaryColumn; use crate::types::binary::BinaryColumnBuilder; use crate::types::decimal::Decimal; use crate::types::decimal::DecimalType; +use crate::types::i256; use crate::types::nullable::NullableColumn; use crate::types::string::StringColumn; use crate::types::string::StringColumnBuilder; diff --git a/src/query/expression/src/aggregate/payload_row.rs b/src/query/expression/src/aggregate/payload_row.rs index 5fbd294dc1e5..06308ba9be6f 100644 --- a/src/query/expression/src/aggregate/payload_row.rs +++ b/src/query/expression/src/aggregate/payload_row.rs @@ -16,13 +16,13 @@ use bumpalo::Bump; use databend_common_column::bitmap::Bitmap; use databend_common_io::prelude::bincode_deserialize_from_slice; use databend_common_io::prelude::bincode_serialize_into_buf; -use crate::types::i256; use crate::read; use crate::store; use crate::types::binary::BinaryColumn; use crate::types::decimal::DecimalColumn; use crate::types::decimal::DecimalType; +use crate::types::i256; use crate::types::AnyType; use crate::types::ArgType; use crate::types::BinaryType; diff --git a/src/query/expression/src/filter/select.rs b/src/query/expression/src/filter/select.rs index d59003901818..ed5f1b842b2a 100644 --- a/src/query/expression/src/filter/select.rs +++ b/src/query/expression/src/filter/select.rs @@ -14,12 +14,12 @@ use databend_common_exception::ErrorCode; use databend_common_exception::Result; -use crate::types::i256; use crate::arrow::and_validities; use crate::filter::SelectOp; use crate::filter::SelectStrategy; use crate::types::decimal::DecimalType; +use 
crate::types::i256; use crate::types::nullable::NullableColumn; use crate::types::number::*; use crate::types::AnyType; diff --git a/src/query/expression/src/kernels/concat.rs b/src/query/expression/src/kernels/concat.rs index 51d2cbfd2e25..ab61a79b491c 100644 --- a/src/query/expression/src/kernels/concat.rs +++ b/src/query/expression/src/kernels/concat.rs @@ -21,11 +21,11 @@ use databend_common_column::buffer::Buffer; use databend_common_exception::ErrorCode; use databend_common_exception::Result; use itertools::Itertools; -use crate::types::i256; use crate::types::array::ArrayColumnBuilder; use crate::types::decimal::Decimal; use crate::types::decimal::DecimalColumn; +use crate::types::i256; use crate::types::map::KvColumnBuilder; use crate::types::nullable::NullableColumn; use crate::types::number::NumberColumn; diff --git a/src/query/expression/src/kernels/group_by_hash/method.rs b/src/query/expression/src/kernels/group_by_hash/method.rs index 1c29af966b4d..a6b259da4aa4 100644 --- a/src/query/expression/src/kernels/group_by_hash/method.rs +++ b/src/query/expression/src/kernels/group_by_hash/method.rs @@ -23,9 +23,9 @@ use databend_common_hashtable::FastHash; use either::Either; use ethnum::u256; -use crate::types::i256; use crate::types::binary::BinaryColumn; use crate::types::decimal::Decimal; +use crate::types::i256; use crate::types::DataType; use crate::types::DecimalDataType; use crate::types::NumberDataType; diff --git a/src/query/expression/src/kernels/group_by_hash/method_fixed_keys.rs b/src/query/expression/src/kernels/group_by_hash/method_fixed_keys.rs index 35c1018185b6..7608a328f404 100644 --- a/src/query/expression/src/kernels/group_by_hash/method_fixed_keys.rs +++ b/src/query/expression/src/kernels/group_by_hash/method_fixed_keys.rs @@ -24,10 +24,10 @@ use ethnum::u256; use ethnum::U256; use micromarshal::Marshal; -use crate::types::i256; use crate::types::boolean::BooleanType; use crate::types::decimal::Decimal; use 
crate::types::decimal::DecimalColumn; +use crate::types::i256; use crate::types::nullable::NullableColumn; use crate::types::number::Number; use crate::types::number::NumberColumn; diff --git a/src/query/expression/src/row/fixed.rs b/src/query/expression/src/row/fixed.rs index e52a38c8c524..e98a27deac17 100644 --- a/src/query/expression/src/row/fixed.rs +++ b/src/query/expression/src/row/fixed.rs @@ -14,10 +14,10 @@ use databend_common_column::bitmap::Bitmap; use databend_common_column::types::months_days_micros; -use crate::types::i256; use super::row_converter::null_sentinel; use crate::types::binary::BinaryColumnBuilder; +use crate::types::i256; use crate::types::F32; use crate::types::F64; diff --git a/src/query/expression/src/row/row_converter.rs b/src/query/expression/src/row/row_converter.rs index 510eb01f505f..8af10d312d06 100644 --- a/src/query/expression/src/row/row_converter.rs +++ b/src/query/expression/src/row/row_converter.rs @@ -17,7 +17,6 @@ use std::sync::Arc; use databend_common_column::types::months_days_micros; use databend_common_exception::ErrorCode; use databend_common_exception::Result; -use crate::types::i256; use super::fixed; use super::fixed::FixedLengthEncoding; @@ -25,6 +24,7 @@ use super::variable; use crate::types::binary::BinaryColumn; use crate::types::binary::BinaryColumnBuilder; use crate::types::decimal::DecimalColumn; +use crate::types::i256; use crate::types::DataType; use crate::types::DecimalDataType; use crate::types::NumberColumn; diff --git a/src/query/expression/src/types/decimal.rs b/src/query/expression/src/types/decimal.rs index bc2c1181b163..4d4c65cb28bd 100644 --- a/src/query/expression/src/types/decimal.rs +++ b/src/query/expression/src/types/decimal.rs @@ -15,21 +15,17 @@ use std::cmp::Ordering; use std::fmt::Debug; use std::marker::PhantomData; -use std::ops::Range; - use std::num::TryFromIntError; - -use micromarshal::Marshal; -use std::ops::Mul; -use std::ops::Div; use std::ops::Add; -use std::ops::Sub; - -use 
std::ops::Neg; use std::ops::AddAssign; -use std::ops::SubAssign; -use std::ops::MulAssign; +use std::ops::Div; use std::ops::DivAssign; +use std::ops::Mul; +use std::ops::MulAssign; +use std::ops::Neg; +use std::ops::Range; +use std::ops::Sub; +use std::ops::SubAssign; use arrow_data::ArrayData; use arrow_data::ArrayDataBuilder; @@ -41,10 +37,11 @@ use databend_common_exception::Result; use databend_common_io::display_decimal_128; use databend_common_io::display_decimal_256; use enum_as_inner::EnumAsInner; -//use ethnum::i256; +// use ethnum::i256; use ethnum::u256; use ethnum::AsI256; use itertools::Itertools; +use micromarshal::Marshal; use num_bigint::BigInt; use num_traits::FromBytes; use num_traits::NumCast; @@ -52,8 +49,7 @@ use num_traits::ToPrimitive; use serde::Deserialize; use serde::Serialize; -//use databend_common_column::types::NativeType; - +// use databend_common_column::types::NativeType; use super::SimpleDomain; use crate::types::ArgType; use crate::types::DataType; @@ -66,10 +62,10 @@ use crate::Domain; use crate::Scalar; use crate::ScalarRef; -//use databend_common_column::types::i256; +// use databend_common_column::types::i256; /// Physical representation of a decimal -#[derive(Clone, Copy, Default, Eq, Hash, PartialEq, PartialOrd, Ord, Serialize, Deserialize)] +#[derive(Clone, Copy, Default, Eq, Ord, Serialize, Deserialize)] #[allow(non_camel_case_types)] #[repr(C)] pub struct i256(pub ethnum::I256); @@ -84,8 +80,14 @@ impl i256 { /// The multiplicative inverse for this integer type, i.e. `-1`. pub const MINUS_ONE: Self = Self(ethnum::I256::new(-1)); - pub fn from(value: i128) -> Self { - Self(value.as_i256()) + // pub fn from(value: i128) -> Self { + // Self(value.as_i256()) + //} + + /// Creates a new 256-bit integer value from a primitive `i128` integer. + #[inline] + pub const fn new(value: i128) -> Self { + Self(ethnum::I256::new(value)) } /// Returns a new [`i256`] from two `i128`. 
@@ -163,6 +165,28 @@ impl i256 { self.0.is_negative() } + #[inline] + pub fn saturating_abs(self) -> Self { + Self(self.0.saturating_abs()) + } + + #[inline(always)] + pub fn leading_zeros(self) -> u32 { + self.0.leading_zeros() + } + + /// Cast to a primitive `u64`. + #[inline] + pub const fn as_u64(self) -> u64 { + self.0.as_u64() + } + + /// Cast to a primitive `i64`. + #[inline] + pub const fn as_i64(self) -> i64 { + self.0.as_i64() + } + /// Cast to a primitive `i128`. #[inline] pub const fn as_i128(self) -> i128 { @@ -289,8 +313,6 @@ impl Div for i256 { } } - - macro_rules! impl_from { ($($t:ty),* $(,)?) => {$( impl From<$t> for i256 { @@ -317,12 +339,9 @@ impl TryFrom for i256 { } } - - - impl BorshSerialize for i256 { fn serialize(&self, writer: &mut W) -> borsh::io::Result<()> { - BorshSerialize::serialize(&self.0.0, writer) + BorshSerialize::serialize(&self.0 .0, writer) } } @@ -354,13 +373,59 @@ impl_into_float! { f32 => as_f32, f64 => as_f64, } +impl core::hash::Hash for i256 { + #[inline] + fn hash(&self, hasher: &mut H) + where H: core::hash::Hasher { + core::hash::Hash::hash(&self.0, hasher); + } +} +impl PartialEq for i256 { + #[inline] + fn eq(&self, other: &Self) -> bool { + let (ahi, alo) = self.0.into_words(); + let (bhi, blo) = other.0.into_words(); + (ahi == bhi) & (alo == blo) + // bitwise and rather than logical and + // to make O0 code more effecient. 
+ } +} +impl PartialEq for i256 { + #[inline] + fn eq(&self, other: &i128) -> bool { + *self == i256::new(*other) + } +} +impl PartialEq for i128 { + #[inline] + fn eq(&self, other: &i256) -> bool { + i256::new(*self) == *other + } +} +impl PartialOrd for i256 { + #[inline] + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} +impl PartialOrd for i256 { + #[inline] + fn partial_cmp(&self, rhs: &i128) -> Option { + Some(self.cmp(&i256::new(*rhs))) + } +} - +impl PartialOrd for i128 { + #[inline] + fn partial_cmp(&self, rhs: &i256) -> Option { + Some(i256::new(*self).cmp(rhs)) + } +} #[derive(Debug, Clone, PartialEq, Eq)] pub struct DecimalType(PhantomData); @@ -1058,9 +1123,11 @@ impl Decimal for i256 { fn do_round_mul(self, rhs: Self, shift_scale: u32) -> Option { let div = i256::e(shift_scale); let ret: Option = if self.is_negative() == rhs.is_negative() { - self.checked_mul(rhs).map(|x| (x + div / i256::from(2)) / div) + self.checked_mul(rhs) + .map(|x| (x + div / i256::from(2)) / div) } else { - self.checked_mul(rhs).map(|x| (x - div / i256::from(2)) / div) + self.checked_mul(rhs) + .map(|x| (x - div / i256::from(2)) / div) }; ret.or_else(|| { @@ -1093,9 +1160,11 @@ impl Decimal for i256 { let mul = i256::e(mul_scale); let ret: Option = if self.is_negative() == rhs.is_negative() { - self.checked_mul(mul).map(|x| (x + rhs / i256::from(2)) / rhs) + self.checked_mul(mul) + .map(|x| (x + rhs / i256::from(2)) / rhs) } else { - self.checked_mul(mul).map(|x| (x - rhs / i256::from(2)) / rhs) + self.checked_mul(mul) + .map(|x| (x - rhs / i256::from(2)) / rhs) }; ret.or_else(fallback) diff --git a/src/query/expression/src/types/variant.rs b/src/query/expression/src/types/variant.rs index ebd09f3fb1a8..2f856e780392 100644 --- a/src/query/expression/src/types/variant.rs +++ b/src/query/expression/src/types/variant.rs @@ -246,12 +246,8 @@ pub fn cast_scalar_to_variant(scalar: ScalarRef, tz: &TimeZone, buf: &mut Vec jsonb::Value::Bool(b), 
ScalarRef::Binary(s) => jsonb::Value::Binary(s), ScalarRef::String(s) => jsonb::Value::String(s.into()), - ScalarRef::Timestamp(ts) => jsonb::Value::Timestamp(jsonb::Timestamp { - value: ts, - }), - ScalarRef::Date(d) => jsonb::Value::Date(jsonb::Date { - value: d, - }), + ScalarRef::Timestamp(ts) => jsonb::Value::Timestamp(jsonb::Timestamp { value: ts }), + ScalarRef::Date(d) => jsonb::Value::Date(jsonb::Date { value: d }), ScalarRef::Interval(i) => { let interval = jsonb::Interval { months: i.months(), diff --git a/src/query/expression/src/utils/mod.rs b/src/query/expression/src/utils/mod.rs index 682a24938299..cf398207e936 100644 --- a/src/query/expression/src/utils/mod.rs +++ b/src/query/expression/src/utils/mod.rs @@ -28,11 +28,11 @@ pub mod visitor; use databend_common_ast::Span; use databend_common_column::bitmap::Bitmap; use databend_common_exception::Result; -use crate::types::i256; pub use self::column_from::*; use crate::types::decimal::DecimalScalar; use crate::types::decimal::MAX_DECIMAL256_PRECISION; +use crate::types::i256; use crate::types::AnyType; use crate::types::DataType; use crate::types::DecimalDataType; @@ -169,18 +169,18 @@ fn shrink_i64(num: i64) -> Scalar { fn shrink_d256(decimal: i256, size: DecimalSize) -> Scalar { if size.scale == 0 { - if decimal.is_positive() && decimal <= i256::from(u64::MAX.into()) { - return shrink_u64(decimal.0.as_u64()); - } else if decimal <= i256::from(i64::MAX.into()) && decimal >= i256::from(i64::MIN.into()) { - return shrink_i64(decimal.0.as_i64()); + if decimal.is_positive() && decimal <= i256::from(u64::MAX) { + return shrink_u64(decimal.as_u64()); + } else if decimal <= i256::from(i64::MAX) && decimal >= i256::from(i64::MIN) { + return shrink_i64(decimal.as_i64()); } } - let valid_bits = 256 - decimal.0.saturating_abs().leading_zeros(); + let valid_bits = 256 - decimal.saturating_abs().leading_zeros(); let log10_2 = std::f64::consts::LOG10_2; let mut precision = ((valid_bits as f64) * log10_2).floor() as 
u8; - if i256(decimal.0.saturating_abs()) >= i256::from(10).pow(precision as u32) { + if decimal.saturating_abs() >= i256::from(10).pow(precision as u32) { precision += 1; } diff --git a/src/query/expression/src/values.rs b/src/query/expression/src/values.rs index 73e3a8780b20..07e4af159384 100755 --- a/src/query/expression/src/values.rs +++ b/src/query/expression/src/values.rs @@ -33,7 +33,6 @@ use databend_common_exception::ErrorCode; use databend_common_exception::Result; use databend_common_io::prelude::BinaryRead; use enum_as_inner::EnumAsInner; -use crate::types::i256; use geo::Geometry; use geo::Point; use geozero::CoordDimensions; @@ -68,6 +67,7 @@ use crate::types::geography::GeographyColumn; use crate::types::geography::GeographyRef; use crate::types::geometry::compare_geometry; use crate::types::geometry::GeometryType; +use crate::types::i256; use crate::types::nullable::NullableColumn; use crate::types::nullable::NullableColumnBuilder; use crate::types::nullable::NullableColumnVec; @@ -1406,7 +1406,7 @@ impl Column { } DecimalDataType::Decimal256(size) => { let values = (0..len) - .map(|_| i256::from(rng.gen::().into())) + .map(|_| i256::from(rng.gen::())) .collect::>(); Column::Decimal(DecimalColumn::Decimal256(values.into(), *size)) } diff --git a/src/query/functions/src/aggregates/aggregate_array_agg.rs b/src/query/functions/src/aggregates/aggregate_array_agg.rs index 325afee0399d..0911b6f32473 100644 --- a/src/query/functions/src/aggregates/aggregate_array_agg.rs +++ b/src/query/functions/src/aggregates/aggregate_array_agg.rs @@ -22,6 +22,7 @@ use borsh::BorshDeserialize; use borsh::BorshSerialize; use databend_common_exception::Result; use databend_common_expression::types::decimal::*; +use databend_common_expression::types::i256; use databend_common_expression::types::number::*; use databend_common_expression::types::Bitmap; use databend_common_expression::types::DataType; @@ -35,7 +36,6 @@ use databend_common_expression::ColumnBuilder; use 
databend_common_expression::InputColumns; use databend_common_expression::Scalar; use databend_common_expression::ScalarRef; -use databend_common_expression::types::i256; use super::aggregate_function_factory::AggregateFunctionDescription; use super::aggregate_scalar_state::ScalarStateFunc; diff --git a/src/query/functions/src/aggregates/aggregate_array_moving.rs b/src/query/functions/src/aggregates/aggregate_array_moving.rs index 3a72203768f4..4e77587ed498 100644 --- a/src/query/functions/src/aggregates/aggregate_array_moving.rs +++ b/src/query/functions/src/aggregates/aggregate_array_moving.rs @@ -22,6 +22,7 @@ use borsh::BorshSerialize; use databend_common_exception::ErrorCode; use databend_common_exception::Result; use databend_common_expression::types::decimal::*; +use databend_common_expression::types::i256; use databend_common_expression::types::number::Number; use databend_common_expression::types::ArgType; use databend_common_expression::types::Bitmap; @@ -44,7 +45,6 @@ use databend_common_expression::InputColumns; use databend_common_expression::Scalar; use databend_common_expression::ScalarRef; use num_traits::AsPrimitive; -use databend_common_expression::types::i256; use super::aggregate_function::AggregateFunction; use super::aggregate_function::AggregateFunctionRef; diff --git a/src/query/functions/src/aggregates/aggregate_bitmap.rs b/src/query/functions/src/aggregates/aggregate_bitmap.rs index fe7530165a45..eb7725aa7663 100644 --- a/src/query/functions/src/aggregates/aggregate_bitmap.rs +++ b/src/query/functions/src/aggregates/aggregate_bitmap.rs @@ -25,6 +25,7 @@ use std::sync::Arc; use databend_common_exception::ErrorCode; use databend_common_exception::Result; use databend_common_expression::types::decimal::DecimalType; +use databend_common_expression::types::i256; use databend_common_expression::types::Bitmap; use databend_common_expression::types::MutableBitmap; use databend_common_expression::types::*; @@ -36,7 +37,6 @@ use 
databend_common_expression::InputColumns; use databend_common_expression::Scalar; use databend_common_io::prelude::BinaryWrite; use roaring::RoaringTreemap; -use databend_common_expression::types::i256; use super::aggregate_function_factory::AggregateFunctionDescription; use super::aggregate_function_factory::AggregateFunctionSortDesc; diff --git a/src/query/functions/src/aggregates/aggregate_histogram.rs b/src/query/functions/src/aggregates/aggregate_histogram.rs index 22549982cccf..750bf21062f5 100644 --- a/src/query/functions/src/aggregates/aggregate_histogram.rs +++ b/src/query/functions/src/aggregates/aggregate_histogram.rs @@ -23,12 +23,12 @@ use borsh::BorshSerialize; use databend_common_exception::ErrorCode; use databend_common_exception::Result; use databend_common_expression::types::decimal::*; +use databend_common_expression::types::i256; use databend_common_expression::types::number::*; use databend_common_expression::types::*; use databend_common_expression::with_number_mapped_type; use databend_common_expression::AggregateFunctionRef; use databend_common_expression::Scalar; -use databend_common_expression::types::i256; use serde::Deserialize; use serde::Serialize; use string::StringColumnBuilder; diff --git a/src/query/functions/src/aggregates/aggregate_min_max_any.rs b/src/query/functions/src/aggregates/aggregate_min_max_any.rs index 1bdefae2e15a..ba775e1af477 100644 --- a/src/query/functions/src/aggregates/aggregate_min_max_any.rs +++ b/src/query/functions/src/aggregates/aggregate_min_max_any.rs @@ -21,13 +21,13 @@ use borsh::BorshSerialize; use databend_common_exception::ErrorCode; use databend_common_exception::Result; use databend_common_expression::types::decimal::*; +use databend_common_expression::types::i256; use databend_common_expression::types::number::*; use databend_common_expression::types::Bitmap; use databend_common_expression::types::*; use databend_common_expression::with_number_mapped_type; use databend_common_expression::Scalar; 
use databend_common_expression::SELECTIVITY_THRESHOLD; -use databend_common_expression::types::i256; use super::aggregate_function_factory::AggregateFunctionDescription; use super::aggregate_function_factory::AggregateFunctionSortDesc; diff --git a/src/query/functions/src/aggregates/aggregate_quantile_cont.rs b/src/query/functions/src/aggregates/aggregate_quantile_cont.rs index b9bf9ff6fc74..7be60f43e8c2 100644 --- a/src/query/functions/src/aggregates/aggregate_quantile_cont.rs +++ b/src/query/functions/src/aggregates/aggregate_quantile_cont.rs @@ -23,6 +23,7 @@ use databend_common_exception::Result; use databend_common_expression::types::array::ArrayColumnBuilder; use databend_common_expression::types::decimal::Decimal; use databend_common_expression::types::decimal::DecimalType; +use databend_common_expression::types::i256; use databend_common_expression::types::number::*; use databend_common_expression::types::*; use databend_common_expression::with_number_mapped_type; @@ -30,7 +31,6 @@ use databend_common_expression::Column; use databend_common_expression::Scalar; use databend_common_expression::ScalarRef; use num_traits::AsPrimitive; -use databend_common_expression::types::i256; use super::AggregateUnaryFunction; use super::FunctionData; diff --git a/src/query/functions/src/aggregates/aggregate_quantile_disc.rs b/src/query/functions/src/aggregates/aggregate_quantile_disc.rs index affd59cf47be..985f9b7ee729 100644 --- a/src/query/functions/src/aggregates/aggregate_quantile_disc.rs +++ b/src/query/functions/src/aggregates/aggregate_quantile_disc.rs @@ -20,11 +20,11 @@ use databend_common_exception::ErrorCode; use databend_common_exception::Result; use databend_common_expression::types::array::ArrayColumnBuilder; use databend_common_expression::types::decimal::*; +use databend_common_expression::types::i256; use databend_common_expression::types::number::*; use databend_common_expression::types::*; use databend_common_expression::with_number_mapped_type; use 
databend_common_expression::Scalar; -use databend_common_expression::types::i256; use super::get_levels; use super::AggregateUnaryFunction; diff --git a/src/query/functions/src/aggregates/aggregate_range_bound.rs b/src/query/functions/src/aggregates/aggregate_range_bound.rs index 880703f72442..75c89c1c1507 100644 --- a/src/query/functions/src/aggregates/aggregate_range_bound.rs +++ b/src/query/functions/src/aggregates/aggregate_range_bound.rs @@ -21,6 +21,7 @@ use databend_common_exception::ErrorCode; use databend_common_exception::Result; use databend_common_expression::compare_columns; use databend_common_expression::types::array::ArrayColumnBuilder; +use databend_common_expression::types::i256; use databend_common_expression::types::Bitmap; use databend_common_expression::types::*; use databend_common_expression::with_number_mapped_type; @@ -31,7 +32,6 @@ use rand::rngs::SmallRng; use rand::thread_rng; use rand::Rng; use rand::SeedableRng; -use databend_common_expression::types::i256; use super::assert_unary_arguments; use super::AggregateUnaryFunction; diff --git a/src/query/functions/src/scalars/arithmetic/src/arithmetic.rs b/src/query/functions/src/scalars/arithmetic/src/arithmetic.rs index 427469f25243..a38539361007 100644 --- a/src/query/functions/src/scalars/arithmetic/src/arithmetic.rs +++ b/src/query/functions/src/scalars/arithmetic/src/arithmetic.rs @@ -20,10 +20,10 @@ use std::ops::BitXor; use std::str::FromStr; use std::sync::Arc; -use databend_common_expression::types::Decimal; use databend_common_expression::serialize::read_decimal_with_size; use databend_common_expression::types::decimal::DecimalDomain; use databend_common_expression::types::decimal::DecimalType; +use databend_common_expression::types::i256; use databend_common_expression::types::nullable::NullableColumn; use databend_common_expression::types::nullable::NullableDomain; use databend_common_expression::types::number::Number; @@ -33,6 +33,7 @@ use 
databend_common_expression::types::string::StringColumnBuilder; use databend_common_expression::types::AnyType; use databend_common_expression::types::Bitmap; use databend_common_expression::types::DataType; +use databend_common_expression::types::Decimal; use databend_common_expression::types::DecimalDataType; use databend_common_expression::types::NullableType; use databend_common_expression::types::NumberClass; @@ -65,7 +66,6 @@ use databend_functions_scalar_decimal::register_decimal_to_int; use databend_functions_scalar_decimal::register_decimal_to_string; use lexical_core::FormattedSize; use num_traits::AsPrimitive; -use databend_common_expression::types::i256; pub fn register(registry: &mut FunctionRegistry) { registry.register_aliases("plus", &["add"]); diff --git a/src/query/functions/src/scalars/decimal/src/arithmetic.rs b/src/query/functions/src/scalars/decimal/src/arithmetic.rs index 5959cf487c72..cc2850cfcb7a 100644 --- a/src/query/functions/src/scalars/decimal/src/arithmetic.rs +++ b/src/query/functions/src/scalars/decimal/src/arithmetic.rs @@ -16,6 +16,7 @@ use std::ops::*; use std::sync::Arc; use databend_common_expression::types::decimal::*; +use databend_common_expression::types::i256; use databend_common_expression::types::*; use databend_common_expression::vectorize_2_arg; use databend_common_expression::vectorize_with_builder_2_arg; @@ -26,7 +27,6 @@ use databend_common_expression::FunctionDomain; use databend_common_expression::FunctionEval; use databend_common_expression::FunctionRegistry; use databend_common_expression::FunctionSignature; -use databend_common_expression::types::i256; use super::convert_to_decimal; use super::convert_to_decimal_domain; diff --git a/src/query/functions/src/scalars/decimal/src/cast.rs b/src/query/functions/src/scalars/decimal/src/cast.rs index 00b34a9faab4..6c2154416e6b 100644 --- a/src/query/functions/src/scalars/decimal/src/cast.rs +++ b/src/query/functions/src/scalars/decimal/src/cast.rs @@ -18,6 +18,7 @@ use 
std::sync::Arc; use databend_common_base::base::OrderedFloat; use databend_common_expression::serialize::read_decimal_with_size; use databend_common_expression::types::decimal::*; +use databend_common_expression::types::i256; use databend_common_expression::types::string::StringColumnBuilder; use databend_common_expression::types::*; use databend_common_expression::vectorize_1_arg; @@ -38,7 +39,6 @@ use databend_common_expression::FunctionSignature; use databend_common_expression::Scalar; use databend_common_expression::Value; use num_traits::AsPrimitive; -use databend_common_expression::types::i256; // int float to decimal pub fn register_to_decimal(registry: &mut FunctionRegistry) { @@ -589,7 +589,7 @@ fn decimal_256_to_128( vectorize_with_builder_1_arg::, DecimalType>( |x: i256, builder: &mut Vec, ctx: &mut EvalContext| match x.checked_mul(factor) { - Some(x) if x <= max.into() && x >= min.into() => builder.push(*x.low()), + Some(x) if x <= max && x >= min => builder.push(*x.low()), _ => { ctx.set_error( builder.len(), @@ -614,7 +614,7 @@ fn decimal_256_to_128( }; match y { - Some(y) if (y <= max.into() && y >= min.into()) && (y != i256::from(0) || x / source_factor == i256::from(0)) => { + Some(y) if (y <= max && y >= min) && (y != 0 || x / source_factor == 0) => { builder.push(*y.low()); } _ => { @@ -635,7 +635,7 @@ macro_rules! m_decimal_to_decimal { ($from_size: expr, $dest_size: expr, $value: expr, $from_type_name: ty, $dest_type_name: ty, $ctx: expr) => { type F = $from_type_name; type T = $dest_type_name; -/** + let buffer: Value> = $value.try_downcast().unwrap(); // faster path let result: Value> = if $from_size.scale == $dest_size.scale @@ -656,7 +656,7 @@ macro_rules! 
m_decimal_to_decimal { let max = T::max_for_precision($dest_size.precision); let min = T::min_for_precision($dest_size.precision); - let source_factor = F::e($from_size.scale as u32); + let source_factor = T::e($from_size.scale as u32); vectorize_with_builder_1_arg::, DecimalType>( |x: F, builder: &mut Vec, ctx: &mut EvalContext| { @@ -707,8 +707,6 @@ macro_rules! m_decimal_to_decimal { }; result.upcast_decimal($dest_size) -*/ - todo!() }; } diff --git a/src/query/functions/src/scalars/decimal/src/comparison.rs b/src/query/functions/src/scalars/decimal/src/comparison.rs index f34ad415384f..511fd11db618 100644 --- a/src/query/functions/src/scalars/decimal/src/comparison.rs +++ b/src/query/functions/src/scalars/decimal/src/comparison.rs @@ -17,6 +17,7 @@ use std::ops::*; use std::sync::Arc; use databend_common_expression::types::decimal::*; +use databend_common_expression::types::i256; use databend_common_expression::types::*; use databend_common_expression::vectorize_cmp_2_arg; use databend_common_expression::Domain; @@ -27,7 +28,6 @@ use databend_common_expression::FunctionRegistry; use databend_common_expression::FunctionSignature; use databend_common_expression::SimpleDomainCmp; use databend_common_expression::Value; -use databend_common_expression::types::i256; use super::convert_to_decimal; use super::convert_to_decimal_domain; diff --git a/src/query/functions/src/scalars/decimal/src/math.rs b/src/query/functions/src/scalars/decimal/src/math.rs index b8c15b4acb1a..7a9a77586db3 100644 --- a/src/query/functions/src/scalars/decimal/src/math.rs +++ b/src/query/functions/src/scalars/decimal/src/math.rs @@ -16,6 +16,7 @@ use std::ops::*; use std::sync::Arc; use databend_common_expression::types::decimal::*; +use databend_common_expression::types::i256; use databend_common_expression::types::*; use databend_common_expression::vectorize_1_arg; use databend_common_expression::with_decimal_mapped_type; @@ -27,7 +28,6 @@ use databend_common_expression::FunctionRegistry; 
use databend_common_expression::FunctionSignature; use databend_common_expression::Scalar; use databend_common_expression::Value; -use databend_common_expression::types::i256; pub fn register_decimal_math(registry: &mut FunctionRegistry) { let factory_rounds = |params: &[Scalar], args_type: &[DataType], round_mode: RoundMode| { diff --git a/src/query/functions/src/scalars/hash.rs b/src/query/functions/src/scalars/hash.rs index 72e39a92d891..1dfbc0e51e9d 100644 --- a/src/query/functions/src/scalars/hash.rs +++ b/src/query/functions/src/scalars/hash.rs @@ -19,6 +19,7 @@ use std::hash::Hash; use std::hash::Hasher; use databend_common_expression::types::decimal::DecimalType; +use databend_common_expression::types::i256; use databend_common_expression::types::number::NumberScalar; use databend_common_expression::types::number::F32; use databend_common_expression::types::number::F64; @@ -42,7 +43,6 @@ use databend_common_expression::with_number_mapped_type; use databend_common_expression::FunctionDomain; use databend_common_expression::FunctionRegistry; use databend_common_expression::Scalar; -use databend_common_expression::types::i256; use md5::Digest; use md5::Md5 as Md5Hasher; use naive_cityhash::cityhash64_with_seed; @@ -311,7 +311,7 @@ for_all_integer_types! 
{ integer_impl } impl DFHash for i256 { #[inline] fn hash(&self, state: &mut H) { - Hash::hash(self.0.0.as_slice(), state); + Hash::hash(self.0 .0.as_slice(), state); } } diff --git a/src/query/functions/src/scalars/variant.rs b/src/query/functions/src/scalars/variant.rs index 5897e47b2fed..a19fecf0cbdc 100644 --- a/src/query/functions/src/scalars/variant.rs +++ b/src/query/functions/src/scalars/variant.rs @@ -22,6 +22,7 @@ use bstr::ByteSlice; use databend_common_column::types::months_days_micros; use databend_common_expression::types::binary::BinaryColumnBuilder; use databend_common_expression::types::date::string_to_date; +use databend_common_expression::types::i256; use databend_common_expression::types::nullable::NullableColumn; use databend_common_expression::types::nullable::NullableColumnBuilder; use databend_common_expression::types::nullable::NullableDomain; @@ -37,6 +38,10 @@ use databend_common_expression::types::Bitmap; use databend_common_expression::types::BooleanType; use databend_common_expression::types::DataType; use databend_common_expression::types::DateType; +use databend_common_expression::types::Decimal128Type; +use databend_common_expression::types::Decimal256Type; +use databend_common_expression::types::DecimalScalar; +use databend_common_expression::types::DecimalSize; use databend_common_expression::types::GenericType; use databend_common_expression::types::IntervalType; use databend_common_expression::types::MutableBitmap; @@ -732,24 +737,57 @@ pub fn register(registry: &mut FunctionRegistry) { }), ); -/** registry.register_combine_nullable_1_arg::( "as_decimal128", |_, _| FunctionDomain::Full, - vectorize_with_builder_1_arg::>(|v, output, ctx| { - if let Some(validity) = &ctx.validity { - if !validity.get_bit(output.len()) { - output.push_null(); - return; + vectorize_with_builder_1_arg::>( + |v, output, ctx| { + if let Some(validity) = &ctx.validity { + if !validity.get_bit(output.len()) { + output.push_null(); + return; + } } - } - 
match RawJsonb::new(v).as_decimal128() { - Ok(Some(res)) => output.push(&res), - _ => output.push_null(), - } - }), + match RawJsonb::new(v).as_decimal128() { + Ok(Some(res)) => { + let size = DecimalSize { + precision: res.precision, + scale: res.scale, + }; + let val = DecimalScalar::Decimal128(res.value, size); + output.push(res.value); + } + _ => output.push_null(), + } + }, + ), + ); + + registry.register_combine_nullable_1_arg::( + "as_decimal256", + |_, _| FunctionDomain::Full, + vectorize_with_builder_1_arg::>( + |v, output, ctx| { + if let Some(validity) = &ctx.validity { + if !validity.get_bit(output.len()) { + output.push_null(); + return; + } + } + match RawJsonb::new(v).as_decimal256() { + Ok(Some(res)) => { + let size = DecimalSize { + precision: res.precision, + scale: res.scale, + }; + let val = DecimalScalar::Decimal256(i256(res.value), size); + output.push(i256(res.value)); + } + _ => output.push_null(), + } + }, + ), ); -*/ registry.register_combine_nullable_1_arg::( "as_binary", diff --git a/src/query/functions/src/test_utils.rs b/src/query/functions/src/test_utils.rs index fd12046dbf04..70d03f9957fe 100644 --- a/src/query/functions/src/test_utils.rs +++ b/src/query/functions/src/test_utils.rs @@ -30,6 +30,7 @@ use databend_common_expression::type_check; use databend_common_expression::types::decimal::DecimalDataType; use databend_common_expression::types::decimal::DecimalScalar; use databend_common_expression::types::decimal::DecimalSize; +use databend_common_expression::types::i256; use databend_common_expression::types::DataType; use databend_common_expression::types::NumberDataType; use databend_common_expression::types::NumberScalar; @@ -37,7 +38,6 @@ use databend_common_expression::ConstantFolder; use databend_common_expression::FunctionContext; use databend_common_expression::RawExpr; use databend_common_expression::Scalar; -use databend_common_expression::types::i256; use crate::BUILTIN_FUNCTIONS; diff --git 
a/src/query/sql/src/planner/semantic/type_check.rs b/src/query/sql/src/planner/semantic/type_check.rs index b2aa323c7668..19b95109f39e 100644 --- a/src/query/sql/src/planner/semantic/type_check.rs +++ b/src/query/sql/src/planner/semantic/type_check.rs @@ -21,7 +21,6 @@ use std::str::FromStr; use std::sync::Arc; use std::vec; -use databend_common_expression::types::i256; use databend_common_ast::ast::BinaryOperator; use databend_common_ast::ast::ColumnID; use databend_common_ast::ast::ColumnRef; @@ -72,6 +71,7 @@ use databend_common_expression::types::decimal::DecimalScalar; use databend_common_expression::types::decimal::DecimalSize; use databend_common_expression::types::decimal::MAX_DECIMAL128_PRECISION; use databend_common_expression::types::decimal::MAX_DECIMAL256_PRECISION; +use databend_common_expression::types::i256; use databend_common_expression::types::DataType; use databend_common_expression::types::NumberDataType; use databend_common_expression::types::NumberScalar; diff --git a/src/query/storages/parquet/src/parquet_rs/statistics/column.rs b/src/query/storages/parquet/src/parquet_rs/statistics/column.rs index 9cda20a43d48..c410c370e61c 100644 --- a/src/query/storages/parquet/src/parquet_rs/statistics/column.rs +++ b/src/query/storages/parquet/src/parquet_rs/statistics/column.rs @@ -12,15 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use databend_common_expression::types::decimal::Decimal; use databend_common_expression::types::decimal::DecimalScalar; +use databend_common_expression::types::i256; use databend_common_expression::types::DecimalDataType; use databend_common_expression::types::NumberDataType; use databend_common_expression::Scalar; use databend_common_expression::TableDataType; use databend_storages_common_table_meta::meta::ColumnStatistics; -use databend_common_expression::types::i256; - use parquet::data_type::AsBytes; use parquet::file::statistics::Statistics; @@ -59,8 +57,8 @@ pub fn convert_column_statistics(s: &Statistics, typ: &TableDataType) -> Option< Scalar::Decimal(DecimalScalar::Decimal128(i128::from(min), *size)), ), TableDataType::Decimal(DecimalDataType::Decimal256(size)) => ( - Scalar::Decimal(DecimalScalar::Decimal256(i256::from(max.into()), *size)), - Scalar::Decimal(DecimalScalar::Decimal256(i256::from(min.into()), *size)), + Scalar::Decimal(DecimalScalar::Decimal256(i256::from(max), *size)), + Scalar::Decimal(DecimalScalar::Decimal256(i256::from(min), *size)), ), _ => return None, } @@ -90,8 +88,8 @@ pub fn convert_column_statistics(s: &Statistics, typ: &TableDataType) -> Option< Scalar::Decimal(DecimalScalar::Decimal128(i128::from(min), *size)), ), TableDataType::Decimal(DecimalDataType::Decimal256(size)) => ( - Scalar::Decimal(DecimalScalar::Decimal256(i256::from(max.into()), *size)), - Scalar::Decimal(DecimalScalar::Decimal256(i256::from(min.into()), *size)), + Scalar::Decimal(DecimalScalar::Decimal256(i256::from(max), *size)), + Scalar::Decimal(DecimalScalar::Decimal256(i256::from(min), *size)), ), _ => return None, } diff --git a/src/query/storages/parquet/src/parquet_rs/statistics/page.rs b/src/query/storages/parquet/src/parquet_rs/statistics/page.rs index bbb97ef2162a..016d9a514716 100644 --- a/src/query/storages/parquet/src/parquet_rs/statistics/page.rs +++ b/src/query/storages/parquet/src/parquet_rs/statistics/page.rs @@ -12,13 +12,12 @@ // See the 
License for the specific language governing permissions and // limitations under the License. -use databend_common_expression::types::decimal::Decimal; use databend_common_expression::types::decimal::DecimalScalar; +use databend_common_expression::types::i256; use databend_common_expression::types::DecimalDataType; use databend_common_expression::types::NumberDataType; use databend_common_expression::Scalar; use databend_common_expression::TableDataType; -use databend_common_expression::types::i256; use databend_storages_common_table_meta::meta::ColumnStatistics; use parquet::data_type::AsBytes; use parquet::data_type::ByteArray; @@ -135,8 +134,8 @@ fn convert_page_index_int32( Scalar::Decimal(DecimalScalar::Decimal128(i128::from(min), *size)), ), TableDataType::Decimal(DecimalDataType::Decimal256(size)) => ( - Scalar::Decimal(DecimalScalar::Decimal256(i256::from(max.into()), *size)), - Scalar::Decimal(DecimalScalar::Decimal256(i256::from(min.into()), *size)), + Scalar::Decimal(DecimalScalar::Decimal256(i256::from(max), *size)), + Scalar::Decimal(DecimalScalar::Decimal256(i256::from(min), *size)), ), _ => unreachable!(), }; @@ -165,8 +164,8 @@ fn convert_page_index_int64( Scalar::Decimal(DecimalScalar::Decimal128(i128::from(min), *size)), ), TableDataType::Decimal(DecimalDataType::Decimal256(size)) => ( - Scalar::Decimal(DecimalScalar::Decimal256(i256::from(max.into()), *size)), - Scalar::Decimal(DecimalScalar::Decimal256(i256::from(min.into()), *size)), + Scalar::Decimal(DecimalScalar::Decimal256(i256::from(max), *size)), + Scalar::Decimal(DecimalScalar::Decimal256(i256::from(min), *size)), ), _ => unreachable!(), }; diff --git a/src/query/storages/parquet/src/parquet_rs/statistics/utils.rs b/src/query/storages/parquet/src/parquet_rs/statistics/utils.rs index 7e4c6dddd7bd..e1fc7efa6c1a 100644 --- a/src/query/storages/parquet/src/parquet_rs/statistics/utils.rs +++ b/src/query/storages/parquet/src/parquet_rs/statistics/utils.rs @@ -16,7 +16,6 @@ use 
arrow_buffer::i256; use databend_common_expression::types::decimal::DecimalScalar; use databend_common_expression::types::DecimalSize; use databend_common_expression::Scalar; -use ethnum::I256; use parquet::data_type::AsBytes; use parquet::data_type::FixedLenByteArray; @@ -28,7 +27,7 @@ pub fn decode_decimal128_from_bytes(arr: &FixedLenByteArray, size: DecimalSize) pub fn decode_decimal256_from_bytes(arr: &FixedLenByteArray, size: DecimalSize) -> Scalar { let v = i256::from_be_bytes(sign_extend_be(arr.as_bytes())); let (lo, hi) = v.to_parts(); - let v = databend_common_expression::types::i256(I256::from_words(hi, lo as i128)); + let v = databend_common_expression::types::i256::from_words(hi, lo as i128); Scalar::Decimal(DecimalScalar::Decimal256(v, size)) } diff --git a/src/query/storages/stage/src/read/avro/decoder.rs b/src/query/storages/stage/src/read/avro/decoder.rs index 363cc227fe6d..ba2952cb8ac0 100644 --- a/src/query/storages/stage/src/read/avro/decoder.rs +++ b/src/query/storages/stage/src/read/avro/decoder.rs @@ -42,7 +42,6 @@ use databend_common_meta_app::principal::NullAs; use lexical_core::FromLexical; use num_bigint::BigInt; use num_traits::NumCast; -use databend_common_expression::types::i256; use crate::read::avro::avro_to_jsonb::to_jsonb; use crate::read::avro::schema_match::MatchedField; diff --git a/src/query/storages/stage/src/read/avro/schema_match.rs b/src/query/storages/stage/src/read/avro/schema_match.rs index ee5839c00b5a..1cfefcb1c747 100644 --- a/src/query/storages/stage/src/read/avro/schema_match.rs +++ b/src/query/storages/stage/src/read/avro/schema_match.rs @@ -15,10 +15,10 @@ use apache_avro::schema::RecordSchema; use apache_avro::schema::UnionSchema; use apache_avro::Schema; +use databend_common_expression::types::i256; use databend_common_expression::types::Decimal; use databend_common_expression::types::NumberDataType; use databend_common_expression::TableDataType; -use databend_common_expression::types::i256; type MatchResult = 
Result; From 43ae52898883009a9d447f9686aab1eb25db8016 Mon Sep 17 00:00:00 2001 From: baishen Date: Thu, 24 Apr 2025 17:23:39 +0800 Subject: [PATCH 06/13] fix --- src/query/expression/src/types/decimal.rs | 1676 +++++++++-------- src/query/expression/tests/it/decimal.rs | 2 +- src/query/expression/tests/it/row.rs | 2 +- src/query/functions/src/scalars/variant.rs | 57 - .../tests/it/aggregates/agg_hashtable.rs | 4 +- .../functions/tests/it/scalars/arithmetic.rs | 2 +- src/query/functions/tests/it/scalars/geo.rs | 9 + src/query/functions/tests/it/type_check.rs | 4 +- .../storages/stage/src/read/avro/decoder.rs | 2 +- 9 files changed, 857 insertions(+), 901 deletions(-) diff --git a/src/query/expression/src/types/decimal.rs b/src/query/expression/src/types/decimal.rs index 4d4c65cb28bd..0e30ac6d05c4 100644 --- a/src/query/expression/src/types/decimal.rs +++ b/src/query/expression/src/types/decimal.rs @@ -37,7 +37,6 @@ use databend_common_exception::Result; use databend_common_io::display_decimal_128; use databend_common_io::display_decimal_256; use enum_as_inner::EnumAsInner; -// use ethnum::i256; use ethnum::u256; use ethnum::AsI256; use itertools::Itertools; @@ -49,7 +48,6 @@ use num_traits::ToPrimitive; use serde::Deserialize; use serde::Serialize; -// use databend_common_column::types::NativeType; use super::SimpleDomain; use crate::types::ArgType; use crate::types::DataType; @@ -62,767 +60,650 @@ use crate::Domain; use crate::Scalar; use crate::ScalarRef; -// use databend_common_column::types::i256; - -/// Physical representation of a decimal -#[derive(Clone, Copy, Default, Eq, Ord, Serialize, Deserialize)] -#[allow(non_camel_case_types)] -#[repr(C)] -pub struct i256(pub ethnum::I256); - -impl i256 { - /// The additive identity for this integer type, i.e. `0`. - pub const ZERO: Self = Self(ethnum::I256([0; 2])); +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct DecimalType(PhantomData); - /// The multiplicative identity for this integer type, i.e. `1`. 
- pub const ONE: Self = Self(ethnum::I256::new(1)); +pub type Decimal128Type = DecimalType; +pub type Decimal256Type = DecimalType; - /// The multiplicative inverse for this integer type, i.e. `-1`. - pub const MINUS_ONE: Self = Self(ethnum::I256::new(-1)); +impl ValueType for DecimalType { + type Scalar = Num; + type ScalarRef<'a> = Num; + type Column = Buffer; + type Domain = SimpleDomain; + type ColumnIterator<'a> = std::iter::Cloned>; + type ColumnBuilder = Vec; - // pub fn from(value: i128) -> Self { - // Self(value.as_i256()) - //} + fn to_owned_scalar(scalar: Self::ScalarRef<'_>) -> Self::Scalar { + scalar + } - /// Creates a new 256-bit integer value from a primitive `i128` integer. - #[inline] - pub const fn new(value: i128) -> Self { - Self(ethnum::I256::new(value)) + fn to_scalar_ref(scalar: &Self::Scalar) -> Self::ScalarRef<'_> { + *scalar } - /// Returns a new [`i256`] from two `i128`. - pub fn from_words(hi: i128, lo: i128) -> Self { - Self(ethnum::I256::from_words(hi, lo)) + fn try_downcast_scalar<'a>(scalar: &'a ScalarRef) -> Option> { + Num::try_downcast_scalar(scalar.as_decimal()?) } - #[inline] - pub const fn to_le_bytes(&self) -> [u8; 32] { - let (high, low) = self.0.into_words(); - let low = low.to_le_bytes(); - let high = high.to_le_bytes(); - let mut i = 0; - let mut bytes = [0u8; 32]; - while i != 16 { - bytes[i] = low[i]; - bytes[i + 16] = high[i]; - i += 1; + fn try_downcast_column(col: &Column) -> Option { + let down_col = Num::try_downcast_column(col); + if let Some(col) = down_col { + Some(col.0) + } else { + None } - bytes } - #[inline] - pub const fn to_be_bytes(&self) -> [u8; 32] { - let (high, low) = self.0.into_words(); - let low = low.to_be_bytes(); - let high = high.to_be_bytes(); - let mut bytes = [0; 32]; - let mut i = 0; - while i != 16 { - bytes[i] = high[i]; - bytes[i + 16] = low[i]; - i += 1; - } - bytes + fn try_downcast_domain(domain: &Domain) -> Option { + Num::try_downcast_domain(domain.as_decimal()?) 
} - #[inline] - pub const fn from_be_bytes(bytes: [u8; 32]) -> Self { - let mut low = [0; 16]; - let mut high = [0; 16]; - let mut i = 0; - while i != 16 { - high[i] = bytes[i]; - low[i] = bytes[i + 16]; - i += 1; - } - let high = i128::from_be_bytes(high); - let low = i128::from_be_bytes(low); - Self(ethnum::I256::from_words(high, low)) + fn try_downcast_builder(builder: &mut ColumnBuilder) -> Option<&mut Self::ColumnBuilder> { + Num::try_downcast_builder(builder) } - #[inline] - pub const fn from_le_bytes(bytes: [u8; 32]) -> Self { - let mut low = [0; 16]; - let mut high = [0; 16]; - let mut i = 0; - while i != 16 { - low[i] = bytes[i]; - high[i] = bytes[i + 16]; - i += 1; - } - let high = i128::from_be_bytes(high); - let low = i128::from_be_bytes(low); - Self(ethnum::I256::from_words(high, low)) + fn try_downcast_owned_builder(builder: ColumnBuilder) -> Option { + Num::try_downcast_owned_builder(builder) } - #[inline] - pub const fn is_positive(self) -> bool { - self.0.is_positive() + fn try_upcast_column_builder( + builder: Self::ColumnBuilder, + decimal_size: Option, + ) -> Option { + Some(ColumnBuilder::Decimal(Num::upcast_builder( + builder, + decimal_size.unwrap(), + ))) } - #[inline] - pub const fn is_negative(self) -> bool { - self.0.is_negative() + fn upcast_scalar(scalar: Self::Scalar) -> Scalar { + Num::upcast_scalar(scalar, Num::default_decimal_size()) } - #[inline] - pub fn saturating_abs(self) -> Self { - Self(self.0.saturating_abs()) + fn upcast_column(col: Self::Column) -> Column { + Num::upcast_column(col, Num::default_decimal_size()) } - #[inline(always)] - pub fn leading_zeros(self) -> u32 { - self.0.leading_zeros() + fn upcast_domain(domain: Self::Domain) -> Domain { + Num::upcast_domain(domain, Num::default_decimal_size()) } - /// Cast to a primitive `u64`. - #[inline] - pub const fn as_u64(self) -> u64 { - self.0.as_u64() + fn column_len(col: &Self::Column) -> usize { + col.len() } - /// Cast to a primitive `i64`. 
- #[inline] - pub const fn as_i64(self) -> i64 { - self.0.as_i64() + fn index_column(col: &Self::Column, index: usize) -> Option> { + col.get(index).cloned() } - /// Cast to a primitive `i128`. - #[inline] - pub const fn as_i128(self) -> i128 { - self.0.as_i128() + #[inline(always)] + unsafe fn index_column_unchecked(col: &Self::Column, index: usize) -> Self::ScalarRef<'_> { + debug_assert!(index < col.len()); + + *col.get_unchecked(index) } - /// Get the low 128-bit word for this signed integer. - #[inline] - pub fn low(&self) -> &i128 { - self.0.low() + fn slice_column(col: &Self::Column, range: Range) -> Self::Column { + col.clone().sliced(range.start, range.end - range.start) } - /// Get the high 128-bit word for this signed integer. - #[inline] - pub fn high(&self) -> &i128 { - self.0.high() + fn iter_column(col: &Self::Column) -> Self::ColumnIterator<'_> { + col.iter().cloned() } - #[allow(unused_attributes)] - #[inline] - pub fn abs(self) -> Self { - Self(self.0.abs()) + fn column_to_builder(col: Self::Column) -> Self::ColumnBuilder { + buffer_into_mut(col) } - #[inline] - pub fn checked_neg(self) -> Option { - self.0.checked_neg().map(|v| Self(v)) + fn builder_len(builder: &Self::ColumnBuilder) -> usize { + builder.len() } - /// Raises self to the power of `exp`, using exponentiation by squaring. 
- /// - /// # Examples - /// - /// Basic usage: - /// - /// ``` - /// # use ethnum::I256; - /// - /// assert_eq!(I256::new(2).pow(5), 32); - /// ``` - #[must_use = "this returns the result of the operation, \ - without modifying the original"] - #[inline] - pub fn pow(self, mut exp: u32) -> Self { - Self(self.0.pow(exp)) + fn push_item(builder: &mut Self::ColumnBuilder, item: Self::ScalarRef<'_>) { + builder.push(item) } -} -impl Neg for i256 { - type Output = Self; + fn push_item_repeat(builder: &mut Self::ColumnBuilder, item: Self::ScalarRef<'_>, n: usize) { + if n == 1 { + builder.push(item) + } else { + builder.resize(builder.len() + n, item) + } + } - #[inline] - fn neg(self) -> Self::Output { - let (a, b) = self.0.into_words(); - Self(ethnum::I256::from_words(-a, b)) + fn push_default(builder: &mut Self::ColumnBuilder) { + builder.push(Num::default()) } -} -impl std::fmt::Debug for i256 { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!(f, "{:?}", self.0) + fn append_column(builder: &mut Self::ColumnBuilder, other: &Self::Column) { + builder.extend_from_slice(other); } -} -impl std::fmt::Display for i256 { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!(f, "{}", self.0) + fn build_column(builder: Self::ColumnBuilder) -> Self::Column { + builder.into() } -} -impl AddAssign for i256 { - fn add_assign(&mut self, rhs: Self) { - self.0 += rhs.0; + fn build_scalar(builder: Self::ColumnBuilder) -> Self::Scalar { + assert_eq!(builder.len(), 1); + builder[0] } -} -impl SubAssign for i256 { - fn sub_assign(&mut self, rhs: Self) { - self.0 -= rhs.0; + #[inline(always)] + fn compare(lhs: Self::ScalarRef<'_>, rhs: Self::ScalarRef<'_>) -> Ordering { + lhs.cmp(&rhs) } -} -impl MulAssign for i256 { - fn mul_assign(&mut self, rhs: Self) { - self.0 *= rhs.0; + #[inline(always)] + fn equal(left: Self::ScalarRef<'_>, right: Self::ScalarRef<'_>) -> bool { + left == right } -} -impl DivAssign for i256 { - fn div_assign(&mut 
self, rhs: Self) { - self.0 /= rhs.0; + #[inline(always)] + fn not_equal(left: Self::ScalarRef<'_>, right: Self::ScalarRef<'_>) -> bool { + left != right } -} -impl Add for i256 { - type Output = Self; + #[inline(always)] + fn greater_than(left: Self::ScalarRef<'_>, right: Self::ScalarRef<'_>) -> bool { + left > right + } - fn add(self, rhs: Self) -> Self::Output { - Self(self.0 + rhs.0) + #[inline(always)] + fn greater_than_equal(left: Self::ScalarRef<'_>, right: Self::ScalarRef<'_>) -> bool { + left >= right } -} -impl Sub for i256 { - type Output = Self; + #[inline(always)] + fn less_than(left: Self::ScalarRef<'_>, right: Self::ScalarRef<'_>) -> bool { + left < right + } - fn sub(self, rhs: Self) -> Self::Output { - Self(self.0 - rhs.0) + #[inline(always)] + fn less_than_equal(left: Self::ScalarRef<'_>, right: Self::ScalarRef<'_>) -> bool { + left <= right } } -impl Mul for i256 { - type Output = Self; - - fn mul(self, rhs: Self) -> Self::Output { - Self(self.0 * rhs.0) - } -} - -impl Div for i256 { - type Output = Self; - - fn div(self, rhs: Self) -> Self::Output { - Self(self.0 / rhs.0) +impl ArgType for DecimalType { + fn data_type() -> DataType { + Num::data_type() } -} -macro_rules! impl_from { - ($($t:ty),* $(,)?) => {$( - impl From<$t> for i256 { - #[inline] - fn from(value: $t) -> Self { - i256(value.as_i256()) - } + fn full_domain() -> Self::Domain { + SimpleDomain { + min: Num::MIN, + max: Num::MAX, } - )*}; -} - -impl_from! 
{ - bool, - i8, i16, i32, i64, i128, - u8, u16, u32, u64, u128, -} - -impl TryFrom for i256 { - type Error = TryFromIntError; + } - fn try_from(value: u256) -> std::result::Result { - let i256_value = ethnum::i256::try_from(value)?; - Ok(i256(i256_value)) + fn create_builder(capacity: usize, _generics: &GenericMap) -> Self::ColumnBuilder { + Vec::with_capacity(capacity) } -} -impl BorshSerialize for i256 { - fn serialize(&self, writer: &mut W) -> borsh::io::Result<()> { - BorshSerialize::serialize(&self.0 .0, writer) + fn column_from_vec(vec: Vec, _generics: &GenericMap) -> Self::Column { + vec.into() } -} -impl BorshDeserialize for i256 { - fn deserialize_reader(reader: &mut R) -> borsh::io::Result { - let value: [i128; 2] = BorshDeserialize::deserialize_reader(reader)?; - Ok(Self(ethnum::I256(value))) + fn column_from_iter(iter: impl Iterator, _: &GenericMap) -> Self::Column { + iter.collect() } -} -impl Marshal for i256 { - fn marshal(&self, scratch: &mut [u8]) { - self.0.marshal(scratch); + fn column_from_ref_iter<'a>( + iter: impl Iterator>, + _: &GenericMap, + ) -> Self::Column { + iter.collect() } } -macro_rules! impl_into_float { - ($($t:ty => $f:ident),* $(,)?) => {$( - impl From for $t { - #[inline] - fn from(x: i256) -> $t { - x.0.$f() - } - } - )*}; +#[derive( + Debug, + Clone, + Copy, + PartialEq, + Eq, + Hash, + Serialize, + Deserialize, + BorshSerialize, + BorshDeserialize, + EnumAsInner, +)] +pub enum DecimalDataType { + Decimal128(DecimalSize), + Decimal256(DecimalSize), } -impl_into_float! 
{ - f32 => as_f32, f64 => as_f64, +#[derive( + Clone, + Copy, + PartialEq, + Eq, + EnumAsInner, + Serialize, + Deserialize, + BorshSerialize, + BorshDeserialize, +)] +pub enum DecimalScalar { + Decimal128(i128, DecimalSize), + Decimal256(i256, DecimalSize), } -impl core::hash::Hash for i256 { - #[inline] - fn hash(&self, hasher: &mut H) - where H: core::hash::Hasher { - core::hash::Hash::hash(&self.0, hasher); +impl DecimalScalar { + pub fn to_float64(&self) -> f64 { + match self { + DecimalScalar::Decimal128(v, size) => i128::to_float64(*v, size.scale), + DecimalScalar::Decimal256(v, size) => i256::to_float64(*v, size.scale), + } + } + pub fn is_positive(&self) -> bool { + match self { + DecimalScalar::Decimal128(v, _) => i128::is_positive(*v), + DecimalScalar::Decimal256(v, _) => i256::is_positive(*v), + } } } -impl PartialEq for i256 { - #[inline] - fn eq(&self, other: &Self) -> bool { - let (ahi, alo) = self.0.into_words(); - let (bhi, blo) = other.0.into_words(); - (ahi == bhi) & (alo == blo) - // bitwise and rather than logical and - // to make O0 code more effecient. 
- } +#[derive(Clone, PartialEq, EnumAsInner)] +pub enum DecimalColumn { + Decimal128(Buffer, DecimalSize), + Decimal256(Buffer, DecimalSize), } -impl PartialEq for i256 { - #[inline] - fn eq(&self, other: &i128) -> bool { - *self == i256::new(*other) - } +#[derive(Clone, PartialEq, EnumAsInner, Debug)] +pub enum DecimalColumnVec { + Decimal128(Vec>, DecimalSize), + Decimal256(Vec>, DecimalSize), } -impl PartialEq for i128 { - #[inline] - fn eq(&self, other: &i256) -> bool { - i256::new(*self) == *other - } +#[derive(Debug, Clone, PartialEq, Eq, EnumAsInner)] +pub enum DecimalColumnBuilder { + Decimal128(Vec, DecimalSize), + Decimal256(Vec, DecimalSize), } -impl PartialOrd for i256 { - #[inline] - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) - } +#[derive(Debug, Clone, Copy, PartialEq, Eq, EnumAsInner)] +pub enum DecimalDomain { + Decimal128(SimpleDomain, DecimalSize), + Decimal256(SimpleDomain, DecimalSize), } -impl PartialOrd for i256 { - #[inline] - fn partial_cmp(&self, rhs: &i128) -> Option { - Some(self.cmp(&i256::new(*rhs))) +impl DecimalDomain { + pub fn decimal_size(&self) -> DecimalSize { + match self { + DecimalDomain::Decimal128(_, size) => *size, + DecimalDomain::Decimal256(_, size) => *size, + } } } -impl PartialOrd for i128 { - #[inline] - fn partial_cmp(&self, rhs: &i256) -> Option { - Some(i256::new(*self).cmp(rhs)) - } +#[derive( + Debug, + Clone, + Copy, + PartialEq, + Eq, + Hash, + Serialize, + Deserialize, + BorshSerialize, + BorshDeserialize, +)] +pub struct DecimalSize { + pub precision: u8, + pub scale: u8, } -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct DecimalType(PhantomData); +pub trait Decimal: + Sized + + Default + + Debug + + std::fmt::Display + + Copy + + Clone + + PartialEq + + Eq + + std::ops::AddAssign + + PartialOrd + + Ord + + Sync + + Send + + 'static +{ + // the Layout align size of i128 and i256 have changed + // https://blog.rust-lang.org/2024/03/30/i128-layout-update.html + // Here we keep 
this struct in aggregate state which minimize the align of the struct + type U64Array: Send + Sync + Copy + Default + Debug; + fn zero() -> Self; + fn one() -> Self; + fn minus_one() -> Self; -pub type Decimal128Type = DecimalType; -pub type Decimal256Type = DecimalType; + // 10**scale + fn e(n: u32) -> Self; + fn mem_size() -> usize; -impl ValueType for DecimalType { - type Scalar = Num; - type ScalarRef<'a> = Num; - type Column = Buffer; - type Domain = SimpleDomain; - type ColumnIterator<'a> = std::iter::Cloned>; - type ColumnBuilder = Vec; + fn to_u64_array(self) -> Self::U64Array; + fn from_u64_array(v: Self::U64Array) -> Self; - fn to_owned_scalar(scalar: Self::ScalarRef<'_>) -> Self::Scalar { - scalar - } + fn checked_add(self, rhs: Self) -> Option; + fn checked_sub(self, rhs: Self) -> Option; + fn checked_div(self, rhs: Self) -> Option; + fn checked_mul(self, rhs: Self) -> Option; + fn checked_rem(self, rhs: Self) -> Option; - fn to_scalar_ref(scalar: &Self::Scalar) -> Self::ScalarRef<'_> { - *scalar - } + fn do_round_div(self, rhs: Self, mul_scale: u32) -> Option; - fn try_downcast_scalar<'a>(scalar: &'a ScalarRef) -> Option> { - Num::try_downcast_scalar(scalar.as_decimal()?) - } + // mul two decimals and return a decimal with rounding option + fn do_round_mul(self, rhs: Self, shift_scale: u32) -> Option; - fn try_downcast_column(col: &Column) -> Option { - let down_col = Num::try_downcast_column(col); - if let Some(col) = down_col { - Some(col.0) - } else { - None - } - } + fn min_for_precision(precision: u8) -> Self; + fn max_for_precision(precision: u8) -> Self; - fn try_downcast_domain(domain: &Domain) -> Option { - Num::try_downcast_domain(domain.as_decimal()?) 
- } + fn default_decimal_size() -> DecimalSize; - fn try_downcast_builder(builder: &mut ColumnBuilder) -> Option<&mut Self::ColumnBuilder> { - Num::try_downcast_builder(builder) - } + fn from_float(value: f64) -> Self; + fn from_i128>(value: U) -> Self; + fn from_i256(value: i256) -> Self; + fn from_bigint(value: BigInt) -> Option; - fn try_downcast_owned_builder(builder: ColumnBuilder) -> Option { - Num::try_downcast_owned_builder(builder) + fn de_binary(bytes: &mut &[u8]) -> Self; + fn display(self, scale: u8) -> String; + + fn to_float32(self, scale: u8) -> f32; + fn to_float64(self, scale: u8) -> f64; + + fn to_int(self, scale: u8, rounding_mode: bool) -> Option; + + fn try_downcast_column(column: &Column) -> Option<(Buffer, DecimalSize)>; + fn try_downcast_builder(builder: &mut ColumnBuilder) -> Option<&mut Vec>; + + fn try_downcast_owned_builder(builder: ColumnBuilder) -> Option>; + + fn try_downcast_scalar(scalar: &DecimalScalar) -> Option; + fn try_downcast_domain(domain: &DecimalDomain) -> Option>; + + fn upcast_scalar(scalar: Self, size: DecimalSize) -> Scalar; + fn upcast_column(col: Buffer, size: DecimalSize) -> Column; + fn upcast_domain(domain: SimpleDomain, size: DecimalSize) -> Domain; + fn upcast_builder(builder: Vec, size: DecimalSize) -> DecimalColumnBuilder; + fn data_type() -> DataType; + const MIN: Self; + const MAX: Self; + + fn to_column_from_buffer(value: Buffer, size: DecimalSize) -> DecimalColumn; + + fn to_column(value: Vec, size: DecimalSize) -> DecimalColumn { + Self::to_column_from_buffer(value.into(), size) } - fn try_upcast_column_builder( - builder: Self::ColumnBuilder, - decimal_size: Option, - ) -> Option { - Some(ColumnBuilder::Decimal(Num::upcast_builder( - builder, - decimal_size.unwrap(), - ))) + fn to_scalar(self, size: DecimalSize) -> DecimalScalar; + + fn with_size(&self, size: DecimalSize) -> Option { + let multiplier = Self::e(size.scale as u32); + let min_for_precision = Self::min_for_precision(size.precision); + let 
max_for_precision = Self::max_for_precision(size.precision); + self.checked_mul(multiplier).and_then(|v| { + if v > max_for_precision || v < min_for_precision { + None + } else { + Some(v) + } + }) } +} - fn upcast_scalar(scalar: Self::Scalar) -> Scalar { - Num::upcast_scalar(scalar, Num::default_decimal_size()) +impl Decimal for i128 { + type U64Array = [u64; 2]; + + fn to_u64_array(self) -> Self::U64Array { + unsafe { std::mem::transmute(self) } } - fn upcast_column(col: Self::Column) -> Column { - Num::upcast_column(col, Num::default_decimal_size()) + fn from_u64_array(v: Self::U64Array) -> Self { + unsafe { std::mem::transmute(v) } } - fn upcast_domain(domain: Self::Domain) -> Domain { - Num::upcast_domain(domain, Num::default_decimal_size()) + fn zero() -> Self { + 0_i128 } - fn column_len(col: &Self::Column) -> usize { - col.len() + fn one() -> Self { + 1_i128 } - fn index_column(col: &Self::Column, index: usize) -> Option> { - col.get(index).cloned() + fn minus_one() -> Self { + -1_i128 } - #[inline(always)] - unsafe fn index_column_unchecked(col: &Self::Column, index: usize) -> Self::ScalarRef<'_> { - debug_assert!(index < col.len()); + fn e(n: u32) -> Self { + 10_i128.pow(n) + } - *col.get_unchecked(index) + fn mem_size() -> usize { + 16 } - fn slice_column(col: &Self::Column, range: Range) -> Self::Column { - col.clone().sliced(range.start, range.end - range.start) + fn checked_add(self, rhs: Self) -> Option { + self.checked_add(rhs) } - fn iter_column(col: &Self::Column) -> Self::ColumnIterator<'_> { - col.iter().cloned() + fn checked_sub(self, rhs: Self) -> Option { + self.checked_sub(rhs) } - fn column_to_builder(col: Self::Column) -> Self::ColumnBuilder { - buffer_into_mut(col) + fn checked_div(self, rhs: Self) -> Option { + self.checked_div(rhs) } - fn builder_len(builder: &Self::ColumnBuilder) -> usize { - builder.len() + fn checked_mul(self, rhs: Self) -> Option { + self.checked_mul(rhs) } - fn push_item(builder: &mut Self::ColumnBuilder, item: 
Self::ScalarRef<'_>) { - builder.push(item) + fn checked_rem(self, rhs: Self) -> Option { + self.checked_rem(rhs) } - fn push_item_repeat(builder: &mut Self::ColumnBuilder, item: Self::ScalarRef<'_>, n: usize) { - if n == 1 { - builder.push(item) + fn do_round_mul(self, rhs: Self, shift_scale: u32) -> Option { + let div = i256::e(shift_scale); + let res = if self.is_negative() == rhs.is_negative() { + (i256::from(self) * i256::from(rhs) + div / i256::from(2)) / div } else { - builder.resize(builder.len() + n, item) - } - } + (i256::from(self) * i256::from(rhs) - div / i256::from(2)) / div + }; - fn push_default(builder: &mut Self::ColumnBuilder) { - builder.push(Num::default()) + if !(i256::from(i128::MIN)..=i256::from(i128::MAX)).contains(&res) { + None + } else { + Some(res.as_i128()) + } } - fn append_column(builder: &mut Self::ColumnBuilder, other: &Self::Column) { - builder.extend_from_slice(other); + fn do_round_div(self, rhs: Self, mul_scale: u32) -> Option { + let mul = i256::e(mul_scale); + if self.is_negative() == rhs.is_negative() { + let res = (i256::from(self) * mul + i256::from(rhs) / i256::from(2)) / i256::from(rhs); + Some(*res.low()) + } else { + let res = (i256::from(self) * mul - i256::from(rhs) / i256::from(2)) / i256::from(rhs); + Some(*res.low()) + } } - fn build_column(builder: Self::ColumnBuilder) -> Self::Column { - builder.into() + fn min_for_precision(to_precision: u8) -> Self { + MIN_DECIMAL_FOR_EACH_PRECISION[to_precision as usize - 1] } - fn build_scalar(builder: Self::ColumnBuilder) -> Self::Scalar { - assert_eq!(builder.len(), 1); - builder[0] + fn max_for_precision(to_precision: u8) -> Self { + MAX_DECIMAL_FOR_EACH_PRECISION[to_precision as usize - 1] } - #[inline(always)] - fn compare(lhs: Self::ScalarRef<'_>, rhs: Self::ScalarRef<'_>) -> Ordering { - lhs.cmp(&rhs) + fn default_decimal_size() -> DecimalSize { + DecimalSize { + precision: MAX_DECIMAL128_PRECISION, + scale: 0, + } } - #[inline(always)] - fn equal(left: 
Self::ScalarRef<'_>, right: Self::ScalarRef<'_>) -> bool { - left == right + fn to_column_from_buffer(value: Buffer, size: DecimalSize) -> DecimalColumn { + DecimalColumn::Decimal128(value, size) } - #[inline(always)] - fn not_equal(left: Self::ScalarRef<'_>, right: Self::ScalarRef<'_>) -> bool { - left != right - } + fn from_float(value: f64) -> Self { + // still needs to be optimized. + // An implementation similar to float64_as_i256 obtained from the ethnum library + const M: u64 = (f64::MANTISSA_DIGITS - 1) as u64; + const MAN_MASK: u64 = !(!0 << M); + const MAN_ONE: u64 = 1 << M; + const EXP_MASK: u64 = !0 >> f64::MANTISSA_DIGITS; + const EXP_OFFSET: u64 = EXP_MASK / 2; + const ABS_MASK: u64 = !0 >> 1; + const SIG_MASK: u64 = !ABS_MASK; - #[inline(always)] - fn greater_than(left: Self::ScalarRef<'_>, right: Self::ScalarRef<'_>) -> bool { - left > right + let abs = f64::from_bits(value.to_bits() & ABS_MASK); + let sign = -(((value.to_bits() & SIG_MASK) >> (u64::BITS - 2)) as i128).wrapping_sub(1); // if self >= 0. 
{ 1 } else { -1 } + if abs >= 1.0 { + let bits = abs.to_bits(); + let exponent = ((bits >> M) & EXP_MASK) - EXP_OFFSET; + let mantissa = (bits & MAN_MASK) | MAN_ONE; + if exponent <= M { + (>::from(mantissa >> (M - exponent))) * sign + } else if exponent < 127 { + (>::from(mantissa) << (exponent - M)) * sign + } else if sign > 0 { + i128::MAX + } else { + i128::MIN + } + } else { + Self::zero() + } } - #[inline(always)] - fn greater_than_equal(left: Self::ScalarRef<'_>, right: Self::ScalarRef<'_>) -> bool { - left >= right + fn from_i128>(value: U) -> Self { + value.into() } - #[inline(always)] - fn less_than(left: Self::ScalarRef<'_>, right: Self::ScalarRef<'_>) -> bool { - left < right + fn from_i256(value: i256) -> Self { + value.as_i128() } - #[inline(always)] - fn less_than_equal(left: Self::ScalarRef<'_>, right: Self::ScalarRef<'_>) -> bool { - left <= right + fn from_bigint(value: BigInt) -> Option { + value.to_i128() } -} -impl ArgType for DecimalType { - fn data_type() -> DataType { - Num::data_type() - } + fn de_binary(bytes: &mut &[u8]) -> Self { + let bs: [u8; std::mem::size_of::()] = + bytes[0..std::mem::size_of::()].try_into().unwrap(); + *bytes = &bytes[std::mem::size_of::()..]; - fn full_domain() -> Self::Domain { - SimpleDomain { - min: Num::MIN, - max: Num::MAX, - } + i128::from_le_bytes(bs) } - fn create_builder(capacity: usize, _generics: &GenericMap) -> Self::ColumnBuilder { - Vec::with_capacity(capacity) + fn display(self, scale: u8) -> String { + display_decimal_128(self, scale) } - fn column_from_vec(vec: Vec, _generics: &GenericMap) -> Self::Column { - vec.into() + fn to_float32(self, scale: u8) -> f32 { + let div = 10_f32.powi(scale as i32); + self as f32 / div } - fn column_from_iter(iter: impl Iterator, _: &GenericMap) -> Self::Column { - iter.collect() + fn to_float64(self, scale: u8) -> f64 { + let div = 10_f64.powi(scale as i32); + self as f64 / div } - fn column_from_ref_iter<'a>( - iter: impl Iterator>, - _: &GenericMap, - ) -> 
Self::Column { - iter.collect() + fn to_int(self, scale: u8, rounding_mode: bool) -> Option { + let div = 10i128.checked_pow(scale as u32)?; + let mut val = self / div; + if rounding_mode && scale > 0 { + // Checking whether numbers need to be added or subtracted to calculate rounding + if let Some(r) = self.checked_rem(div) { + if let Some(m) = r.checked_div(i128::e(scale as u32 - 1)) { + if m >= 5i128 { + val = val.checked_add(1i128)?; + } else if m <= -5i128 { + val = val.checked_sub(1i128)?; + } + } + } + } + num_traits::cast(val) } -} - -#[derive( - Debug, - Clone, - Copy, - PartialEq, - Eq, - Hash, - Serialize, - Deserialize, - BorshSerialize, - BorshDeserialize, - EnumAsInner, -)] -pub enum DecimalDataType { - Decimal128(DecimalSize), - Decimal256(DecimalSize), -} -#[derive( - Clone, - Copy, - PartialEq, - Eq, - EnumAsInner, - Serialize, - Deserialize, - BorshSerialize, - BorshDeserialize, -)] -pub enum DecimalScalar { - Decimal128(i128, DecimalSize), - Decimal256(i256, DecimalSize), -} + fn to_scalar(self, size: DecimalSize) -> DecimalScalar { + DecimalScalar::Decimal128(self, size) + } -impl DecimalScalar { - pub fn to_float64(&self) -> f64 { - match self { - DecimalScalar::Decimal128(v, size) => i128::to_float64(*v, size.scale), - DecimalScalar::Decimal256(v, size) => i256::to_float64(*v, size.scale), + fn try_downcast_column(column: &Column) -> Option<(Buffer, DecimalSize)> { + let column = column.as_decimal()?; + match column { + DecimalColumn::Decimal128(c, size) => Some((c.clone(), *size)), + _ => None, } } - pub fn is_positive(&self) -> bool { - match self { - DecimalScalar::Decimal128(v, _) => i128::is_positive(*v), - DecimalScalar::Decimal256(v, _) => i256::is_positive(*v), + + fn try_downcast_builder(builder: &mut ColumnBuilder) -> Option<&mut Vec> { + match builder { + ColumnBuilder::Decimal(DecimalColumnBuilder::Decimal128(s, _)) => Some(s), + _ => None, } } -} - -#[derive(Clone, PartialEq, EnumAsInner)] -pub enum DecimalColumn { - 
Decimal128(Buffer, DecimalSize), - Decimal256(Buffer, DecimalSize), -} - -#[derive(Clone, PartialEq, EnumAsInner, Debug)] -pub enum DecimalColumnVec { - Decimal128(Vec>, DecimalSize), - Decimal256(Vec>, DecimalSize), -} - -#[derive(Debug, Clone, PartialEq, Eq, EnumAsInner)] -pub enum DecimalColumnBuilder { - Decimal128(Vec, DecimalSize), - Decimal256(Vec, DecimalSize), -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq, EnumAsInner)] -pub enum DecimalDomain { - Decimal128(SimpleDomain, DecimalSize), - Decimal256(SimpleDomain, DecimalSize), -} -impl DecimalDomain { - pub fn decimal_size(&self) -> DecimalSize { - match self { - DecimalDomain::Decimal128(_, size) => *size, - DecimalDomain::Decimal256(_, size) => *size, + fn try_downcast_owned_builder(builder: ColumnBuilder) -> Option> { + match builder { + ColumnBuilder::Decimal(DecimalColumnBuilder::Decimal128(s, _)) => Some(s), + _ => None, } } -} - -#[derive( - Debug, - Clone, - Copy, - PartialEq, - Eq, - Hash, - Serialize, - Deserialize, - BorshSerialize, - BorshDeserialize, -)] -pub struct DecimalSize { - pub precision: u8, - pub scale: u8, -} - -pub trait Decimal: - Sized - + Default - + Debug - + std::fmt::Display - + Copy - + Clone - + PartialEq - + Eq - + std::ops::AddAssign - + PartialOrd - + Ord - + Sync - + Send - + 'static -{ - // the Layout align size of i128 and i256 have changed - // https://blog.rust-lang.org/2024/03/30/i128-layout-update.html - // Here we keep this struct in aggregate state which minimize the align of the struct - type U64Array: Send + Sync + Copy + Default + Debug; - fn zero() -> Self; - fn one() -> Self; - fn minus_one() -> Self; - - // 10**scale - fn e(n: u32) -> Self; - fn mem_size() -> usize; - - fn to_u64_array(self) -> Self::U64Array; - fn from_u64_array(v: Self::U64Array) -> Self; - - fn checked_add(self, rhs: Self) -> Option; - fn checked_sub(self, rhs: Self) -> Option; - fn checked_div(self, rhs: Self) -> Option; - fn checked_mul(self, rhs: Self) -> Option; - fn 
checked_rem(self, rhs: Self) -> Option; - - fn do_round_div(self, rhs: Self, mul_scale: u32) -> Option; - - // mul two decimals and return a decimal with rounding option - fn do_round_mul(self, rhs: Self, shift_scale: u32) -> Option; - - fn min_for_precision(precision: u8) -> Self; - fn max_for_precision(precision: u8) -> Self; - - fn default_decimal_size() -> DecimalSize; - - fn from_float(value: f64) -> Self; - fn from_i128>(value: U) -> Self; - fn from_i256(value: i256) -> Self; - fn from_bigint(value: BigInt) -> Option; - fn de_binary(bytes: &mut &[u8]) -> Self; - fn display(self, scale: u8) -> String; - - fn to_float32(self, scale: u8) -> f32; - fn to_float64(self, scale: u8) -> f64; - - fn to_int(self, scale: u8, rounding_mode: bool) -> Option; + fn try_downcast_scalar<'a>(scalar: &DecimalScalar) -> Option { + match scalar { + DecimalScalar::Decimal128(val, _) => Some(*val), + _ => None, + } + } - fn try_downcast_column(column: &Column) -> Option<(Buffer, DecimalSize)>; - fn try_downcast_builder(builder: &mut ColumnBuilder) -> Option<&mut Vec>; + fn try_downcast_domain(domain: &DecimalDomain) -> Option> { + match domain { + DecimalDomain::Decimal128(val, _) => Some(*val), + _ => None, + } + } - fn try_downcast_owned_builder(builder: ColumnBuilder) -> Option>; + // will mock DecimalSize need modify when use it + fn upcast_scalar(scalar: Self, size: DecimalSize) -> Scalar { + Scalar::Decimal(DecimalScalar::Decimal128(scalar, size)) + } - fn try_downcast_scalar(scalar: &DecimalScalar) -> Option; - fn try_downcast_domain(domain: &DecimalDomain) -> Option>; + fn upcast_column(col: Buffer, size: DecimalSize) -> Column { + Column::Decimal(DecimalColumn::Decimal128(col, size)) + } - fn upcast_scalar(scalar: Self, size: DecimalSize) -> Scalar; - fn upcast_column(col: Buffer, size: DecimalSize) -> Column; - fn upcast_domain(domain: SimpleDomain, size: DecimalSize) -> Domain; - fn upcast_builder(builder: Vec, size: DecimalSize) -> DecimalColumnBuilder; - fn data_type() 
-> DataType; - const MIN: Self; - const MAX: Self; + fn upcast_domain(domain: SimpleDomain, size: DecimalSize) -> Domain { + Domain::Decimal(DecimalDomain::Decimal128(domain, size)) + } - fn to_column_from_buffer(value: Buffer, size: DecimalSize) -> DecimalColumn; + fn upcast_builder(builder: Vec, size: DecimalSize) -> DecimalColumnBuilder { + DecimalColumnBuilder::Decimal128(builder, size) + } - fn to_column(value: Vec, size: DecimalSize) -> DecimalColumn { - Self::to_column_from_buffer(value.into(), size) + fn data_type() -> DataType { + DataType::Decimal(DecimalDataType::Decimal128(DecimalSize { + precision: MAX_DECIMAL128_PRECISION, + scale: 0, + })) } - fn to_scalar(self, size: DecimalSize) -> DecimalScalar; + const MIN: i128 = -99999999999999999999999999999999999999i128; - fn with_size(&self, size: DecimalSize) -> Option { - let multiplier = Self::e(size.scale as u32); - let min_for_precision = Self::min_for_precision(size.precision); - let max_for_precision = Self::max_for_precision(size.precision); - self.checked_mul(multiplier).and_then(|v| { - if v > max_for_precision || v < min_for_precision { - None - } else { - Some(v) - } - }) - } + const MAX: i128 = 99999999999999999999999999999999999999i128; } -impl Decimal for i128 { - type U64Array = [u64; 2]; +impl Decimal for i256 { + type U64Array = [u64; 4]; fn to_u64_array(self) -> Self::U64Array { unsafe { std::mem::transmute(self) } @@ -833,267 +714,19 @@ impl Decimal for i128 { } fn zero() -> Self { - 0_i128 + i256::ZERO } fn one() -> Self { - 1_i128 + i256::ONE } fn minus_one() -> Self { - -1_i128 + i256::MINUS_ONE } fn e(n: u32) -> Self { - 10_i128.pow(n) - } - - fn mem_size() -> usize { - 16 - } - - fn checked_add(self, rhs: Self) -> Option { - self.checked_add(rhs) - } - - fn checked_sub(self, rhs: Self) -> Option { - self.checked_sub(rhs) - } - - fn checked_div(self, rhs: Self) -> Option { - self.checked_div(rhs) - } - - fn checked_mul(self, rhs: Self) -> Option { - self.checked_mul(rhs) - } - - fn 
checked_rem(self, rhs: Self) -> Option { - self.checked_rem(rhs) - } - - fn do_round_mul(self, rhs: Self, shift_scale: u32) -> Option { - let div = i256::e(shift_scale); - let res = if self.is_negative() == rhs.is_negative() { - (i256::from(self) * i256::from(rhs) + div / i256::from(2)) / div - } else { - (i256::from(self) * i256::from(rhs) - div / i256::from(2)) / div - }; - - if !(i256::from(i128::MIN)..=i256::from(i128::MAX)).contains(&res) { - None - } else { - Some(res.as_i128()) - } - } - - fn do_round_div(self, rhs: Self, mul_scale: u32) -> Option { - let mul = i256::e(mul_scale); - if self.is_negative() == rhs.is_negative() { - let res = (i256::from(self) * mul + i256::from(rhs) / i256::from(2)) / i256::from(rhs); - Some(*res.low()) - } else { - let res = (i256::from(self) * mul - i256::from(rhs) / i256::from(2)) / i256::from(rhs); - Some(*res.low()) - } - } - - fn min_for_precision(to_precision: u8) -> Self { - MIN_DECIMAL_FOR_EACH_PRECISION[to_precision as usize - 1] - } - - fn max_for_precision(to_precision: u8) -> Self { - MAX_DECIMAL_FOR_EACH_PRECISION[to_precision as usize - 1] - } - - fn default_decimal_size() -> DecimalSize { - DecimalSize { - precision: MAX_DECIMAL128_PRECISION, - scale: 0, - } - } - - fn to_column_from_buffer(value: Buffer, size: DecimalSize) -> DecimalColumn { - DecimalColumn::Decimal128(value, size) - } - - fn from_float(value: f64) -> Self { - // still needs to be optimized. - // An implementation similar to float64_as_i256 obtained from the ethnum library - const M: u64 = (f64::MANTISSA_DIGITS - 1) as u64; - const MAN_MASK: u64 = !(!0 << M); - const MAN_ONE: u64 = 1 << M; - const EXP_MASK: u64 = !0 >> f64::MANTISSA_DIGITS; - const EXP_OFFSET: u64 = EXP_MASK / 2; - const ABS_MASK: u64 = !0 >> 1; - const SIG_MASK: u64 = !ABS_MASK; - - let abs = f64::from_bits(value.to_bits() & ABS_MASK); - let sign = -(((value.to_bits() & SIG_MASK) >> (u64::BITS - 2)) as i128).wrapping_sub(1); // if self >= 0. 
{ 1 } else { -1 } - if abs >= 1.0 { - let bits = abs.to_bits(); - let exponent = ((bits >> M) & EXP_MASK) - EXP_OFFSET; - let mantissa = (bits & MAN_MASK) | MAN_ONE; - if exponent <= M { - (>::from(mantissa >> (M - exponent))) * sign - } else if exponent < 127 { - (>::from(mantissa) << (exponent - M)) * sign - } else if sign > 0 { - i128::MAX - } else { - i128::MIN - } - } else { - Self::zero() - } - } - - fn from_i128>(value: U) -> Self { - value.into() - } - - fn from_i256(value: i256) -> Self { - value.as_i128() - } - - fn from_bigint(value: BigInt) -> Option { - value.to_i128() - } - - fn de_binary(bytes: &mut &[u8]) -> Self { - let bs: [u8; std::mem::size_of::()] = - bytes[0..std::mem::size_of::()].try_into().unwrap(); - *bytes = &bytes[std::mem::size_of::()..]; - - i128::from_le_bytes(bs) - } - - fn display(self, scale: u8) -> String { - display_decimal_128(self, scale) - } - - fn to_float32(self, scale: u8) -> f32 { - let div = 10_f32.powi(scale as i32); - self as f32 / div - } - - fn to_float64(self, scale: u8) -> f64 { - let div = 10_f64.powi(scale as i32); - self as f64 / div - } - - fn to_int(self, scale: u8, rounding_mode: bool) -> Option { - let div = 10i128.checked_pow(scale as u32)?; - let mut val = self / div; - if rounding_mode && scale > 0 { - // Checking whether numbers need to be added or subtracted to calculate rounding - if let Some(r) = self.checked_rem(div) { - if let Some(m) = r.checked_div(i128::e(scale as u32 - 1)) { - if m >= 5i128 { - val = val.checked_add(1i128)?; - } else if m <= -5i128 { - val = val.checked_sub(1i128)?; - } - } - } - } - num_traits::cast(val) - } - - fn to_scalar(self, size: DecimalSize) -> DecimalScalar { - DecimalScalar::Decimal128(self, size) - } - - fn try_downcast_column(column: &Column) -> Option<(Buffer, DecimalSize)> { - let column = column.as_decimal()?; - match column { - DecimalColumn::Decimal128(c, size) => Some((c.clone(), *size)), - _ => None, - } - } - - fn try_downcast_builder(builder: &mut 
ColumnBuilder) -> Option<&mut Vec> { - match builder { - ColumnBuilder::Decimal(DecimalColumnBuilder::Decimal128(s, _)) => Some(s), - _ => None, - } - } - - fn try_downcast_owned_builder(builder: ColumnBuilder) -> Option> { - match builder { - ColumnBuilder::Decimal(DecimalColumnBuilder::Decimal128(s, _)) => Some(s), - _ => None, - } - } - - fn try_downcast_scalar<'a>(scalar: &DecimalScalar) -> Option { - match scalar { - DecimalScalar::Decimal128(val, _) => Some(*val), - _ => None, - } - } - - fn try_downcast_domain(domain: &DecimalDomain) -> Option> { - match domain { - DecimalDomain::Decimal128(val, _) => Some(*val), - _ => None, - } - } - - // will mock DecimalSize need modify when use it - fn upcast_scalar(scalar: Self, size: DecimalSize) -> Scalar { - Scalar::Decimal(DecimalScalar::Decimal128(scalar, size)) - } - - fn upcast_column(col: Buffer, size: DecimalSize) -> Column { - Column::Decimal(DecimalColumn::Decimal128(col, size)) - } - - fn upcast_domain(domain: SimpleDomain, size: DecimalSize) -> Domain { - Domain::Decimal(DecimalDomain::Decimal128(domain, size)) - } - - fn upcast_builder(builder: Vec, size: DecimalSize) -> DecimalColumnBuilder { - DecimalColumnBuilder::Decimal128(builder, size) - } - - fn data_type() -> DataType { - DataType::Decimal(DecimalDataType::Decimal128(DecimalSize { - precision: MAX_DECIMAL128_PRECISION, - scale: 0, - })) - } - - const MIN: i128 = -99999999999999999999999999999999999999i128; - - const MAX: i128 = 99999999999999999999999999999999999999i128; -} - -impl Decimal for i256 { - type U64Array = [u64; 4]; - - fn to_u64_array(self) -> Self::U64Array { - unsafe { std::mem::transmute(self) } - } - - fn from_u64_array(v: Self::U64Array) -> Self { - unsafe { std::mem::transmute(v) } - } - - fn zero() -> Self { - i256::ZERO - } - - fn one() -> Self { - i256::ONE - } - - fn minus_one() -> Self { - i256::MINUS_ONE - } - - fn e(n: u32) -> Self { - (i256::ONE * i256::from(10)).pow(n) + (i256::ONE * i256::from(10)).pow(n) } fn 
mem_size() -> usize { @@ -1101,23 +734,23 @@ impl Decimal for i256 { } fn checked_add(self, rhs: Self) -> Option { - self.checked_add(rhs) + self.0.checked_add(rhs.0).map(Self) } fn checked_sub(self, rhs: Self) -> Option { - self.checked_sub(rhs) + self.0.checked_sub(rhs.0).map(Self) } fn checked_div(self, rhs: Self) -> Option { - self.checked_div(rhs) + self.0.checked_div(rhs.0).map(Self) } fn checked_mul(self, rhs: Self) -> Option { - self.checked_mul(rhs) + self.0.checked_mul(rhs.0).map(Self) } fn checked_rem(self, rhs: Self) -> Option { - self.checked_rem(rhs) + self.0.checked_rem(rhs.0).map(Self) } fn do_round_mul(self, rhs: Self, shift_scale: u32) -> Option { @@ -1237,12 +870,12 @@ impl Decimal for i256 { fn to_float32(self, scale: u8) -> f32 { let div = 10_f32.powi(scale as i32); - self.0.as_f32() / div + self.as_f32() / div } fn to_float64(self, scale: u8) -> f64 { let div = 10_f64.powi(scale as i32); - self.0.as_f64() / div + self.as_f64() / div } fn to_int(self, scale: u8, rounding_mode: bool) -> Option { @@ -2501,3 +2134,374 @@ pub const MIN_DECIMAL_FOR_EACH_PRECISION: [i128; 38] = [ -9999999999999999999999999999999999999, -99999999999999999999999999999999999999, ]; + +/// Physical representation of a decimal +#[derive(Clone, Copy, Default, Eq, Serialize, Deserialize)] +#[allow(non_camel_case_types)] +#[repr(C)] +pub struct i256(pub ethnum::I256); + +impl i256 { + /// The additive identity for this integer type, i.e. `0`. + pub const ZERO: Self = Self(ethnum::I256([0; 2])); + + /// The multiplicative identity for this integer type, i.e. `1`. + pub const ONE: Self = Self(ethnum::I256::new(1)); + + /// The multiplicative inverse for this integer type, i.e. `-1`. + pub const MINUS_ONE: Self = Self(ethnum::I256::new(-1)); + + /// Creates a new 256-bit integer value from a primitive `i128` integer. + #[inline] + pub const fn new(value: i128) -> Self { + Self(ethnum::I256::new(value)) + } + + /// Returns a new [`i256`] from two `i128`. 
+ pub fn from_words(hi: i128, lo: i128) -> Self { + Self(ethnum::I256::from_words(hi, lo)) + } + + pub fn from_str_radix( + src: &str, + radix: u32, + ) -> std::result::Result { + ethnum::I256::from_str_radix(src, radix).map(Self) + } + + #[inline] + pub const fn to_le_bytes(&self) -> [u8; 32] { + let (high, low) = self.0.into_words(); + let low = low.to_le_bytes(); + let high = high.to_le_bytes(); + let mut i = 0; + let mut bytes = [0u8; 32]; + while i != 16 { + bytes[i] = low[i]; + bytes[i + 16] = high[i]; + i += 1; + } + bytes + } + + #[inline] + pub const fn to_be_bytes(&self) -> [u8; 32] { + let (high, low) = self.0.into_words(); + let low = low.to_be_bytes(); + let high = high.to_be_bytes(); + let mut bytes = [0; 32]; + let mut i = 0; + while i != 16 { + bytes[i] = high[i]; + bytes[i + 16] = low[i]; + i += 1; + } + bytes + } + + #[inline] + pub const fn from_be_bytes(bytes: [u8; 32]) -> Self { + let mut low = [0; 16]; + let mut high = [0; 16]; + let mut i = 0; + while i != 16 { + high[i] = bytes[i]; + low[i] = bytes[i + 16]; + i += 1; + } + let high = i128::from_be_bytes(high); + let low = i128::from_be_bytes(low); + Self(ethnum::I256::from_words(high, low)) + } + + #[inline] + pub const fn from_le_bytes(bytes: [u8; 32]) -> Self { + let mut low = [0; 16]; + let mut high = [0; 16]; + let mut i = 0; + while i != 16 { + low[i] = bytes[i]; + high[i] = bytes[i + 16]; + i += 1; + } + let high = i128::from_be_bytes(high); + let low = i128::from_be_bytes(low); + Self(ethnum::I256::from_words(high, low)) + } + + #[inline] + pub const fn is_positive(self) -> bool { + self.0.is_positive() + } + + #[inline] + pub const fn is_negative(self) -> bool { + self.0.is_negative() + } + + #[inline] + pub fn saturating_abs(self) -> Self { + Self(self.0.saturating_abs()) + } + + #[inline(always)] + pub fn leading_zeros(self) -> u32 { + self.0.leading_zeros() + } + + /// Cast to a primitive `f32`. 
+ #[inline] + pub fn as_f32(self) -> f32 { + self.0.as_f32() + } + + /// Cast to a primitive `f64`. + #[inline] + pub fn as_f64(self) -> f64 { + self.0.as_f64() + } + + /// Cast to a primitive `u64`. + #[inline] + pub const fn as_u64(self) -> u64 { + self.0.as_u64() + } + + /// Cast to a primitive `i64`. + #[inline] + pub const fn as_i64(self) -> i64 { + self.0.as_i64() + } + + /// Cast to a primitive `i128`. + #[inline] + pub const fn as_i128(self) -> i128 { + self.0.as_i128() + } + + /// Get the low 128-bit word for this signed integer. + #[inline] + pub fn low(&self) -> &i128 { + self.0.low() + } + + /// Get the high 128-bit word for this signed integer. + #[inline] + pub fn high(&self) -> &i128 { + self.0.high() + } + + #[allow(unused_attributes)] + #[inline] + pub fn abs(self) -> Self { + Self(self.0.abs()) + } + + #[inline] + pub fn checked_neg(self) -> Option { + self.0.checked_neg().map(Self) + } + + #[inline] + pub fn pow(self, exp: u32) -> Self { + Self(self.0.pow(exp)) + } +} + +impl Neg for i256 { + type Output = Self; + + #[inline] + fn neg(self) -> Self::Output { + Self(self.0.checked_neg().expect("i256 overflow")) + } +} + +impl std::fmt::Debug for i256 { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "{:?}", self.0) + } +} + +impl std::fmt::Display for i256 { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "{}", self.0) + } +} + +impl AddAssign for i256 { + fn add_assign(&mut self, rhs: Self) { + self.0 += rhs.0; + } +} + +impl SubAssign for i256 { + fn sub_assign(&mut self, rhs: Self) { + self.0 -= rhs.0; + } +} + +impl MulAssign for i256 { + fn mul_assign(&mut self, rhs: Self) { + self.0 *= rhs.0; + } +} + +impl DivAssign for i256 { + fn div_assign(&mut self, rhs: Self) { + self.0 /= rhs.0; + } +} + +impl Add for i256 { + type Output = Self; + + fn add(self, rhs: Self) -> Self::Output { + Self(self.0 + rhs.0) + } +} + +impl Sub for i256 { + type Output = Self; + + fn sub(self, rhs: 
Self) -> Self::Output { + Self(self.0 - rhs.0) + } +} + +impl Mul for i256 { + type Output = Self; + + fn mul(self, rhs: Self) -> Self::Output { + Self(self.0 * rhs.0) + } +} + +impl Div for i256 { + type Output = Self; + + fn div(self, rhs: Self) -> Self::Output { + Self(self.0 / rhs.0) + } +} + +macro_rules! impl_from { + ($($t:ty),* $(,)?) => {$( + impl From<$t> for i256 { + #[inline] + fn from(value: $t) -> Self { + i256(value.as_i256()) + } + } + )*}; +} + +impl_from! { + bool, + i8, i16, i32, i64, i128, + u8, u16, u32, u64, u128, +} + +impl TryFrom for i256 { + type Error = TryFromIntError; + + fn try_from(value: u256) -> std::result::Result { + let i256_value = ethnum::i256::try_from(value)?; + Ok(i256(i256_value)) + } +} + +impl BorshSerialize for i256 { + fn serialize(&self, writer: &mut W) -> borsh::io::Result<()> { + BorshSerialize::serialize(&self.0 .0, writer) + } +} + +impl BorshDeserialize for i256 { + fn deserialize_reader(reader: &mut R) -> borsh::io::Result { + let value: [i128; 2] = BorshDeserialize::deserialize_reader(reader)?; + Ok(Self(ethnum::I256(value))) + } +} + +impl Marshal for i256 { + fn marshal(&self, scratch: &mut [u8]) { + self.0.marshal(scratch); + } +} + +macro_rules! impl_into_float { + ($($t:ty => $f:ident),* $(,)?) => {$( + impl From for $t { + #[inline] + fn from(x: i256) -> $t { + x.0.$f() + } + } + )*}; +} + +impl_into_float! { + f32 => as_f32, f64 => as_f64, +} + +impl core::hash::Hash for i256 { + #[inline] + fn hash(&self, hasher: &mut H) + where H: core::hash::Hasher { + core::hash::Hash::hash(&self.0, hasher); + } +} + +impl PartialEq for i256 { + #[inline] + fn eq(&self, other: &Self) -> bool { + let (ahi, alo) = self.0.into_words(); + let (bhi, blo) = other.0.into_words(); + (ahi == bhi) & (alo == blo) + // bitwise and rather than logical and + // to make O0 code more effecient. 
+ } +} + +impl PartialEq for i256 { + #[inline] + fn eq(&self, other: &i128) -> bool { + *self == i256::new(*other) + } +} + +impl PartialEq for i128 { + #[inline] + fn eq(&self, other: &i256) -> bool { + i256::new(*self) == *other + } +} + +impl PartialOrd for i256 { + #[inline] + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl PartialOrd for i256 { + #[inline] + fn partial_cmp(&self, rhs: &i128) -> Option { + Some(self.cmp(&i256::new(*rhs))) + } +} + +impl PartialOrd for i128 { + #[inline] + fn partial_cmp(&self, rhs: &i256) -> Option { + Some(i256::new(*self).cmp(rhs)) + } +} + +impl Ord for i256 { + #[inline] + fn cmp(&self, other: &Self) -> Ordering { + self.partial_cmp(other).unwrap_or(Ordering::Equal) + } +} diff --git a/src/query/expression/tests/it/decimal.rs b/src/query/expression/tests/it/decimal.rs index bd8a87701a0e..9c84eed27c3c 100644 --- a/src/query/expression/tests/it/decimal.rs +++ b/src/query/expression/tests/it/decimal.rs @@ -18,10 +18,10 @@ use databend_common_expression::serialize::read_decimal_with_size; use databend_common_expression::type_check::common_super_type; use databend_common_expression::types::decimal::Decimal; use databend_common_expression::types::decimal::DecimalSize; +use databend_common_expression::types::i256; use databend_common_expression::types::DataType; use databend_common_expression::types::DecimalDataType; use databend_common_expression::types::NumberDataType; -use ethnum::i256; use num_bigint::BigInt; use pretty_assertions::assert_eq; diff --git a/src/query/expression/tests/it/row.rs b/src/query/expression/tests/it/row.rs index 4c42d0fc630f..85c444a95534 100644 --- a/src/query/expression/tests/it/row.rs +++ b/src/query/expression/tests/it/row.rs @@ -20,13 +20,13 @@ use databend_common_base::base::OrderedFloat; use databend_common_column::bitmap::MutableBitmap; use databend_common_expression::types::binary::BinaryColumnBuilder; use databend_common_expression::types::decimal::*; 
+use databend_common_expression::types::i256; use databend_common_expression::types::nullable::NullableColumn; use databend_common_expression::types::*; use databend_common_expression::Column; use databend_common_expression::FromData; use databend_common_expression::RowConverter; use databend_common_expression::SortField; -use ethnum::i256; use itertools::Itertools; use jsonb::parse_value; use jsonb::RawJsonb; diff --git a/src/query/functions/src/scalars/variant.rs b/src/query/functions/src/scalars/variant.rs index a19fecf0cbdc..bd7fe37c7270 100644 --- a/src/query/functions/src/scalars/variant.rs +++ b/src/query/functions/src/scalars/variant.rs @@ -22,7 +22,6 @@ use bstr::ByteSlice; use databend_common_column::types::months_days_micros; use databend_common_expression::types::binary::BinaryColumnBuilder; use databend_common_expression::types::date::string_to_date; -use databend_common_expression::types::i256; use databend_common_expression::types::nullable::NullableColumn; use databend_common_expression::types::nullable::NullableColumnBuilder; use databend_common_expression::types::nullable::NullableDomain; @@ -38,10 +37,6 @@ use databend_common_expression::types::Bitmap; use databend_common_expression::types::BooleanType; use databend_common_expression::types::DataType; use databend_common_expression::types::DateType; -use databend_common_expression::types::Decimal128Type; -use databend_common_expression::types::Decimal256Type; -use databend_common_expression::types::DecimalScalar; -use databend_common_expression::types::DecimalSize; use databend_common_expression::types::GenericType; use databend_common_expression::types::IntervalType; use databend_common_expression::types::MutableBitmap; @@ -737,58 +732,6 @@ pub fn register(registry: &mut FunctionRegistry) { }), ); - registry.register_combine_nullable_1_arg::( - "as_decimal128", - |_, _| FunctionDomain::Full, - vectorize_with_builder_1_arg::>( - |v, output, ctx| { - if let Some(validity) = &ctx.validity { - if 
!validity.get_bit(output.len()) { - output.push_null(); - return; - } - } - match RawJsonb::new(v).as_decimal128() { - Ok(Some(res)) => { - let size = DecimalSize { - precision: res.precision, - scale: res.scale, - }; - let val = DecimalScalar::Decimal128(res.value, size); - output.push(res.value); - } - _ => output.push_null(), - } - }, - ), - ); - - registry.register_combine_nullable_1_arg::( - "as_decimal256", - |_, _| FunctionDomain::Full, - vectorize_with_builder_1_arg::>( - |v, output, ctx| { - if let Some(validity) = &ctx.validity { - if !validity.get_bit(output.len()) { - output.push_null(); - return; - } - } - match RawJsonb::new(v).as_decimal256() { - Ok(Some(res)) => { - let size = DecimalSize { - precision: res.precision, - scale: res.scale, - }; - let val = DecimalScalar::Decimal256(i256(res.value), size); - output.push(i256(res.value)); - } - _ => output.push_null(), - } - }, - ), - ); - registry.register_combine_nullable_1_arg::( "as_binary", |_, _| FunctionDomain::Full, diff --git a/src/query/functions/tests/it/aggregates/agg_hashtable.rs b/src/query/functions/tests/it/aggregates/agg_hashtable.rs index ec309c306f5c..368bd158237c 100644 --- a/src/query/functions/tests/it/aggregates/agg_hashtable.rs +++ b/src/query/functions/tests/it/aggregates/agg_hashtable.rs @@ -32,6 +32,7 @@ use std::sync::Arc; use bumpalo::Bump; use databend_common_expression::block_debug::assert_block_value_sort_eq; use databend_common_expression::get_states_layout; +use databend_common_expression::types::i256; use databend_common_expression::types::ArgType; use databend_common_expression::types::BooleanType; use databend_common_expression::types::DataType; @@ -57,7 +58,6 @@ use databend_common_expression::PayloadFlushState; use databend_common_expression::ProbeState; use databend_common_functions::aggregates::AggregateFunctionFactory; use databend_common_functions::aggregates::DecimalSumState; -use ethnum::I256; use itertools::Itertools; // cargo test --package 
databend-common-functions --test it -- aggregates::agg_hashtable::test_agg_hashtable --exact --nocapture @@ -194,7 +194,7 @@ fn test_layout() { .get("sum", vec![], vec![decimal_type], vec![]) .unwrap(); type S = DecimalSumState>; - type M = DecimalSumState>; + type M = DecimalSumState>; let states_layout = get_states_layout(&[aggrs.clone()]).unwrap(); diff --git a/src/query/functions/tests/it/scalars/arithmetic.rs b/src/query/functions/tests/it/scalars/arithmetic.rs index 4c36f206a160..0d31221e451f 100644 --- a/src/query/functions/tests/it/scalars/arithmetic.rs +++ b/src/query/functions/tests/it/scalars/arithmetic.rs @@ -16,10 +16,10 @@ use std::io::Write; use databend_common_expression::types::decimal::DecimalColumn; use databend_common_expression::types::decimal::DecimalSize; +use databend_common_expression::types::i256; use databend_common_expression::types::number::*; use databend_common_expression::Column; use databend_common_expression::FromData; -use ethnum::i256; use goldenfile::Mint; use super::run_ast; diff --git a/src/query/functions/tests/it/scalars/geo.rs b/src/query/functions/tests/it/scalars/geo.rs index 4557602634b8..f45b7742e0fa 100644 --- a/src/query/functions/tests/it/scalars/geo.rs +++ b/src/query/functions/tests/it/scalars/geo.rs @@ -25,14 +25,23 @@ fn test_geo() { let mut mint = Mint::new("tests/it/scalars/testdata"); let file = &mut mint.new_goldenfile("geo.txt").unwrap(); + println!("\n-----1"); test_geo_to_h3(file); + println!("\n-----2"); test_great_circle_distance(file); + println!("\n-----3"); test_geo_distance(file); + println!("\n-----4"); test_great_circle_angle(file); + println!("\n-----5"); test_point_in_ellipses(file); + println!("\n-----6"); test_point_in_polygon(file); + println!("\n-----7"); test_geohash_encode(file); + println!("\n-----8"); test_geohash_decode(file); + println!("\n-----9"); } fn test_geo_to_h3(file: &mut impl Write) { diff --git a/src/query/functions/tests/it/type_check.rs 
b/src/query/functions/tests/it/type_check.rs index fb6f7caf5114..6ff001278411 100644 --- a/src/query/functions/tests/it/type_check.rs +++ b/src/query/functions/tests/it/type_check.rs @@ -75,10 +75,10 @@ fn test_type_check() { "n_d128", Decimal128Type::from_data(vec![0_i128]).wrap_nullable(None), ), - ("d256", Decimal256Type::from_data(vec![ethnum::I256::ZERO])), + ("d256", Decimal256Type::from_data(vec![i256::ZERO])), ( "n_d256", - Decimal256Type::from_data(vec![ethnum::I256::ZERO]).wrap_nullable(None), + Decimal256Type::from_data(vec![i256::ZERO]).wrap_nullable(None), ), ]; diff --git a/src/query/storages/stage/src/read/avro/decoder.rs b/src/query/storages/stage/src/read/avro/decoder.rs index ba2952cb8ac0..4214ef693d90 100644 --- a/src/query/storages/stage/src/read/avro/decoder.rs +++ b/src/query/storages/stage/src/read/avro/decoder.rs @@ -530,6 +530,7 @@ mod test { use apache_avro::Decimal; use apache_avro::Schema; use apache_avro::Writer; + use databend_common_expression::types::i256; use databend_common_expression::types::DecimalDataType; use databend_common_expression::types::DecimalScalar; use databend_common_expression::types::DecimalSize; @@ -541,7 +542,6 @@ mod test { use databend_common_expression::TableField; use databend_common_expression::TableSchema; use databend_common_expression::TableSchemaRef; - use ethnum::i256; use num_bigint::BigInt; use serde_json::json; From ea52474f66c071abc7189806a179222f47e6c43b Mon Sep 17 00:00:00 2001 From: baishen Date: Thu, 24 Apr 2025 17:23:56 +0800 Subject: [PATCH 07/13] fix --- Cargo.lock | 2 +- Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8620c00ae56f..7a0a7670ede6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9190,7 +9190,7 @@ dependencies = [ [[package]] name = "jsonb" version = "0.5.1" -source = "git+https://github.com/b41sh/jsonb?rev=df93358108160ff772b60f2337957b0886b5639a#df93358108160ff772b60f2337957b0886b5639a" +source = 
"git+https://github.com/b41sh/jsonb?rev=90b9b0155daf395f22ec47c0cacfb70ba8e96985#90b9b0155daf395f22ec47c0cacfb70ba8e96985" dependencies = [ "byteorder", "ethnum", diff --git a/Cargo.toml b/Cargo.toml index 2ad8162a41bb..20cd576fce8d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -637,7 +637,7 @@ backtrace = { git = "https://github.com/rust-lang/backtrace-rs.git", rev = "7226 color-eyre = { git = "https://github.com/eyre-rs/eyre.git", rev = "e5d92c3" } deltalake = { git = "https://github.com/delta-io/delta-rs", rev = "c149502" } display-more = { git = "https://github.com/databendlabs/display-more", tag = "v0.1.2" } -jsonb = { git = "https://github.com/b41sh/jsonb", rev = "df93358108160ff772b60f2337957b0886b5639a" } +jsonb = { git = "https://github.com/b41sh/jsonb", rev = "90b9b0155daf395f22ec47c0cacfb70ba8e96985" } map-api = { git = "https://github.com/databendlabs/map-api", tag = "v0.2.3" } openai_api_rust = { git = "https://github.com/datafuse-extras/openai-api", rev = "819a0ed" } openraft = { git = "https://github.com/databendlabs/openraft", tag = "v0.10.0-alpha.9" } From 7fca5f5fbba34e74a3c99e6d0fc8af558076554f Mon Sep 17 00:00:00 2001 From: baishen Date: Thu, 24 Apr 2025 18:04:16 +0800 Subject: [PATCH 08/13] fix --- src/query/expression/src/types/decimal.rs | 2 +- .../tests/it/aggregates/testdata/agg.txt | 38 +++++++++---------- .../it/aggregates/testdata/agg_group_by.txt | 38 +++++++++---------- src/query/functions/tests/it/scalars/geo.rs | 9 ----- 4 files changed, 39 insertions(+), 48 deletions(-) diff --git a/src/query/expression/src/types/decimal.rs b/src/query/expression/src/types/decimal.rs index 0e30ac6d05c4..5fc875c3a0c7 100644 --- a/src/query/expression/src/types/decimal.rs +++ b/src/query/expression/src/types/decimal.rs @@ -2502,6 +2502,6 @@ impl PartialOrd for i128 { impl Ord for i256 { #[inline] fn cmp(&self, other: &Self) -> Ordering { - self.partial_cmp(other).unwrap_or(Ordering::Equal) + self.0.cmp(&other.0) } } diff --git 
a/src/query/functions/tests/it/aggregates/testdata/agg.txt b/src/query/functions/tests/it/aggregates/testdata/agg.txt index d206e21bb626..9d2c83c31c43 100644 --- a/src/query/functions/tests/it/aggregates/testdata/agg.txt +++ b/src/query/functions/tests/it/aggregates/testdata/agg.txt @@ -1467,12 +1467,12 @@ evaluation (internal): ast: json_array_agg(dt) evaluation (internal): -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| dt | Timestamp([1, 0, 2, 3]) | -| Output | Variant([0x800000041000001a1000001a1000001a1000001a313937302d30312d30312030303a30303a30302e303030303031313937302d30312d30312030303a30303a30302e303030303030313937302d30312d30312030303a30303a30302e303030303032313937302d30312d30312030303a30303a30302e303030303033]) | -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+-------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+-------------------------------------------------------------------------------------------------------------------------------+ +| dt | Timestamp([1, 0, 2, 3]) | +| Output | 
Variant([0x8000000460000009600000096000000960000009200000000000000001200000000000000000200000000000000002200000000000000003]) | ++--------+-------------------------------------------------------------------------------------------------------------------------------+ ast: json_array_agg(event1) @@ -1487,12 +1487,12 @@ evaluation (internal): ast: json_array_agg(dec) evaluation (internal): -+--------+-----------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-----------------------------------------------------------------------------------------------------+ -| dec | NullableColumn { column: Decimal128([1.10, 2.20, 0.00, 3.30]), validity: [0b____1011] } | -| Output | Variant([0x80000003200000092000000920000009603ff199999999999a60400199999999999a60400a666666666666]) | -+--------+-----------------------------------------------------------------------------------------------------+ ++--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| dec | NullableColumn { column: Decimal128([1.10, 2.20, 0.00, 3.30]), validity: [0b____1011] } | +| Output | Variant([0x80000003200000132000001320000013700000000000000000000000000000006e0f0270000000000000000000000000000000dc0f02700000000000000000000000000000014a0f02]) | ++--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+ error: Json object have duplicate key 'k' @@ -1523,13 +1523,13 @@ error: json_object_agg does not support key type 'Number(Int64)' ast: json_object_agg(s, dec) evaluation (internal): 
-+--------+-----------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------+ -| s | StringColumn[abc, def, opq, xyz] | -| dec | NullableColumn { column: Decimal128([1.10, 2.20, 0.00, 3.30]), validity: [0b____1011] } | -| Output | Variant([0x4000000310000003100000031000000320000009200000092000000961626364656678797a603ff199999999999a60400199999999999a60400a666666666666]) | -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| s | StringColumn[abc, def, opq, xyz] | +| dec | NullableColumn { column: Decimal128([1.10, 2.20, 0.00, 3.30]), validity: [0b____1011] } | +| Output | Variant([0x4000000310000003100000031000000320000013200000132000001361626364656678797a700000000000000000000000000000006e0f0270000000000000000000000000000000dc0f02700000000000000000000000000000014a0f02]) | ++--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ast: mode(1) diff --git a/src/query/functions/tests/it/aggregates/testdata/agg_group_by.txt b/src/query/functions/tests/it/aggregates/testdata/agg_group_by.txt index 
74d23b4e9a61..3ecbbdc08bb4 100644 --- a/src/query/functions/tests/it/aggregates/testdata/agg_group_by.txt +++ b/src/query/functions/tests/it/aggregates/testdata/agg_group_by.txt @@ -1405,12 +1405,12 @@ evaluation (internal): ast: json_array_agg(dt) evaluation (internal): -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| dt | Timestamp([1, 0, 2, 3]) | -| Output | Variant([0x800000041000001a1000001a1000001a1000001a313937302d30312d30312030303a30303a30302e303030303031313937302d30312d30312030303a30303a30302e303030303030313937302d30312d30312030303a30303a30302e303030303032313937302d30312d30312030303a30303a30302e303030303033]) | -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+-------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+-------------------------------------------------------------------------------------------------------------------------------+ +| dt | Timestamp([1, 0, 2, 3]) | +| Output | Variant([0x8000000460000009600000096000000960000009200000000000000001200000000000000000200000000000000002200000000000000003]) | 
++--------+-------------------------------------------------------------------------------------------------------------------------------+ ast: json_array_agg(event1) @@ -1425,12 +1425,12 @@ evaluation (internal): ast: json_array_agg(dec) evaluation (internal): -+--------+-----------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+-----------------------------------------------------------------------------------------------------+ -| dec | NullableColumn { column: Decimal128([1.10, 2.20, 0.00, 3.30]), validity: [0b____1011] } | -| Output | Variant([0x80000003200000092000000920000009603ff199999999999a60400199999999999a60400a666666666666]) | -+--------+-----------------------------------------------------------------------------------------------------+ ++--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| dec | NullableColumn { column: Decimal128([1.10, 2.20, 0.00, 3.30]), validity: [0b____1011] } | +| Output | Variant([0x80000003200000132000001320000013700000000000000000000000000000006e0f0270000000000000000000000000000000dc0f02700000000000000000000000000000014a0f02]) | ++--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+ error: Json object have duplicate key 'k' @@ -1461,13 +1461,13 @@ error: json_object_agg does not support key type 'Number(Int64)' ast: json_object_agg(s, dec) evaluation (internal): -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------+ -| Column 
| Data | -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------+ -| s | StringColumn[abc, def, opq, xyz] | -| dec | NullableColumn { column: Decimal128([1.10, 2.20, 0.00, 3.30]), validity: [0b____1011] } | -| Output | Variant([0x4000000310000003100000031000000320000009200000092000000961626364656678797a603ff199999999999a60400199999999999a60400a666666666666]) | -+--------+-----------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| s | StringColumn[abc, def, opq, xyz] | +| dec | NullableColumn { column: Decimal128([1.10, 2.20, 0.00, 3.30]), validity: [0b____1011] } | +| Output | Variant([0x4000000310000003100000031000000320000013200000132000001361626364656678797a700000000000000000000000000000006e0f0270000000000000000000000000000000dc0f02700000000000000000000000000000014a0f02]) | ++--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ast: mode(1) diff --git a/src/query/functions/tests/it/scalars/geo.rs b/src/query/functions/tests/it/scalars/geo.rs index f45b7742e0fa..4557602634b8 100644 --- a/src/query/functions/tests/it/scalars/geo.rs +++ b/src/query/functions/tests/it/scalars/geo.rs @@ -25,23 +25,14 @@ fn test_geo() { let mut mint = Mint::new("tests/it/scalars/testdata"); let file = 
&mut mint.new_goldenfile("geo.txt").unwrap(); - println!("\n-----1"); test_geo_to_h3(file); - println!("\n-----2"); test_great_circle_distance(file); - println!("\n-----3"); test_geo_distance(file); - println!("\n-----4"); test_great_circle_angle(file); - println!("\n-----5"); test_point_in_ellipses(file); - println!("\n-----6"); test_point_in_polygon(file); - println!("\n-----7"); test_geohash_encode(file); - println!("\n-----8"); test_geohash_decode(file); - println!("\n-----9"); } fn test_geo_to_h3(file: &mut impl Write) { From 8ec20fcce5d6e4e06c40b3e4a1d61236dfff7cc4 Mon Sep 17 00:00:00 2001 From: baishen Date: Fri, 25 Apr 2025 16:01:58 +0800 Subject: [PATCH 09/13] fix --- Cargo.lock | 2 +- Cargo.toml | 2 +- src/query/functions/src/scalars/variant.rs | 84 +++++++++++++++++++ .../tests/it/scalars/testdata/cast.txt | 8 +- .../it/scalars/testdata/function_list.txt | 8 ++ .../tests/it/scalars/testdata/variant.txt | 72 ++++++++++++++++ .../functions/tests/it/scalars/variant.rs | 24 ++++++ .../mysql/writers/query_result_writer.rs | 1 + .../02_0056_function_semi_structureds_as.test | 41 +++++++++ 9 files changed, 236 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7a0a7670ede6..7bc4d7b85d51 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9190,7 +9190,7 @@ dependencies = [ [[package]] name = "jsonb" version = "0.5.1" -source = "git+https://github.com/b41sh/jsonb?rev=90b9b0155daf395f22ec47c0cacfb70ba8e96985#90b9b0155daf395f22ec47c0cacfb70ba8e96985" +source = "git+https://github.com/b41sh/jsonb?rev=73871c14bc11b65a1cba90bdb4ccc85f06c356e2#73871c14bc11b65a1cba90bdb4ccc85f06c356e2" dependencies = [ "byteorder", "ethnum", diff --git a/Cargo.toml b/Cargo.toml index 20cd576fce8d..dddc18f255b3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -637,7 +637,7 @@ backtrace = { git = "https://github.com/rust-lang/backtrace-rs.git", rev = "7226 color-eyre = { git = "https://github.com/eyre-rs/eyre.git", rev = "e5d92c3" } deltalake = { git = 
"https://github.com/delta-io/delta-rs", rev = "c149502" } display-more = { git = "https://github.com/databendlabs/display-more", tag = "v0.1.2" } -jsonb = { git = "https://github.com/b41sh/jsonb", rev = "90b9b0155daf395f22ec47c0cacfb70ba8e96985" } +jsonb = { git = "https://github.com/b41sh/jsonb", rev = "73871c14bc11b65a1cba90bdb4ccc85f06c356e2" } map-api = { git = "https://github.com/databendlabs/map-api", tag = "v0.2.3" } openai_api_rust = { git = "https://github.com/datafuse-extras/openai-api", rev = "819a0ed" } openraft = { git = "https://github.com/databendlabs/openraft", tag = "v0.10.0-alpha.9" } diff --git a/src/query/functions/src/scalars/variant.rs b/src/query/functions/src/scalars/variant.rs index bd7fe37c7270..26171913472b 100644 --- a/src/query/functions/src/scalars/variant.rs +++ b/src/query/functions/src/scalars/variant.rs @@ -732,6 +732,23 @@ pub fn register(registry: &mut FunctionRegistry) { }), ); + registry.register_passthrough_nullable_1_arg::( + "is_binary", + |_, _| FunctionDomain::Full, + vectorize_with_builder_1_arg::(|v, output, ctx| { + if let Some(validity) = &ctx.validity { + if !validity.get_bit(output.len()) { + output.push(false); + return; + } + } + match RawJsonb::new(v).is_binary() { + Ok(res) => output.push(res), + Err(_) => output.push(false), + } + }), + ); + registry.register_combine_nullable_1_arg::( "as_binary", |_, _| FunctionDomain::Full, @@ -749,6 +766,23 @@ pub fn register(registry: &mut FunctionRegistry) { }), ); + registry.register_passthrough_nullable_1_arg::( + "is_date", + |_, _| FunctionDomain::Full, + vectorize_with_builder_1_arg::(|v, output, ctx| { + if let Some(validity) = &ctx.validity { + if !validity.get_bit(output.len()) { + output.push(false); + return; + } + } + match RawJsonb::new(v).is_date() { + Ok(res) => output.push(res), + Err(_) => output.push(false), + } + }), + ); + registry.register_combine_nullable_1_arg::( "as_date", |_, _| FunctionDomain::Full, @@ -766,6 +800,23 @@ pub fn register(registry: 
&mut FunctionRegistry) { }), ); + registry.register_passthrough_nullable_1_arg::( + "is_timestamp", + |_, _| FunctionDomain::Full, + vectorize_with_builder_1_arg::(|v, output, ctx| { + if let Some(validity) = &ctx.validity { + if !validity.get_bit(output.len()) { + output.push(false); + return; + } + } + match RawJsonb::new(v).is_timestamp() { + Ok(res) => output.push(res), + Err(_) => output.push(false), + } + }), + ); + registry.register_combine_nullable_1_arg::( "as_timestamp", |_, _| FunctionDomain::Full, @@ -785,6 +836,23 @@ pub fn register(registry: &mut FunctionRegistry) { ), ); + registry.register_passthrough_nullable_1_arg::( + "is_interval", + |_, _| FunctionDomain::Full, + vectorize_with_builder_1_arg::(|v, output, ctx| { + if let Some(validity) = &ctx.validity { + if !validity.get_bit(output.len()) { + output.push(false); + return; + } + } + match RawJsonb::new(v).is_interval() { + Ok(res) => output.push(res), + Err(_) => output.push(false), + } + }), + ); + registry.register_combine_nullable_1_arg::( "as_interval", |_, _| FunctionDomain::Full, @@ -1199,6 +1267,10 @@ pub fn register(registry: &mut FunctionRegistry) { output.push_null(); return; } + if let Ok(Some(date)) = raw_jsonb.as_date() { + output.push(date.value); + return; + } match raw_jsonb .as_str() .map_err(|e| format!("{e}")) @@ -1233,6 +1305,10 @@ pub fn register(registry: &mut FunctionRegistry) { } } let raw_jsonb = RawJsonb::new(val); + if let Ok(Some(date)) = raw_jsonb.as_date() { + output.push(date.value); + return; + } match raw_jsonb .as_str() .map_err(|e| format!("{e}")) @@ -1268,6 +1344,10 @@ pub fn register(registry: &mut FunctionRegistry) { output.push_null(); return; } + if let Ok(Some(ts)) = raw_jsonb.as_timestamp() { + output.push(ts.value); + return; + } match raw_jsonb .as_str() .map_err(|e| format!("{e}")) @@ -1301,6 +1381,10 @@ pub fn register(registry: &mut FunctionRegistry) { } let raw_jsonb = RawJsonb::new(val); + if let Ok(Some(ts)) = raw_jsonb.as_timestamp() { + 
output.push(ts.value); + return; + } match raw_jsonb .as_str() .map_err(|e| format!("{e}")) diff --git a/src/query/functions/tests/it/scalars/testdata/cast.txt b/src/query/functions/tests/it/scalars/testdata/cast.txt index 6f7493425858..479d777f5d1f 100644 --- a/src/query/functions/tests/it/scalars/testdata/cast.txt +++ b/src/query/functions/tests/it/scalars/testdata/cast.txt @@ -231,7 +231,7 @@ output : '-1' ast : CAST(1.1 AS VARIANT) raw expr : CAST(1.1 AS Variant) checked expr : CAST(1.1_d128(2,1) AS Variant) -optimized expr : 0x2000000020000009603ff199999999999a +optimized expr : 0x2000000020000013700000000000000000000000000000000b0201 output type : Variant output domain : Undefined output : '1.1' @@ -266,7 +266,7 @@ output : '[0,"a"]' ast : CAST(to_timestamp(1000000) AS VARIANT) raw expr : CAST(to_timestamp(1000000) AS Variant) checked expr : CAST(CAST(CAST(1000000_u32 AS Int64) AS Timestamp) AS Variant) -optimized expr : 0x200000001000001a313937302d30312d31322031333a34363a34302e303030303030 +optimized expr : 0x200000006000000920000000e8d4a51000 output type : Variant output domain : Undefined output : '"1970-01-12 13:46:40.000000"' @@ -2409,7 +2409,7 @@ output : '-1' ast : TRY_CAST(1.1 AS VARIANT) raw expr : TRY_CAST(1.1 AS Variant) checked expr : TRY_CAST(1.1_d128(2,1) AS Variant NULL) -optimized expr : 0x2000000020000009603ff199999999999a +optimized expr : 0x2000000020000013700000000000000000000000000000000b0201 output type : Variant NULL output domain : Undefined output : '1.1' @@ -2445,7 +2445,7 @@ output : '[0,"a"]' ast : TRY_CAST(to_timestamp(1000000) AS VARIANT) raw expr : TRY_CAST(to_timestamp(1000000) AS Variant) checked expr : TRY_CAST(CAST(CAST(1000000_u32 AS Int64) AS Timestamp) AS Variant NULL) -optimized expr : 0x200000001000001a313937302d30312d31322031333a34363a34302e303030303030 +optimized expr : 0x200000006000000920000000e8d4a51000 output type : Variant NULL output domain : Undefined output : '"1970-01-12 13:46:40.000000"' diff --git 
a/src/query/functions/tests/it/scalars/testdata/function_list.txt b/src/query/functions/tests/it/scalars/testdata/function_list.txt index 9efa774a2f74..fb4bb55d4216 100644 --- a/src/query/functions/tests/it/scalars/testdata/function_list.txt +++ b/src/query/functions/tests/it/scalars/testdata/function_list.txt @@ -2061,12 +2061,18 @@ Functions overloads: 1 instr(String NULL, String NULL) :: UInt64 NULL 0 is_array(Variant) :: Boolean 1 is_array(Variant NULL) :: Boolean NULL +0 is_binary(Variant) :: Boolean +1 is_binary(Variant NULL) :: Boolean NULL 0 is_boolean(Variant) :: Boolean 1 is_boolean(Variant NULL) :: Boolean NULL +0 is_date(Variant) :: Boolean +1 is_date(Variant NULL) :: Boolean NULL 0 is_float(Variant) :: Boolean 1 is_float(Variant NULL) :: Boolean NULL 0 is_integer(Variant) :: Boolean 1 is_integer(Variant NULL) :: Boolean NULL +0 is_interval(Variant) :: Boolean +1 is_interval(Variant NULL) :: Boolean NULL 0 is_not_error(T0) :: Boolean 0 is_not_null(NULL) :: Boolean 1 is_not_null(T0 NULL) :: Boolean @@ -2076,6 +2082,8 @@ Functions overloads: 1 is_object(Variant NULL) :: Boolean NULL 0 is_string(Variant) :: Boolean 1 is_string(Variant NULL) :: Boolean NULL +0 is_timestamp(Variant) :: Boolean +1 is_timestamp(Variant NULL) :: Boolean NULL 0 is_true(Boolean) :: Boolean 1 is_true(Boolean NULL) :: Boolean 0 jaro_winkler(String, String) :: Float64 diff --git a/src/query/functions/tests/it/scalars/testdata/variant.txt b/src/query/functions/tests/it/scalars/testdata/variant.txt index 159c383615b7..9fbd2538d057 100644 --- a/src/query/functions/tests/it/scalars/testdata/variant.txt +++ b/src/query/functions/tests/it/scalars/testdata/variant.txt @@ -1064,6 +1064,42 @@ output domain : Undefined output : '{"a":"b"}' +ast : as_binary(to_binary('abcd')::variant) +raw expr : as_binary(CAST(to_binary('abcd') AS Variant)) +checked expr : as_binary(CAST(CAST("abcd" AS Binary) AS Variant)) +optimized expr : 61626364 +output type : Binary NULL +output domain : Undefined 
+output : 61626364 + + +ast : as_date(to_date('2025-10-11')::variant) +raw expr : as_date(CAST(to_date('2025-10-11') AS Variant)) +checked expr : as_date(CAST(CAST("2025-10-11" AS Date) AS Variant)) +optimized expr : 20372 +output type : Date NULL +output domain : {20372..=20372} +output : '2025-10-11' + + +ast : as_timestamp(to_timestamp('2025-05-01 10:00:00')::variant) +raw expr : as_timestamp(CAST(to_timestamp('2025-05-01 10:00:00') AS Variant)) +checked expr : as_timestamp(CAST(CAST("2025-05-01 10:00:00" AS Timestamp) AS Variant)) +optimized expr : 1746093600000000 +output type : Timestamp NULL +output domain : {1746093600000000..=1746093600000000} +output : '2025-05-01 10:00:00.000000' + + +ast : as_interval(to_interval('1 year 2 month')::variant) +raw expr : as_interval(CAST(to_interval('1 year 2 month') AS Variant)) +checked expr : as_interval(CAST(CAST("1 year 2 month" AS Interval) AS Variant)) +optimized expr : 1 year 2 months +output type : Interval NULL +output domain : SimpleDomain { min: months_days_micros(1109194275199700726309615304704), max: months_days_micros(1109194275199700726309615304704) } +output : '1 year 2 months' + + ast : as_boolean(parse_json(s)) raw expr : as_boolean(parse_json(s::String)) checked expr : as_boolean(CAST(s AS Variant)) @@ -1346,6 +1382,42 @@ output domain : {TRUE} output : true +ast : is_binary(to_binary('abcd')::variant) +raw expr : is_binary(CAST(to_binary('abcd') AS Variant)) +checked expr : is_binary(CAST(CAST("abcd" AS Binary) AS Variant)) +optimized expr : true +output type : Boolean +output domain : {TRUE} +output : true + + +ast : is_date(to_date('2025-10-11')::variant) +raw expr : is_date(CAST(to_date('2025-10-11') AS Variant)) +checked expr : is_date(CAST(CAST("2025-10-11" AS Date) AS Variant)) +optimized expr : true +output type : Boolean +output domain : {TRUE} +output : true + + +ast : is_timestamp(to_timestamp('2025-05-01 10:00:00')::variant) +raw expr : is_timestamp(CAST(to_timestamp('2025-05-01 10:00:00') 
AS Variant)) +checked expr : is_timestamp(CAST(CAST("2025-05-01 10:00:00" AS Timestamp) AS Variant)) +optimized expr : true +output type : Boolean +output domain : {TRUE} +output : true + + +ast : is_interval(to_interval('1 year 2 month')::variant) +raw expr : is_interval(CAST(to_interval('1 year 2 month') AS Variant)) +checked expr : is_interval(CAST(CAST("1 year 2 month" AS Interval) AS Variant)) +optimized expr : true +output type : Boolean +output domain : {TRUE} +output : true + + ast : is_null_value(parse_json(s)) raw expr : is_null_value(parse_json(s::String)) checked expr : is_null_value(CAST(s AS Variant)) diff --git a/src/query/functions/tests/it/scalars/variant.rs b/src/query/functions/tests/it/scalars/variant.rs index d7a708912c86..d735f11a78fa 100644 --- a/src/query/functions/tests/it/scalars/variant.rs +++ b/src/query/functions/tests/it/scalars/variant.rs @@ -510,6 +510,18 @@ fn test_as_type(file: &mut impl Write) { run_ast(file, "as_array(parse_json('{\"a\":\"b\"}'))", &[]); run_ast(file, "as_object(parse_json('[1,2,3]'))", &[]); run_ast(file, "as_object(parse_json('{\"a\":\"b\"}'))", &[]); + run_ast(file, "as_binary(to_binary('abcd')::variant)", &[]); + run_ast(file, "as_date(to_date('2025-10-11')::variant)", &[]); + run_ast( + file, + "as_timestamp(to_timestamp('2025-05-01 10:00:00')::variant)", + &[], + ); + run_ast( + file, + "as_interval(to_interval('1 year 2 month')::variant)", + &[], + ); let columns = &[( "s", @@ -546,6 +558,18 @@ fn test_is_type(file: &mut impl Write) { run_ast(file, "is_array(parse_json('{\"a\":\"b\"}'))", &[]); run_ast(file, "is_object(parse_json('[1,2,3]'))", &[]); run_ast(file, "is_object(parse_json('{\"a\":\"b\"}'))", &[]); + run_ast(file, "is_binary(to_binary('abcd')::variant)", &[]); + run_ast(file, "is_date(to_date('2025-10-11')::variant)", &[]); + run_ast( + file, + "is_timestamp(to_timestamp('2025-05-01 10:00:00')::variant)", + &[], + ); + run_ast( + file, + "is_interval(to_interval('1 year 2 month')::variant)", + 
&[], + ); let columns = &[( "s", diff --git a/src/query/service/src/servers/mysql/writers/query_result_writer.rs b/src/query/service/src/servers/mysql/writers/query_result_writer.rs index d7f2ae88d8b3..1503d8db730d 100644 --- a/src/query/service/src/servers/mysql/writers/query_result_writer.rs +++ b/src/query/service/src/servers/mysql/writers/query_result_writer.rs @@ -191,6 +191,7 @@ impl<'a, W: AsyncWrite + Send + Unpin> DFQueryResultWriter<'a, W> { DataType::Geometry => Ok(ColumnType::MYSQL_TYPE_GEOMETRY), DataType::Geography => Ok(ColumnType::MYSQL_TYPE_GEOMETRY), DataType::Decimal(_) => Ok(ColumnType::MYSQL_TYPE_DECIMAL), + DataType::Interval => Ok(ColumnType::MYSQL_TYPE_VARCHAR), _ => Err(ErrorCode::Unimplemented(format!( "Unsupported column type:{:?}", field.data_type() diff --git a/tests/sqllogictests/suites/query/functions/02_0056_function_semi_structureds_as.test b/tests/sqllogictests/suites/query/functions/02_0056_function_semi_structureds_as.test index 44af537ae785..bb3a5201343c 100644 --- a/tests/sqllogictests/suites/query/functions/02_0056_function_semi_structureds_as.test +++ b/tests/sqllogictests/suites/query/functions/02_0056_function_semi_structureds_as.test @@ -58,6 +58,25 @@ select as_object(parse_json('{"k": 123}')) ---- {"k":123} +query TT +select to_binary('abc')::variant, as_binary(to_binary('abc')::variant) +---- +"616263" 616263 + +query TT +select to_date('2025-01-01')::variant, as_date(to_date('2025-01-01')::variant) +---- +"2025-01-01" 2025-01-01 + +query TT +select to_timestamp('2025-01-01 10:00:00')::variant, as_timestamp(to_timestamp('2025-01-01 10:00:00')::variant) +---- +"2025-01-01 10:00:00.000000" 2025-01-01 10:00:00.000000 + +query TT +select to_interval('10 months 2 days')::variant, as_interval(to_interval('10 months 2 days')::variant) +---- +"10 months 2 days" 10 months 2 days query B select is_null_value(parse_json('null')) @@ -129,6 +148,27 @@ select is_object(parse_json('["a","b","c"]')) ---- 0 +query B +select 
is_binary(to_binary('abc')::variant) +---- +1 + +query B +select is_date(to_date('2025-01-01')::variant) +---- +1 + +query B +select is_timestamp(to_timestamp('2025-01-01 10:00:00')::variant) +---- +1 + +query B +select is_interval(to_interval('10 months 2 days')::variant) +---- +1 + + statement ok DROP DATABASE IF EXISTS db1 @@ -187,3 +227,4 @@ select id, v, is_null_value(v), is_boolean(v), is_integer(v), is_float(v), is_st statement ok DROP DATABASE db1 + From 631f1ffe7bb347ed5178cba4be234ffbd9b127f4 Mon Sep 17 00:00:00 2001 From: baishen Date: Fri, 25 Apr 2025 16:33:27 +0800 Subject: [PATCH 10/13] fix --- Cargo.lock | 6 ------ src/common/native/Cargo.toml | 1 - src/query/functions/Cargo.toml | 1 - src/query/functions/src/scalars/arithmetic/Cargo.toml | 1 - src/query/functions/src/scalars/decimal/Cargo.toml | 1 - src/query/storages/parquet/Cargo.toml | 1 - src/query/storages/stage/Cargo.toml | 1 - 7 files changed, 12 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7bc4d7b85d51..05df4dc25d32 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3292,7 +3292,6 @@ dependencies = [ "databend-functions-scalar-math", "databend-functions-scalar-numeric-basic-arithmetic", "divan", - "ethnum", "geo", "geohash", "geozero", @@ -3810,7 +3809,6 @@ dependencies = [ "databend-common-column", "databend-common-expression", "env_logger 0.11.5", - "ethnum", "hashbrown 0.14.5", "log", "lz4", @@ -4372,7 +4370,6 @@ dependencies = [ "databend-storages-common-pruner", "databend-storages-common-stage", "databend-storages-common-table-meta", - "ethnum", "futures", "log", "opendal 0.53.1", @@ -4461,7 +4458,6 @@ dependencies = [ "databend-storages-common-stage", "databend-storages-common-table-meta", "enum-as-inner", - "ethnum", "futures", "jsonb", "lexical-core", @@ -4859,7 +4855,6 @@ version = "0.1.0" dependencies = [ "databend-common-expression", "databend-functions-scalar-decimal", - "ethnum", "lexical-core", "match-template", "num-traits", @@ -4884,7 +4879,6 @@ version = "0.1.0" 
dependencies = [ "databend-common-base", "databend-common-expression", - "ethnum", "match-template", "num-traits", ] diff --git a/src/common/native/Cargo.toml b/src/common/native/Cargo.toml index 3359efeb05e5..ccd1e249c91b 100644 --- a/src/common/native/Cargo.toml +++ b/src/common/native/Cargo.toml @@ -18,7 +18,6 @@ bytemuck = { workspace = true } byteorder = { workspace = true } bytes = { workspace = true } env_logger = { workspace = true } -ethnum = { workspace = true } hashbrown_v0_14 = { workspace = true } log = { workspace = true } lz4 = { workspace = true } diff --git a/src/query/functions/Cargo.toml b/src/query/functions/Cargo.toml index 9b3dbe4e9a0f..7a79946245e9 100644 --- a/src/query/functions/Cargo.toml +++ b/src/query/functions/Cargo.toml @@ -30,7 +30,6 @@ databend-functions-scalar-geo = { workspace = true } databend-functions-scalar-integer-basic-arithmetic = { workspace = true } databend-functions-scalar-math = { workspace = true } databend-functions-scalar-numeric-basic-arithmetic = { workspace = true } -ethnum = { workspace = true } geo = { workspace = true } geohash = { workspace = true } geozero = { workspace = true } diff --git a/src/query/functions/src/scalars/arithmetic/Cargo.toml b/src/query/functions/src/scalars/arithmetic/Cargo.toml index e9dac253ce7c..8b78af97ee69 100644 --- a/src/query/functions/src/scalars/arithmetic/Cargo.toml +++ b/src/query/functions/src/scalars/arithmetic/Cargo.toml @@ -6,7 +6,6 @@ edition = "2021" [dependencies] databend-common-expression = { workspace = true } databend-functions-scalar-decimal = { workspace = true } -ethnum = { workspace = true } lexical-core = { workspace = true } match-template = { workspace = true } num-traits = { workspace = true } diff --git a/src/query/functions/src/scalars/decimal/Cargo.toml b/src/query/functions/src/scalars/decimal/Cargo.toml index dcf99820fe16..4542396fad59 100644 --- a/src/query/functions/src/scalars/decimal/Cargo.toml +++ 
b/src/query/functions/src/scalars/decimal/Cargo.toml @@ -6,7 +6,6 @@ edition = "2021" [dependencies] databend-common-base = { workspace = true } databend-common-expression = { workspace = true } -ethnum = { workspace = true } match-template = { workspace = true } num-traits = { workspace = true } diff --git a/src/query/storages/parquet/Cargo.toml b/src/query/storages/parquet/Cargo.toml index 5de7ef57db6f..237cf1217808 100644 --- a/src/query/storages/parquet/Cargo.toml +++ b/src/query/storages/parquet/Cargo.toml @@ -30,7 +30,6 @@ databend-storages-common-cache = { workspace = true } databend-storages-common-pruner = { workspace = true } databend-storages-common-stage = { workspace = true } databend-storages-common-table-meta = { workspace = true } -ethnum = { workspace = true } futures = { workspace = true } log = { workspace = true } opendal = { workspace = true } diff --git a/src/query/storages/stage/Cargo.toml b/src/query/storages/stage/Cargo.toml index 819eb79df1fe..8aa6da5dee40 100644 --- a/src/query/storages/stage/Cargo.toml +++ b/src/query/storages/stage/Cargo.toml @@ -35,7 +35,6 @@ databend-common-version = { workspace = true } databend-storages-common-stage = { workspace = true } databend-storages-common-table-meta = { workspace = true } enum-as-inner = { workspace = true } -ethnum = { workspace = true } futures = { workspace = true } jsonb = { workspace = true } lexical-core = { workspace = true } From f529433886c40790dd623829f0090a94271bc0c3 Mon Sep 17 00:00:00 2001 From: baishen Date: Fri, 25 Apr 2025 21:10:25 +0800 Subject: [PATCH 11/13] fix --- src/query/expression/src/types/decimal.rs | 152 ++++++++++++---------- 1 file changed, 84 insertions(+), 68 deletions(-) diff --git a/src/query/expression/src/types/decimal.rs b/src/query/expression/src/types/decimal.rs index 5fc875c3a0c7..d1b2d12bf274 100644 --- a/src/query/expression/src/types/decimal.rs +++ b/src/query/expression/src/types/decimal.rs @@ -2171,62 +2171,22 @@ impl i256 { #[inline] pub const fn 
to_le_bytes(&self) -> [u8; 32] { - let (high, low) = self.0.into_words(); - let low = low.to_le_bytes(); - let high = high.to_le_bytes(); - let mut i = 0; - let mut bytes = [0u8; 32]; - while i != 16 { - bytes[i] = low[i]; - bytes[i + 16] = high[i]; - i += 1; - } - bytes + self.0.to_le_bytes() } #[inline] pub const fn to_be_bytes(&self) -> [u8; 32] { - let (high, low) = self.0.into_words(); - let low = low.to_be_bytes(); - let high = high.to_be_bytes(); - let mut bytes = [0; 32]; - let mut i = 0; - while i != 16 { - bytes[i] = high[i]; - bytes[i + 16] = low[i]; - i += 1; - } - bytes + self.0.to_be_bytes() } #[inline] pub const fn from_be_bytes(bytes: [u8; 32]) -> Self { - let mut low = [0; 16]; - let mut high = [0; 16]; - let mut i = 0; - while i != 16 { - high[i] = bytes[i]; - low[i] = bytes[i + 16]; - i += 1; - } - let high = i128::from_be_bytes(high); - let low = i128::from_be_bytes(low); - Self(ethnum::I256::from_words(high, low)) + Self(ethnum::I256::from_be_bytes(bytes)) } #[inline] pub const fn from_le_bytes(bytes: [u8; 32]) -> Self { - let mut low = [0; 16]; - let mut high = [0; 16]; - let mut i = 0; - while i != 16 { - low[i] = bytes[i]; - high[i] = bytes[i + 16]; - i += 1; - } - let high = i128::from_be_bytes(high); - let low = i128::from_be_bytes(low); - Self(ethnum::I256::from_words(high, low)) + Self(ethnum::I256::from_le_bytes(bytes)) } #[inline] @@ -2244,27 +2204,27 @@ impl i256 { Self(self.0.saturating_abs()) } - #[inline(always)] + #[inline] pub fn leading_zeros(self) -> u32 { self.0.leading_zeros() } - /// Cast to a primitive `f32`. + /// Cast to a primitive `i8`. #[inline] - pub fn as_f32(self) -> f32 { - self.0.as_f32() + pub const fn as_i8(self) -> i8 { + self.0.as_i8() } - /// Cast to a primitive `f64`. + /// Cast to a primitive `i16`. #[inline] - pub fn as_f64(self) -> f64 { - self.0.as_f64() + pub const fn as_i16(self) -> i16 { + self.0.as_i16() } - /// Cast to a primitive `u64`. + /// Cast to a primitive `i32`. 
#[inline] - pub const fn as_u64(self) -> u64 { - self.0.as_u64() + pub const fn as_i32(self) -> i32 { + self.0.as_i32() } /// Cast to a primitive `i64`. @@ -2279,6 +2239,66 @@ impl i256 { self.0.as_i128() } + /// Cast to a primitive `u8`. + #[inline] + pub const fn as_u8(self) -> u8 { + self.0.as_u8() + } + + /// Cast to a primitive `u16`. + #[inline] + pub const fn as_u16(self) -> u16 { + self.0.as_u16() + } + + /// Cast to a primitive `u32`. + #[inline] + pub const fn as_u32(self) -> u32 { + self.0.as_u32() + } + + /// Cast to a primitive `u64`. + #[inline] + pub const fn as_u64(self) -> u64 { + self.0.as_u64() + } + + /// Cast to a primitive `u128`. + #[inline] + pub const fn as_u128(self) -> u128 { + self.0.as_u128() + } + + /// Cast to a primitive `u256`. + #[inline] + pub const fn as_u256(self) -> u256 { + self.0.as_u256() + } + + /// Cast to a primitive `isize`. + #[inline] + pub const fn as_isize(self) -> isize { + self.0.as_isize() + } + + /// Cast to a primitive `usize`. + #[inline] + pub const fn as_usize(self) -> usize { + self.0.as_usize() + } + + /// Cast to a primitive `f32`. + #[inline] + pub fn as_f32(self) -> f32 { + self.0.as_f32() + } + + /// Cast to a primitive `f64`. + #[inline] + pub fn as_f64(self) -> f64 { + self.0.as_f64() + } + /// Get the low 128-bit word for this signed integer. 
#[inline] pub fn low(&self) -> &i128 { @@ -2308,15 +2328,6 @@ impl i256 { } } -impl Neg for i256 { - type Output = Self; - - #[inline] - fn neg(self) -> Self::Output { - Self(self.0.checked_neg().expect("i256 overflow")) - } -} - impl std::fmt::Debug for i256 { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { write!(f, "{:?}", self.0) @@ -2329,6 +2340,15 @@ impl std::fmt::Display for i256 { } } +impl Neg for i256 { + type Output = Self; + + #[inline] + fn neg(self) -> Self::Output { + Self(self.0.checked_neg().expect("i256 overflow")) + } +} + impl AddAssign for i256 { fn add_assign(&mut self, rhs: Self) { self.0 += rhs.0; @@ -2456,11 +2476,7 @@ impl core::hash::Hash for i256 { impl PartialEq for i256 { #[inline] fn eq(&self, other: &Self) -> bool { - let (ahi, alo) = self.0.into_words(); - let (bhi, blo) = other.0.into_words(); - (ahi == bhi) & (alo == blo) - // bitwise and rather than logical and - // to make O0 code more effecient. + self.0.eq(&other.0) } } From c8fa8c6c747b400e786f3abd979b0466bffb44d8 Mon Sep 17 00:00:00 2001 From: baishen Date: Sun, 27 Apr 2025 13:18:51 +0800 Subject: [PATCH 12/13] add i256 comment --- src/query/expression/src/types/decimal.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/query/expression/src/types/decimal.rs b/src/query/expression/src/types/decimal.rs index d1b2d12bf274..c006a03f06ca 100644 --- a/src/query/expression/src/types/decimal.rs +++ b/src/query/expression/src/types/decimal.rs @@ -2135,7 +2135,7 @@ pub const MIN_DECIMAL_FOR_EACH_PRECISION: [i128; 38] = [ -99999999999999999999999999999999999999, ]; -/// Physical representation of a decimal +/// The wrapper of `ethnum::I256`, used to implement the `BorshSerialize` and `BorshDeserialize` traits. 
#[derive(Clone, Copy, Default, Eq, Serialize, Deserialize)] #[allow(non_camel_case_types)] #[repr(C)] From 6a3379c8594938eac792ca0fb22d9e465bdadfbb Mon Sep 17 00:00:00 2001 From: baishen Date: Sun, 27 Apr 2025 15:33:52 +0800 Subject: [PATCH 13/13] fix --- Cargo.lock | 2 +- Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 05df4dc25d32..50049c413c20 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9184,7 +9184,7 @@ dependencies = [ [[package]] name = "jsonb" version = "0.5.1" -source = "git+https://github.com/b41sh/jsonb?rev=73871c14bc11b65a1cba90bdb4ccc85f06c356e2#73871c14bc11b65a1cba90bdb4ccc85f06c356e2" +source = "git+https://github.com/databendlabs/jsonb?rev=dcaf261#dcaf261df0f2a41ce9029a91444611c1d401e390" dependencies = [ "byteorder", "ethnum", diff --git a/Cargo.toml b/Cargo.toml index dddc18f255b3..96571a0af960 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -637,7 +637,7 @@ backtrace = { git = "https://github.com/rust-lang/backtrace-rs.git", rev = "7226 color-eyre = { git = "https://github.com/eyre-rs/eyre.git", rev = "e5d92c3" } deltalake = { git = "https://github.com/delta-io/delta-rs", rev = "c149502" } display-more = { git = "https://github.com/databendlabs/display-more", tag = "v0.1.2" } -jsonb = { git = "https://github.com/b41sh/jsonb", rev = "73871c14bc11b65a1cba90bdb4ccc85f06c356e2" } +jsonb = { git = "https://github.com/databendlabs/jsonb", rev = "dcaf261" } map-api = { git = "https://github.com/databendlabs/map-api", tag = "v0.2.3" } openai_api_rust = { git = "https://github.com/datafuse-extras/openai-api", rev = "819a0ed" } openraft = { git = "https://github.com/databendlabs/openraft", tag = "v0.10.0-alpha.9" }