Skip to content

Commit dd45a24

Browse files
authored
Merge pull request #806 from holicc/main
Support PostgreSQL numeric type to decimal
2 parents 57387f4 + 4909b36 commit dd45a24

File tree

10 files changed, with 376 additions and 156 deletions.

connectorx-python/connectorx/tests/test_arrow.py

Lines changed: 106 additions & 100 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import pytest
55
from pandas.testing import assert_frame_equal
66
import datetime
7+
from decimal import localcontext, Decimal
78

89
from .. import read_sql
910

@@ -43,110 +44,115 @@ def test_arrow(postgres_url: str) -> None:
4344
df.sort_values(by="test_int", inplace=True, ignore_index=True)
4445
assert_frame_equal(df, expected, check_names=True)
4546

47+
def decimal_s10(val):
48+
return Decimal(val).quantize(Decimal("0.0000000001"))
49+
4650
def test_arrow_type(postgres_url: str) -> None:
4751
query = "SELECT test_date, test_timestamp, test_timestamptz, test_int2, test_int4, test_int8, test_float4, test_float8, test_numeric, test_bpchar, test_char, test_varchar, test_uuid, test_time, test_bytea, test_json, test_jsonb, test_ltree, test_name FROM test_types"
4852
df = read_sql(postgres_url, query, return_type="arrow")
4953
df = df.to_pandas(date_as_object=False)
5054
df.sort_values(by="test_int2", inplace=True, ignore_index=True)
51-
expected = pd.DataFrame(
52-
index=range(5),
53-
data={
54-
"test_date": pd.Series(
55-
["1970-01-01", "2000-02-28", "2038-01-18", "1901-12-14", None], dtype="datetime64[ms]"
56-
),
57-
"test_timestamp": pd.Series(
58-
[
59-
"1970-01-01 00:00:01",
60-
"2000-02-28 12:00:10",
61-
"2038-01-18 23:59:59",
62-
"1901-12-14 00:00:00.062547",
63-
None,
64-
],
65-
dtype="datetime64[us]",
66-
),
67-
"test_timestamptz": pd.Series(
68-
[
69-
"1970-01-01 00:00:01+00:00",
70-
"2000-02-28 12:00:10-04:00",
71-
"2038-01-18 23:59:59+08:00",
72-
"1901-12-14 00:00:00.062547-12:00",
73-
None,
74-
],
75-
dtype="datetime64[us, UTC]",
76-
),
77-
"test_int2": pd.Series([-32768, 0, 1, 32767], dtype="int16"),
78-
"test_int4": pd.Series([0, 1, -2147483648, 2147483647], dtype="int32"),
79-
"test_int8": pd.Series(
80-
[-9223372036854775808, 0, 9223372036854775807, 1], dtype="float64"
81-
),
82-
"test_float4": pd.Series(
83-
[-1.1, 0.00, 2.123456, -12345.1, None], dtype="float32"
84-
),
85-
"test_float8": pd.Series(
86-
[-1.1, 0.00, 2.12345678901, -12345678901.1, None], dtype="float64"
87-
),
88-
"test_numeric": pd.Series([0.01, 521.34, 0, -1.123e2, None], dtype="float64"),
89-
"test_bpchar": pd.Series(["👨‍🍳 ", "bb ", " ", "ddddd", None], dtype="object"),
90-
"test_char": pd.Series(["a", "ಠ", "😃", "@", None], dtype="object"),
91-
"test_varchar": pd.Series(["abcdefghij", "", "👨‍🍳👨‍🍳👨‍🍳👨", "@", None], dtype="object"),
92-
"test_uuid": pd.Series(
93-
[
94-
"a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11",
95-
"a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11",
96-
"a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11",
97-
"a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11",
98-
None,
99-
],
100-
dtype="object",
101-
),
102-
"test_time": pd.Series(
103-
[
104-
datetime.time(8, 12, 40),
105-
datetime.time(18, 30),
106-
datetime.time(23, 0, 10),
107-
datetime.time(0, 0, 59, 62547),
108-
None,
109-
],
110-
dtype="object",
111-
),
112-
"test_bytea": pd.Series(
113-
[
114-
b'\x08',
115-
b"\xd0\x97\xd0\xb4\xd1\x80\xd0\xb0\xcc\x81\xd0\xb2\xd1\x81\xd1\x82\xd0\xb2\xd1\x83\xd0\xb9\xd1\x82\xd0\xb5",
116-
b"",
117-
b"\xf0\x9f\x98\x9c",
118-
None
119-
],
120-
dtype="object",
121-
),
122-
"test_json": pd.Series(
123-
[
124-
'{"customer":"John Doe","items":{"product":"Beer","qty":6}}',
125-
'{"customer":"Lily Bush","items":{"product":"Diaper","qty":24}}',
126-
'{"customer":"Josh William","items":{"product":"Toy Car","qty":1}}',
127-
'{}',
128-
None,
129-
],
130-
dtype="object",
131-
),
132-
"test_jsonb": pd.Series(
133-
[
134-
'{"customer":"John Doe","items":{"product":"Beer","qty":6}}',
135-
'{"customer":"Lily Bush","items":{"product":"Diaper","qty":24}}',
136-
'{"customer":"Josh William","items":{"product":"Toy Car","qty":1}}',
137-
'{}',
138-
None,
139-
],
140-
dtype="object",
141-
),
142-
"test_ltree": pd.Series(
143-
["A.B.C.D", "A.B.E", "A", "", None], dtype="object"
144-
),
145-
"test_name": pd.Series(
146-
["0", "21", "someName", "101203203-1212323-22131235", None]
147-
)
148-
149-
},
150-
)
55+
with localcontext() as ctx:
56+
ctx.prec = 38
57+
expected = pd.DataFrame(
58+
index=range(5),
59+
data={
60+
"test_date": pd.Series(
61+
["1970-01-01", "2000-02-28", "2038-01-18", "1901-12-14", None], dtype="datetime64[ms]"
62+
),
63+
"test_timestamp": pd.Series(
64+
[
65+
"1970-01-01 00:00:01",
66+
"2000-02-28 12:00:10",
67+
"2038-01-18 23:59:59",
68+
"1901-12-14 00:00:00.062547",
69+
None,
70+
],
71+
dtype="datetime64[us]",
72+
),
73+
"test_timestamptz": pd.Series(
74+
[
75+
"1970-01-01 00:00:01+00:00",
76+
"2000-02-28 12:00:10-04:00",
77+
"2038-01-18 23:59:59+08:00",
78+
"1901-12-14 00:00:00.062547-12:00",
79+
None,
80+
],
81+
dtype="datetime64[us, UTC]",
82+
),
83+
"test_int2": pd.Series([-32768, 0, 1, 32767], dtype="int16"),
84+
"test_int4": pd.Series([0, 1, -2147483648, 2147483647], dtype="int32"),
85+
"test_int8": pd.Series(
86+
[-9223372036854775808, 0, 9223372036854775807, 1], dtype="float64"
87+
),
88+
"test_float4": pd.Series(
89+
[-1.1, 0.00, 2.123456, -12345.1, None], dtype="float32"
90+
),
91+
"test_float8": pd.Series(
92+
[-1.1, 0.00, 2.12345678901, -12345678901.1, None], dtype="float64"
93+
),
94+
"test_numeric": pd.Series([decimal_s10(0.01), decimal_s10(521.34), decimal_s10(0), decimal_s10(-1.123e2), None], dtype="object"),
95+
"test_bpchar": pd.Series(["👨‍🍳 ", "bb ", " ", "ddddd", None], dtype="object"),
96+
"test_char": pd.Series(["a", "ಠ", "😃", "@", None], dtype="object"),
97+
"test_varchar": pd.Series(["abcdefghij", "", "👨‍🍳👨‍🍳👨‍🍳👨", "@", None], dtype="object"),
98+
"test_uuid": pd.Series(
99+
[
100+
"a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11",
101+
"a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11",
102+
"a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11",
103+
"a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11",
104+
None,
105+
],
106+
dtype="object",
107+
),
108+
"test_time": pd.Series(
109+
[
110+
datetime.time(8, 12, 40),
111+
datetime.time(18, 30),
112+
datetime.time(23, 0, 10),
113+
datetime.time(0, 0, 59, 62547),
114+
None,
115+
],
116+
dtype="object",
117+
),
118+
"test_bytea": pd.Series(
119+
[
120+
b'\x08',
121+
b"\xd0\x97\xd0\xb4\xd1\x80\xd0\xb0\xcc\x81\xd0\xb2\xd1\x81\xd1\x82\xd0\xb2\xd1\x83\xd0\xb9\xd1\x82\xd0\xb5",
122+
b"",
123+
b"\xf0\x9f\x98\x9c",
124+
None
125+
],
126+
dtype="object",
127+
),
128+
"test_json": pd.Series(
129+
[
130+
'{"customer":"John Doe","items":{"product":"Beer","qty":6}}',
131+
'{"customer":"Lily Bush","items":{"product":"Diaper","qty":24}}',
132+
'{"customer":"Josh William","items":{"product":"Toy Car","qty":1}}',
133+
'{}',
134+
None,
135+
],
136+
dtype="object",
137+
),
138+
"test_jsonb": pd.Series(
139+
[
140+
'{"customer":"John Doe","items":{"product":"Beer","qty":6}}',
141+
'{"customer":"Lily Bush","items":{"product":"Diaper","qty":24}}',
142+
'{"customer":"Josh William","items":{"product":"Toy Car","qty":1}}',
143+
'{}',
144+
None,
145+
],
146+
dtype="object",
147+
),
148+
"test_ltree": pd.Series(
149+
["A.B.C.D", "A.B.E", "A", "", None], dtype="object"
150+
),
151+
"test_name": pd.Series(
152+
["0", "21", "someName", "101203203-1212323-22131235", None]
153+
)
154+
155+
},
156+
)
151157

152158
assert_frame_equal(df, expected, check_names=True)

connectorx/src/constants.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,16 @@
1+
#[cfg(feature = "dst_arrow")]
2+
use arrow::datatypes::DataType as ArrowDataType;
3+
4+
#[cfg(feature = "dst_arrow")]
5+
pub const DEFAULT_ARROW_DECIMAL_PRECISION: u8 = 38;
6+
7+
#[cfg(feature = "dst_arrow")]
8+
pub const DEFAULT_ARROW_DECIMAL_SCALE: i8 = 10;
9+
10+
#[cfg(feature = "dst_arrow")]
11+
pub const DEFAULT_ARROW_DECIMAL: ArrowDataType =
12+
ArrowDataType::Decimal128(DEFAULT_ARROW_DECIMAL_PRECISION, DEFAULT_ARROW_DECIMAL_SCALE);
13+
114
#[cfg(feature = "dst_arrow")]
215
pub(crate) const SECONDS_IN_DAY: i64 = 86_400;
316

0 commit comments

Comments
 (0)