Skip to content

Commit e492694

Browse files
Fixed date handling to match PyArrow's and avoid localization issues
(#499) which also corrects the bug on Windows when fetching dates prior to 1970 and after 2038 (#483).
1 parent d8c38e4 commit e492694

File tree

6 files changed

+110
-23
lines changed

6 files changed

+110
-23
lines changed

doc/src/release_notes.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,14 @@ Common Changes
6060
parameter "min". Previously python-oracledb Thin mode did not raise an
6161
error and python-oracledb Thick mode raised the exception
6262
``ORA-24413: Invalid number of sessions specified``.
63+
#) Improvements to :ref:`data frames <dataframeformat>`:
64+
65+
- Fixed date handling to match PyArrow's behavior and avoid localization issues
66+
(`issue 499 <https://github.com/oracle/python-oracledb/issues/499>`__).
67+
68+
- Fixed bug on Windows when fetching dates prior to 1970 or after 2038
69+
(`issue 483 <https://github.com/oracle/python-oracledb/issues/483>`__).
70+
6371
#) Improved the test suite and documentation.
6472

6573

src/oracledb/base_impl.pxd

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ from libc.stdint cimport int8_t, int16_t, int32_t, int64_t
3535
from libc.stdint cimport uint8_t, uint16_t, uint32_t, uint64_t
3636
from libc.stdlib cimport abs
3737
from cpython cimport array
38+
cimport cpython.datetime as cydatetime
3839

3940
ctypedef unsigned char char_type
4041

@@ -982,7 +983,7 @@ cdef object convert_oracle_data_to_python(OracleMetadata from_metadata,
982983
OracleData* data,
983984
const char* encoding_errors,
984985
bint from_dbobject)
985-
cdef object convert_date_to_python(OracleDataBuffer *buffer)
986+
cdef cydatetime.datetime convert_date_to_python(OracleDataBuffer *buffer)
986987
cdef uint16_t decode_uint16be(const char_type *buf)
987988
cdef uint32_t decode_uint32be(const char_type *buf)
988989
cdef uint16_t decode_uint16le(const char_type *buf)

src/oracledb/base_impl.pyx

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,9 @@ cdef const char* DRIVER_INSTALLATION_URL = \
116116
cdef const char* ENCODING_UTF8 = "UTF-8"
117117
cdef const char* ENCODING_UTF16 = "UTF-16BE"
118118

119+
# variables needed for dates when using pyarrow
120+
cdef cydatetime.datetime EPOCH_DATE = datetime.datetime(1970, 1, 1)
121+
119122
# protocols registered with the library
120123
REGISTERED_PROTOCOLS = {}
121124

src/oracledb/impl/base/converters.pyx

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -29,13 +29,14 @@
2929
# form returned by the decoders to an appropriate Python value.
3030
#------------------------------------------------------------------------------
3131

32-
cdef object convert_date_to_python(OracleDataBuffer *buffer):
32+
cdef cydatetime.datetime convert_date_to_python(OracleDataBuffer *buffer):
3333
"""
3434
Converts a DATE, TIMESTAMP, TIMESTAMP WITH LOCAL TIME ZONE or TIMESTAMP
3535
WITH TIMEZONE value stored in the buffer to Python datetime.datetime().
3636
"""
3737
cdef:
3838
OracleDate *value = &buffer.as_date
39+
cydatetime.datetime output
3940
int32_t seconds
4041
output = cydatetime.datetime_new(value.year, value.month, value.day,
4142
value.hour, value.minute, value.second,
@@ -46,6 +47,22 @@ cdef object convert_date_to_python(OracleDataBuffer *buffer):
4647
return output
4748

4849

50+
cdef int convert_date_to_arrow_timestamp(OracleArrowArray arrow_array,
51+
OracleDataBuffer *buffer) except -1:
52+
"""
53+
Converts a DATE, TIMESTAMP, TIMESTAMP WITH LOCAL TIME ZONE or TIMESTAMP
54+
WITH TIMEZONE value stored in the buffer to an Arrow timestamp and appends
it to the supplied Arrow array. The value is first converted to a Python
datetime; its offset from EPOCH_DATE is then taken as a timedelta and the
total number of seconds is multiplied by arrow_array.factor and truncated
to a 64-bit integer. Plain datetime subtraction is used here rather than
datetime.timestamp().
NOTE(review): total_seconds() produces a floating point value, so scaling
it and truncating with int() may lose sub-second precision -- confirm
whether integer arithmetic is required.
55+
"""
56+
cdef:
57+
cydatetime.timedelta td
58+
cydatetime.datetime dt
59+
int64_t ts
60+
dt = convert_date_to_python(buffer)
61+
td = dt - EPOCH_DATE
62+
ts = int(cydatetime.total_seconds(td) * arrow_array.factor)
63+
arrow_array.append_int64(ts)
64+
65+
4966
cdef object convert_interval_ds_to_python(OracleDataBuffer *buffer):
5067
"""
5168
Converts an INTERVAL DAYS TO SECONDS value stored in the buffer to Python
@@ -215,7 +232,6 @@ cdef int convert_oracle_data_to_arrow(OracleMetadata from_metadata,
215232
ArrowType arrow_type
216233
uint32_t db_type_num
217234
OracleRawBytes* rb
218-
int64_t ts
219235

220236
# NULL values
221237
if data.is_null:
@@ -243,9 +259,7 @@ cdef int convert_oracle_data_to_arrow(OracleMetadata from_metadata,
243259
rb = &data.buffer.as_raw_bytes
244260
arrow_array.append_bytes(<void*> rb.ptr, rb.num_bytes)
245261
elif arrow_type == NANOARROW_TYPE_TIMESTAMP:
246-
ts = int(convert_date_to_python(&data.buffer).timestamp() *
247-
arrow_array.factor)
248-
arrow_array.append_int64(ts)
262+
convert_date_to_arrow_timestamp(arrow_array, &data.buffer)
249263
elif arrow_type == NANOARROW_TYPE_DECIMAL128:
250264
convert_number_to_arrow_decimal(arrow_array, &data.buffer)
251265

tests/test_8000_dataframe.py

Lines changed: 41 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
"""
2626
Module for testing dataframes
2727
"""
28+
2829
import datetime
2930
import decimal
3031
import unittest
@@ -49,7 +50,7 @@
4950
"Doe",
5051
"San Francisco",
5152
"USA",
52-
datetime.date(1989, 8, 22),
53+
datetime.date(1955, 7, 1), # summer(before 1970)
5354
12132.40,
5455
400,
5556
datetime.datetime.now(),
@@ -60,7 +61,7 @@
6061
"Hero",
6162
"San Fransokyo",
6263
"Japansa",
63-
datetime.date(1988, 8, 22),
64+
datetime.date(1955, 1, 1), # winter(before 1970)
6465
234234.32,
6566
400,
6667
datetime.datetime.now(),
@@ -75,7 +76,7 @@
7576
"Doe",
7677
"San Francisco",
7778
"USA",
78-
datetime.date(1989, 8, 22),
79+
datetime.date(2000, 7, 1), # summer(between)
7980
None,
8081
400,
8182
datetime.datetime.now(),
@@ -86,7 +87,29 @@
8687
"Hero",
8788
"San Fransokyo",
8889
None,
89-
datetime.date(1988, 8, 22),
90+
datetime.date(2000, 1, 1), # winter(between)
91+
-12312.1,
92+
0,
93+
datetime.datetime.now(),
94+
),
95+
(
96+
3,
97+
"Johns",
98+
"Does",
99+
"San Franciscos",
100+
"USAs",
101+
datetime.date(2040, 7, 1), # summer(after)
102+
None,
103+
500,
104+
datetime.datetime.now(),
105+
),
106+
(
107+
4,
108+
"Bigs",
109+
"Heros",
110+
"San Fransokyos",
111+
None,
112+
datetime.date(2040, 1, 1), # winter(after)
90113
-12312.1,
91114
0,
92115
datetime.datetime.now(),
@@ -221,6 +244,12 @@ def __check_interop(self):
221244
if not HAS_INTEROP:
222245
self.skipTest("missing pandas or pyarrow modules")
223246

247+
def __convert_date(self, value):
248+
"""
249+
Converts a datetime to the number of seconds (as a float) between
1970-01-01 00:00:00 and that value, using plain datetime subtraction
instead of datetime.timestamp(). The value must be a naive datetime since
it is subtracted from a naive epoch.
250+
"""
251+
return (value - datetime.datetime(1970, 1, 1)).total_seconds()
252+
224253
def __convert_to_array(self, data, typ):
225254
"""
226255
Convert raw data to an Arrow array using pyarrow.
@@ -233,11 +262,13 @@ def __convert_to_array(self, data, typ):
233262
elif isinstance(typ, pyarrow.TimestampType):
234263
if typ.unit == "s":
235264
data = [
236-
datetime.datetime(v.year, v.month, v.day).timestamp()
265+
self.__convert_date(
266+
datetime.datetime(v.year, v.month, v.day)
267+
)
237268
for v in data
238269
]
239270
else:
240-
data = [value.timestamp() * 1000000 for value in data]
271+
data = [self.__convert_date(value) * 1000000 for value in data]
241272
mask = [value is None for value in data]
242273
return pyarrow.array(data, typ, mask=mask)
243274

@@ -459,7 +490,7 @@ def test_8015(self):
459490
ora_df = self.conn.fetch_df_all(statement)
460491
col = ora_df.get_column_by_name("SALARY")
461492
self.assertEqual(col.size(), len(DATASET_2))
462-
self.assertEqual(col.null_count, 1)
493+
self.assertEqual(col.null_count, 2)
463494

464495
def test_8016(self):
465496
"8016 - check unsupported error"
@@ -506,16 +537,16 @@ def test_8020(self):
506537
ora_col = ora_df.get_column(0)
507538
self.assertEqual(ora_col.describe_null[0], 0)
508539
self.assertEqual(ora_col.dtype[0], 0)
509-
metadata = {"name": "ID", "size": 2, "num_chunks": 1}
540+
metadata = {"name": "ID", "size": 4, "num_chunks": 1}
510541
self.assertEqual(metadata, ora_col.metadata)
511542
self.assertEqual(ora_col.null_count, 0)
512543

513544
ora_col = ora_df.get_column(4)
514545
self.assertEqual(ora_col.describe_null[0], 3)
515546
self.assertEqual(ora_col.dtype[0], 21)
516-
metadata = {"name": "COUNTRY", "size": 2, "num_chunks": 1}
547+
metadata = {"name": "COUNTRY", "size": 4, "num_chunks": 1}
517548
self.assertEqual(metadata, ora_col.metadata)
518-
self.assertEqual(ora_col.null_count, 1)
549+
self.assertEqual(ora_col.null_count, 2)
519550

520551
def test_8021(self):
521552
"8021 - batches with size that has duplicate rows across batches"

tests/test_8100_dataframe_async.py

Lines changed: 37 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@
5050
"Doe",
5151
"San Francisco",
5252
"USA",
53-
datetime.date(1989, 8, 22),
53+
datetime.date(1955, 7, 1), # summer(before 1970)
5454
12132.40,
5555
400,
5656
datetime.datetime.now(),
@@ -61,7 +61,7 @@
6161
"Hero",
6262
"San Fransokyo",
6363
"Japansa",
64-
datetime.date(1988, 8, 22),
64+
datetime.date(1955, 1, 1), # winter(before 1970)
6565
234234.32,
6666
400,
6767
datetime.datetime.now(),
@@ -76,7 +76,7 @@
7676
"Doe",
7777
"San Francisco",
7878
"USA",
79-
datetime.date(1989, 8, 22),
79+
datetime.date(2000, 7, 1), # summer(between)
8080
None,
8181
400,
8282
datetime.datetime.now(),
@@ -87,7 +87,29 @@
8787
"Hero",
8888
"San Fransokyo",
8989
None,
90-
datetime.date(1988, 8, 22),
90+
datetime.date(2000, 1, 1), # winter(between)
91+
-12312.1,
92+
0,
93+
datetime.datetime.now(),
94+
),
95+
(
96+
3,
97+
"Johns",
98+
"Does",
99+
"San Franciscos",
100+
"USAs",
101+
datetime.date(2040, 7, 1), # summer(after)
102+
None,
103+
500,
104+
datetime.datetime.now(),
105+
),
106+
(
107+
4,
108+
"Bigs",
109+
"Heros",
110+
"San Fransokyos",
111+
None,
112+
datetime.date(2040, 1, 1), # winter(after)
91113
-12312.1,
92114
0,
93115
datetime.datetime.now(),
@@ -225,6 +247,12 @@ def __check_interop(self):
225247
if not HAS_INTEROP:
226248
self.skipTest("missing pandas or pyarrow modules")
227249

250+
def __convert_date(self, value):
251+
"""
252+
Converts a datetime to the number of seconds (as a float) between
1970-01-01 00:00:00 and that value, using plain datetime subtraction
instead of datetime.timestamp(). The value must be a naive datetime since
it is subtracted from a naive epoch.
253+
"""
254+
return (value - datetime.datetime(1970, 1, 1)).total_seconds()
255+
228256
def __convert_to_array(self, data, typ):
229257
"""
230258
Convert raw data to an Arrow array using pyarrow.
@@ -237,11 +265,13 @@ def __convert_to_array(self, data, typ):
237265
elif isinstance(typ, pyarrow.TimestampType):
238266
if typ.unit == "s":
239267
data = [
240-
datetime.datetime(v.year, v.month, v.day).timestamp()
268+
self.__convert_date(
269+
datetime.datetime(v.year, v.month, v.day)
270+
)
241271
for v in data
242272
]
243273
else:
244-
data = [value.timestamp() * 1000000 for value in data]
274+
data = [self.__convert_date(value) * 1000000 for value in data]
245275
mask = [value is None for value in data]
246276
return pyarrow.array(data, typ, mask=mask)
247277

@@ -470,7 +500,7 @@ async def test_8115(self):
470500
ora_df = await self.conn.fetch_df_all(statement)
471501
col = ora_df.get_column_by_name("SALARY")
472502
self.assertEqual(col.size(), len(DATASET_2))
473-
self.assertEqual(col.null_count, 1)
503+
self.assertEqual(col.null_count, 2)
474504

475505
async def test_8116(self):
476506
"8116 - check unsupported error"

0 commit comments

Comments
 (0)