From ad69a82ebf1b7d86a344da53fcdc09b7da7b9e17 Mon Sep 17 00:00:00 2001 From: Charlie Luo Date: Thu, 28 Aug 2025 13:59:15 -0700 Subject: [PATCH 1/3] stop truncating timestamp_ms in search_issues --- .../processors/search_issues_processor.py | 9 +++++-- .../datasets/test_search_issues_processor.py | 10 ++++--- tests/test_search_issues_api.py | 27 +++++++++++++++++++ 3 files changed, 41 insertions(+), 5 deletions(-) diff --git a/snuba/datasets/processors/search_issues_processor.py b/snuba/datasets/processors/search_issues_processor.py index 16336b34dcf..f30bd8a6bc2 100644 --- a/snuba/datasets/processors/search_issues_processor.py +++ b/snuba/datasets/processors/search_issues_processor.py @@ -1,6 +1,6 @@ import numbers import uuid -from datetime import datetime +from datetime import datetime, timezone from typing import ( Any, Dict, @@ -228,7 +228,12 @@ def _process_transaction_duration( def _process_timestamp_ms( self, event_data: IssueEventData, processed: MutableMapping[str, Any] ) -> None: - processed["timestamp_ms"] = processed["client_timestamp"] + client_timestamp = processed["client_timestamp"] + # NOTE: we do this conversion because the JSONRowEncoder will strip milliseconds out + # of datetime objects specifically. 
To work around that, we convert the datetime to a + # timestamp in milliseconds + client_timestamp = client_timestamp.replace(tzinfo=timezone.utc) + processed["timestamp_ms"] = int(client_timestamp.timestamp() * 1000) def process_insert_v1( self, event: SearchIssueEvent, metadata: KafkaMessageMetadata diff --git a/tests/datasets/test_search_issues_processor.py b/tests/datasets/test_search_issues_processor.py index 5c075eb1ff7..7cf47205a36 100644 --- a/tests/datasets/test_search_issues_processor.py +++ b/tests/datasets/test_search_issues_processor.py @@ -1,7 +1,7 @@ import copy import uuid from collections import OrderedDict -from datetime import datetime, timedelta +from datetime import datetime, timedelta, timezone from typing import Any, MutableMapping, Union import pytest @@ -128,8 +128,12 @@ def test_extract_timestamp_ms(self, message_base): processed = self.process_message(message_base) self.assert_required_columns(processed) insert_row = processed.rows[0] - assert insert_row["timestamp_ms"].isoformat() + "Z" == message_base["datetime"] - assert insert_row["timestamp_ms"] == insert_row["client_timestamp"] + client_timestamp_utc = insert_row["client_timestamp"].replace( + tzinfo=timezone.utc + ) + assert insert_row["timestamp_ms"] == int( + client_timestamp_utc.timestamp() * 1000 + ) def test_extract_user(self, message_base): message_with_user = message_base diff --git a/tests/test_search_issues_api.py b/tests/test_search_issues_api.py index becb0418b92..4966e034b5a 100644 --- a/tests/test_search_issues_api.py +++ b/tests/test_search_issues_api.py @@ -479,3 +479,30 @@ def test_eventstream_query_message(self) -> None: assert response.status_code == 200, data assert data["stats"]["consistent"] assert data["data"] == [{"project_id": 1, "message": message}] + + def test_eventstream_timestamp_ms_precision(self) -> None: + """Test that timestamp_ms preserves millisecond precision through the full eventstream""" + now = datetime.utcnow() + now_ms = now + 
timedelta(milliseconds=123) + + insert_row = base_insert_event(now_ms) + insert_row[2]["data"]["client_timestamp"] = now_ms + + response = self.app.post( + "/tests/search_issues/eventstream", data=json.dumps(insert_row) + ) + assert response.status_code == 200 + + from_date = (now - timedelta(days=1)).isoformat() + to_date = (now + timedelta(days=1)).isoformat() + response = self.post_query( + f""" + MATCH (search_issues) + SELECT timestamp_ms + WHERE project_id = 1 + AND timestamp >= toDateTime('{from_date}') AND timestamp < toDateTime('{to_date}') + """ + ) + data = json.loads(response.data) + + assert data["data"] == [{"timestamp_ms": now_ms}] From 6bb4fbaa6ae30eb89ab2b3bbc233cb988c27e92a Mon Sep 17 00:00:00 2001 From: Charlie Luo Date: Thu, 28 Aug 2025 16:00:08 -0700 Subject: [PATCH 2/3] fix test --- tests/test_search_issues_api.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_search_issues_api.py b/tests/test_search_issues_api.py index 4966e034b5a..76a36ec2ac5 100644 --- a/tests/test_search_issues_api.py +++ b/tests/test_search_issues_api.py @@ -483,10 +483,10 @@ def test_eventstream_query_message(self) -> None: def test_eventstream_timestamp_ms_precision(self) -> None: """Test that timestamp_ms preserves millisecond precision through the full eventstream""" now = datetime.utcnow() - now_ms = now + timedelta(milliseconds=123) + now_ms = now.replace(microsecond=0) + timedelta(milliseconds=123) insert_row = base_insert_event(now_ms) - insert_row[2]["data"]["client_timestamp"] = now_ms + insert_row[2]["datetime"] = now_ms.isoformat() + "Z" response = self.app.post( "/tests/search_issues/eventstream", data=json.dumps(insert_row) From 6e18f6fe48de1de2d2595a4fc5428a45896fe410 Mon Sep 17 00:00:00 2001 From: Charlie Luo Date: Thu, 28 Aug 2025 17:16:00 -0700 Subject: [PATCH 3/3] fix tests --- tests/test_search_issues_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_search_issues_api.py 
b/tests/test_search_issues_api.py index 76a36ec2ac5..5def134d376 100644 --- a/tests/test_search_issues_api.py +++ b/tests/test_search_issues_api.py @@ -505,4 +505,4 @@ def test_eventstream_timestamp_ms_precision(self) -> None: ) data = json.loads(response.data) - assert data["data"] == [{"timestamp_ms": now_ms}] + assert datetime.fromisoformat(data["data"][0]["timestamp_ms"]) == now_ms