Skip to content

Commit 95dabfa

Browse files
authored
fix: Add back Teams' replies processing (#4744)
* Add replies to document construction and edit tests
* Update tests
* Add replies processing to teams
* Fix test
* Add try-except block around potential failure
* Update entity-id during ConnectorFailure raise
1 parent e92c418 commit 95dabfa

File tree

2 files changed

+48
-19
lines changed

2 files changed

+48
-19
lines changed

backend/onyx/connectors/teams/connector.py

Lines changed: 35 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -25,8 +25,10 @@
2525
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
2626
from onyx.connectors.models import BasicExpertInfo
2727
from onyx.connectors.models import ConnectorCheckpoint
28+
from onyx.connectors.models import ConnectorFailure
2829
from onyx.connectors.models import ConnectorMissingCredentialError
2930
from onyx.connectors.models import Document
31+
from onyx.connectors.models import EntityFailure
3032
from onyx.connectors.models import TextSection
3133
from onyx.file_processing.html_utils import parse_html_page_basic
3234
from onyx.utils.logger import setup_logger
@@ -191,7 +193,7 @@ def load_from_checkpoint(
191193
)
192194

193195
docs = [
194-
_collect_document_for_channel_id(
196+
_collect_documents_for_channel(
195197
graph_client=self.graph_client,
196198
team=team,
197199
channel=channel,
@@ -458,18 +460,17 @@ def _collect_all_channels_from_team(
458460
return channels
459461

460462

461-
def _collect_document_for_channel_id(
463+
def _collect_documents_for_channel(
462464
graph_client: GraphClient,
463465
team: Team,
464466
channel: Channel,
465467
start: SecondsSinceUnixEpoch,
466468
end: SecondsSinceUnixEpoch,
467-
) -> Iterator[Document | None]:
469+
) -> Iterator[Document | None | ConnectorFailure]:
468470
"""
469-
This function yields just one singular `Document`.
471+
This function yields an iterator of `Document`s, where each `Document` corresponds to a "thread".
470472
471-
The reason why this function returns an instance of `Iterator` is because
472-
that is what `parallel_yield` expects. We want this to be lazily evaluated.
473+
A "thread" is the conjunction of the "root" message and all of its replies.
473474
"""
474475

475476
# Server-side filter conditions are not supported on the chat-messages API.
@@ -483,19 +484,37 @@ def _collect_document_for_channel_id(
483484
page_loaded=lambda _: None
484485
).execute_query()
485486

486-
thread = [
487-
message
488-
for message in message_collection
489-
if _filter_message(message=message, start=start, end=end)
490-
]
487+
for message in message_collection:
488+
if not message.id:
489+
continue
491490

492-
yield _convert_thread_to_document(
493-
channel=channel,
494-
thread=thread,
495-
)
491+
if not _should_process_message(message=message, start=start, end=end):
492+
continue
493+
494+
try:
495+
replies = list(message.replies.get_all().execute_query())
496+
thread = [message]
497+
thread.extend(replies[::-1])
498+
499+
# Note:
500+
# We convert an entire *thread* (including the root message and its replies) into one, singular `Document`.
501+
# I.e., we don't convert each individual message and each individual reply into their own individual `Document`s.
502+
if doc := _convert_thread_to_document(
503+
channel=channel,
504+
thread=thread,
505+
):
506+
yield doc
507+
except Exception as e:
508+
yield ConnectorFailure(
509+
failed_entity=EntityFailure(
510+
entity_id=message.id,
511+
),
512+
failure_message=f"Retrieval of message and its replies failed; {channel.id=} {message.id}",
513+
exception=e,
514+
)
496515

497516

498-
def _filter_message(
517+
def _should_process_message(
499518
message: ChatMessage,
500519
start: SecondsSinceUnixEpoch,
501520
end: SecondsSinceUnixEpoch,

backend/tests/daily/connectors/teams/test_teams_connector.py

Lines changed: 13 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -48,14 +48,24 @@ def teams_connector(
4848
@pytest.mark.parametrize(
4949
"teams_connector,expected_messages",
5050
[
51-
[["Onyx-Testing"], set(["This is the first message in Onyx-Testing ..."])],
51+
[
52+
["Onyx-Testing"],
53+
set(
54+
[
55+
"This is the first message in Onyx-Testing ...This is a reply!This is a second reply.Third.4th.5",
56+
"Testing body.",
57+
]
58+
),
59+
],
5260
[
5361
["Onyx"],
5462
set(
5563
[
64+
"yeah!",
65+
"but not least",
5666
"Hello, world!",
57-
"My favorite color is red.\n\xa0\nPablos favorite color is blue",
58-
"but not leastyeah!",
67+
"My favorite color is red.\n\xa0\nPablos favorite color is bluePika's favorite color is greenbut"
68+
" it might also be yellow",
5969
]
6070
),
6171
],

0 commit comments

Comments
 (0)