From a353019e3521ead2ad9e0e5446be201902621fac Mon Sep 17 00:00:00 2001 From: "Richard Kuo (Onyx)" Date: Thu, 1 May 2025 17:38:11 -0700 Subject: [PATCH 1/5] don't log all channels --- backend/onyx/connectors/slack/connector.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/onyx/connectors/slack/connector.py b/backend/onyx/connectors/slack/connector.py index 884c37d2350..589a0171489 100644 --- a/backend/onyx/connectors/slack/connector.py +++ b/backend/onyx/connectors/slack/connector.py @@ -292,7 +292,7 @@ def filter_channels( if channel not in all_channel_names: raise ValueError( f"Channel '{channel}' not found in workspace. " - f"Available channels: {all_channel_names}" + f"Available channels (max 50 shown): {all_channel_names[:50]}" ) return [ From 1df931364d6a8207ab8f4445f870c7a43ecd04c4 Mon Sep 17 00:00:00 2001 From: "Richard Kuo (Onyx)" Date: Thu, 1 May 2025 17:47:37 -0700 Subject: [PATCH 2/5] print number of channels --- backend/onyx/connectors/slack/connector.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/backend/onyx/connectors/slack/connector.py b/backend/onyx/connectors/slack/connector.py index 589a0171489..3a6172973d4 100644 --- a/backend/onyx/connectors/slack/connector.py +++ b/backend/onyx/connectors/slack/connector.py @@ -1,5 +1,6 @@ import contextvars import copy +import itertools import re from collections.abc import Callable from collections.abc import Generator @@ -292,7 +293,8 @@ def filter_channels( if channel not in all_channel_names: raise ValueError( f"Channel '{channel}' not found in workspace. 
" - f"Available channels (max 50 shown): {all_channel_names[:50]}" + f"Available channels (max 50 of {len(all_channel_names)} shown): " + f"{[itertools.islice(all_channel_names, 50)]}" ) return [ From 5b4ca8e5a9cd80aa757174ef8f38eeeda2c06fe8 Mon Sep 17 00:00:00 2001 From: "Richard Kuo (Onyx)" Date: Thu, 1 May 2025 18:21:19 -0700 Subject: [PATCH 3/5] sanitize indexing exception messages --- backend/onyx/background/celery/tasks/indexing/tasks.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/backend/onyx/background/celery/tasks/indexing/tasks.py b/backend/onyx/background/celery/tasks/indexing/tasks.py index 1f2dc482f59..d8465f026a3 100644 --- a/backend/onyx/background/celery/tasks/indexing/tasks.py +++ b/backend/onyx/background/celery/tasks/indexing/tasks.py @@ -896,7 +896,11 @@ def connector_indexing_task( f"cc_pair={cc_pair_id} " f"search_settings={search_settings_id}" ) - raise e + + # special bulletproofing ... truncate long exception messages + sanitized_e = type(e)(str(e)[:1024]) + sanitized_e.__traceback__ = e.__traceback__ + raise sanitized_e finally: if lock.owned(): From fc5279e0b3bfbb1aa416effc01e6f178d80d0be7 Mon Sep 17 00:00:00 2001 From: "Richard Kuo (Onyx)" Date: Thu, 1 May 2025 18:35:31 -0700 Subject: [PATCH 4/5] harden vespa index swap --- backend/onyx/db/swap_index.py | 41 +++++++++++++++++++++++++++++------ 1 file changed, 34 insertions(+), 7 deletions(-) diff --git a/backend/onyx/db/swap_index.py b/backend/onyx/db/swap_index.py index 9a438ab55ea..1517d74ec0e 100644 --- a/backend/onyx/db/swap_index.py +++ b/backend/onyx/db/swap_index.py @@ -1,5 +1,8 @@ +import time + from sqlalchemy.orm import Session +from onyx.configs.app_configs import VESPA_NUM_ATTEMPTS_ON_STARTUP from onyx.configs.constants import KV_REINDEX_KEY from onyx.db.connector_credential_pair import get_connector_credential_pairs from onyx.db.connector_credential_pair import resync_cc_pair @@ -73,13 +76,37 @@ def _perform_index_swap( # remove the old index from 
the vector db document_index = get_default_document_index(secondary_search_settings, None) - document_index.ensure_indices_exist( - primary_embedding_dim=secondary_search_settings.final_embedding_dim, - primary_embedding_precision=secondary_search_settings.embedding_precision, - # just finished swap, no more secondary index - secondary_index_embedding_dim=None, - secondary_index_embedding_precision=None, - ) + + WAIT_SECONDS = 5 + + success = False + for x in range(VESPA_NUM_ATTEMPTS_ON_STARTUP): + try: + logger.notice( + f"Vespa index swap (attempt {x+1}/{VESPA_NUM_ATTEMPTS_ON_STARTUP})..." + ) + document_index.ensure_indices_exist( + primary_embedding_dim=secondary_search_settings.final_embedding_dim, + primary_embedding_precision=secondary_search_settings.embedding_precision, + # just finished swap, no more secondary index + secondary_index_embedding_dim=None, + secondary_index_embedding_precision=None, + ) + logger.notice("Vespa index swap complete.") + success = True + break # success: stop retrying instead of re-running the swap on every remaining attempt + except Exception: + logger.exception( + f"Vespa index swap did not succeed. The Vespa service may not be ready yet. Retrying in {WAIT_SECONDS} seconds." + ) + time.sleep(WAIT_SECONDS) + + if not success: + logger.error( + f"Vespa index swap did not succeed. Attempt limit reached. 
({VESPA_NUM_ATTEMPTS_ON_STARTUP})" + ) + + return def check_and_perform_index_swap(db_session: Session) -> SearchSettings | None: From ff46f4e5f13c6c3420a8711890f38865a42f37ef Mon Sep 17 00:00:00 2001 From: "Richard Kuo (Onyx)" Date: Thu, 1 May 2025 18:46:39 -0700 Subject: [PATCH 5/5] use constants and fix list generation --- backend/onyx/connectors/slack/connector.py | 7 +++++-- backend/onyx/connectors/teams/connector.py | 8 +++++++- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/backend/onyx/connectors/slack/connector.py b/backend/onyx/connectors/slack/connector.py index 3a6172973d4..83f35fc55a2 100644 --- a/backend/onyx/connectors/slack/connector.py +++ b/backend/onyx/connectors/slack/connector.py @@ -293,8 +293,9 @@ def filter_channels( if channel not in all_channel_names: raise ValueError( f"Channel '{channel}' not found in workspace. " - f"Available channels (max 50 of {len(all_channel_names)} shown): " - f"{[itertools.islice(all_channel_names, 50)]}" + f"Available channels (Showing {min(len(all_channel_names), SlackConnector.MAX_CHANNELS_TO_LOG)} of " + f"{len(all_channel_names)}): " + f"{list(itertools.islice(all_channel_names, SlackConnector.MAX_CHANNELS_TO_LOG))}" ) return [ @@ -515,6 +516,8 @@ class SlackConnector( MAX_RETRIES = 7 # arbitrarily selected + MAX_CHANNELS_TO_LOG = 50 + def __init__( self, channels: list[str] | None = None, diff --git a/backend/onyx/connectors/teams/connector.py b/backend/onyx/connectors/teams/connector.py index 47b3b9db38d..ba124c1fa7e 100644 --- a/backend/onyx/connectors/teams/connector.py +++ b/backend/onyx/connectors/teams/connector.py @@ -178,6 +178,8 @@ def _convert_thread_to_document( class TeamsConnector(LoadConnector, PollConnector): + MAX_CHANNELS_TO_LOG = 50 + def __init__( self, batch_size: int = INDEX_BATCH_SIZE, @@ -298,7 +300,11 @@ def _fetch_from_teams( channels = _get_channels_from_teams( teams=teams, ) - logger.debug(f"Found available channels: {[c.id for c in channels]}") + + logger.debug( +
f"Found available channels (max {TeamsConnector.MAX_CHANNELS_TO_LOG} shown): " + f"{[c.id for c in channels[:TeamsConnector.MAX_CHANNELS_TO_LOG]]}" + ) if not channels: msg = "No channels found." logger.error(msg)