@@ -261,7 +261,7 @@ def _run_indexing(
     3. Updates Postgres to record the indexed documents + the outcome of this run
     """
     start_time = time.monotonic()  # just used for logging
-
+    logger.error("Starting indexing run")
     with get_session_with_current_tenant() as db_session_temp:
         index_attempt_start = get_index_attempt(db_session_temp, index_attempt_id)
         if not index_attempt_start:
@@ -315,6 +315,7 @@ def _run_indexing(
         # don't go into "negative" time if we've never indexed before
         window_start = datetime.fromtimestamp(0, tz=timezone.utc)

+    logger.error("Getting most recent attempt")
     most_recent_attempt = next(
         iter(
             get_recent_completed_attempts_for_cc_pair(
@@ -326,6 +327,7 @@ def _run_indexing(
         ),
         None,
     )
+    logger.error(f"Most recent attempt: {most_recent_attempt}")
     # if the last attempt failed, try and use the same window. This is necessary
     # to ensure correctness with checkpointing. If we don't do this, things like
     # new slack channels could be missed (since existing slack channels are
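The comment closing this hunk carries the key invariant behind the window-reuse logic: if the previous attempt failed partway through a checkpointed run, the new attempt must re-poll the same time window rather than advance it, otherwise documents that first appeared inside the old window (e.g. newly created Slack channels) would be silently skipped. A minimal sketch of that decision, assuming hypothetical window_start / window_end / is_failure attributes on the attempt record (names invented for illustration, not taken from this diff):

from datetime import datetime, timezone

def choose_poll_window(most_recent_attempt, now):
    """Hypothetical helper illustrating the invariant from the comment above.

    A failed predecessor's window is reused so a checkpointed resume never
    skips documents that first appeared inside the original window.
    """
    if most_recent_attempt is None:
        # never indexed before: don't go into "negative" time
        return datetime.fromtimestamp(0, tz=timezone.utc), now
    if most_recent_attempt.is_failure:
        # reuse the exact window the failed attempt was covering
        return most_recent_attempt.window_start, most_recent_attempt.window_end
    # normal case: pick up where the last completed attempt left off
    return most_recent_attempt.window_end, now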
@@ -361,6 +363,7 @@ def _run_indexing(
         httpx_client=HttpxPool.get("vespa"),
     )

+    logger.error("Building indexing pipeline")
     indexing_pipeline = build_indexing_pipeline(
         embedder=embedding_model,
         information_content_classification_model=information_content_classification_model,
@@ -782,7 +785,7 @@ def run_indexing_entrypoint(
     callback: IndexingHeartbeatInterface | None = None,
 ) -> None:
     """Don't swallow exceptions here ... propagate them up."""
-
+    logger.error("Starting indexing run: run_indexing_entrypoint")
     if is_ee:
         global_version.set_ee()