diff --git a/backend/onyx/background/celery/tasks/indexing/tasks.py b/backend/onyx/background/celery/tasks/indexing/tasks.py index c361d838680..a26d58599b6 100644 --- a/backend/onyx/background/celery/tasks/indexing/tasks.py +++ b/backend/onyx/background/celery/tasks/indexing/tasks.py @@ -367,21 +367,23 @@ def monitor_ccpair_indexing_taskset( redis_connector_index.reset() - # mark the CC Pair as `ACTIVE` if it's not already + # mark the CC Pair as `ACTIVE` if the attempt was a success and the + # CC Pair is not active not already + # This should never technically be in this state, but we'll handle it anyway + index_attempt = get_index_attempt(db_session, payload.index_attempt_id) + index_attempt_is_successful = index_attempt and index_attempt.status.is_successful() if ( - # it should never technically be in this state, but we'll handle it anyway - cc_pair.status == ConnectorCredentialPairStatus.SCHEDULED + index_attempt_is_successful + and cc_pair.status == ConnectorCredentialPairStatus.SCHEDULED or cc_pair.status == ConnectorCredentialPairStatus.INITIAL_INDEXING ): cc_pair.status = ConnectorCredentialPairStatus.ACTIVE db_session.commit() # if the index attempt is successful, clear the repeated error state - if cc_pair.in_repeated_error_state: - index_attempt = get_index_attempt(db_session, payload.index_attempt_id) - if index_attempt and index_attempt.status.is_successful(): - cc_pair.in_repeated_error_state = False - db_session.commit() + if cc_pair.in_repeated_error_state and index_attempt_is_successful: + cc_pair.in_repeated_error_state = False + db_session.commit() @shared_task( diff --git a/backend/onyx/background/indexing/checkpointing_utils.py b/backend/onyx/background/indexing/checkpointing_utils.py index 15981b77a91..ef3c9a3ec4a 100644 --- a/backend/onyx/background/indexing/checkpointing_utils.py +++ b/backend/onyx/background/indexing/checkpointing_utils.py @@ -160,11 +160,6 @@ def get_latest_valid_checkpoint( f"{latest_valid_checkpoint_candidate.id}. Previous checkpoint: " f"{previous_checkpoint}" ) - save_checkpoint( - db_session=db_session, - index_attempt_id=latest_valid_checkpoint_candidate.id, - checkpoint=previous_checkpoint, - ) return previous_checkpoint diff --git a/backend/onyx/background/indexing/run_indexing.py b/backend/onyx/background/indexing/run_indexing.py index 41cbeb931e0..583f2e9a70c 100644 --- a/backend/onyx/background/indexing/run_indexing.py +++ b/backend/onyx/background/indexing/run_indexing.py @@ -423,6 +423,14 @@ def _run_indexing( connector=connector_runner.connector, ) + # save the initial checkpoint to have a proper record of the + # "last used checkpoint" + save_checkpoint( + db_session=db_session_temp, + index_attempt_id=index_attempt_id, + checkpoint=checkpoint, + ) + unresolved_errors = get_index_attempt_errors_for_cc_pair( cc_pair_id=ctx.cc_pair_id, unresolved_only=True,