Skip to content

Commit a79d7ce

Browse files
authored
Perf/indexing status page (onyx-dot-app#5142)
* indexing status optimization first draft
* refactor: update pagination logic and enhance UI for indexing status table
* add index attempt pruning job and display federated connectors in index status page
* update celery worker command to include index_attempt_cleanup queue
* refactor: enhance indexing status table and remove deprecated components
* mypy fix
* address review comments
* fix pagination reset issue
* add TODO for optimizing connector materialization and performance in future deployments
* enhance connector indexing status retrieval by adding 'get_all_connectors' option and updating pagination logic
* refactor: transition to paginated connector indexing status retrieval and update related components
* fix: initialize latest_index_attempt_docs_indexed to 0 in CCPairIndexingStatusTable component
* feat: add mock connector file support for indexing status retrieval and update indexing_statuses type to Sequence
* mypy fix
* refactor: rename indexing status endpoint to simplify API and update related components
1 parent d7e0bf1 commit a79d7ce

File tree

24 files changed

+1525
-835
lines changed

24 files changed

+1525
-835
lines changed

.vscode/launch.template.jsonc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,7 @@
192192
"--loglevel=INFO",
193193
"--hostname=light@%n",
194194
"-Q",
195-
"vespa_metadata_sync,connector_deletion,doc_permissions_upsert"
195+
"vespa_metadata_sync,connector_deletion,doc_permissions_upsert,index_attempt_cleanup"
196196
],
197197
"presentation": {
198198
"group": "2"

backend/onyx/background/celery/tasks/beat_schedule.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,15 @@
6262
"expires": BEAT_EXPIRES_DEFAULT,
6363
},
6464
},
65+
{
66+
"name": "check-for-index-attempt-cleanup",
67+
"task": OnyxCeleryTask.CHECK_FOR_INDEX_ATTEMPT_CLEANUP,
68+
"schedule": timedelta(hours=1),
69+
"options": {
70+
"priority": OnyxCeleryPriority.LOW,
71+
"expires": BEAT_EXPIRES_DEFAULT,
72+
},
73+
},
6574
{
6675
"name": "check-for-connector-deletion",
6776
"task": OnyxCeleryTask.CHECK_FOR_CONNECTOR_DELETION,

backend/onyx/background/celery/tasks/docprocessing/tasks.py

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@
3232
from onyx.background.indexing.checkpointing_utils import (
3333
get_index_attempts_with_old_checkpoints,
3434
)
35+
from onyx.background.indexing.index_attempt_utils import cleanup_index_attempts
36+
from onyx.background.indexing.index_attempt_utils import get_old_index_attempts
3537
from onyx.configs.app_configs import MANAGED_VESPA
3638
from onyx.configs.app_configs import VESPA_CLOUD_CERT_PATH
3739
from onyx.configs.app_configs import VESPA_CLOUD_KEY_PATH
@@ -109,6 +111,7 @@
109111
# Heartbeat timeout: if no heartbeat received for 30 minutes, consider it dead
110112
# This should be much longer than INDEXING_WORKER_HEARTBEAT_INTERVAL (30s)
111113
HEARTBEAT_TIMEOUT_SECONDS = 30 * 60 # 30 minutes
114+
INDEX_ATTEMPT_BATCH_SIZE = 500
112115

113116

114117
def _get_fence_validation_block_expiration() -> int:
@@ -987,6 +990,95 @@ def cleanup_checkpoint_task(
987990
)
988991

989992

993+
# primary
994+
@shared_task(
995+
name=OnyxCeleryTask.CHECK_FOR_INDEX_ATTEMPT_CLEANUP,
996+
soft_time_limit=300,
997+
bind=True,
998+
)
999+
def check_for_index_attempt_cleanup(self: Task, *, tenant_id: str) -> None:
1000+
"""Clean up old index attempts that are older than 7 days."""
1001+
locked = False
1002+
redis_client = get_redis_client(tenant_id=tenant_id)
1003+
lock: RedisLock = redis_client.lock(
1004+
OnyxRedisLocks.CHECK_INDEX_ATTEMPT_CLEANUP_BEAT_LOCK,
1005+
timeout=CELERY_GENERIC_BEAT_LOCK_TIMEOUT,
1006+
)
1007+
1008+
# these tasks should never overlap
1009+
if not lock.acquire(blocking=False):
1010+
task_logger.info(
1011+
f"check_for_index_attempt_cleanup - Lock not acquired: tenant={tenant_id}"
1012+
)
1013+
return None
1014+
1015+
try:
1016+
locked = True
1017+
batch_size = INDEX_ATTEMPT_BATCH_SIZE
1018+
with get_session_with_current_tenant() as db_session:
1019+
old_attempts = get_old_index_attempts(db_session)
1020+
# We need to batch this because during the initial run, the system might have a large number
1021+
# of index attempts since they were never deleted. After that, the number will be
1022+
# significantly lower.
1023+
if len(old_attempts) == 0:
1024+
task_logger.info(
1025+
"check_for_index_attempt_cleanup - No index attempts to cleanup"
1026+
)
1027+
return
1028+
1029+
for i in range(0, len(old_attempts), batch_size):
1030+
batch = old_attempts[i : i + batch_size]
1031+
task_logger.info(
1032+
f"check_for_index_attempt_cleanup - Cleaning up index attempts {len(batch)}"
1033+
)
1034+
self.app.send_task(
1035+
OnyxCeleryTask.CLEANUP_INDEX_ATTEMPT,
1036+
kwargs={
1037+
"index_attempt_ids": [attempt.id for attempt in batch],
1038+
"tenant_id": tenant_id,
1039+
},
1040+
queue=OnyxCeleryQueues.INDEX_ATTEMPT_CLEANUP,
1041+
priority=OnyxCeleryPriority.MEDIUM,
1042+
)
1043+
except Exception:
1044+
task_logger.exception("Unexpected exception during index attempt cleanup check")
1045+
return None
1046+
finally:
1047+
if locked:
1048+
if lock.owned():
1049+
lock.release()
1050+
else:
1051+
task_logger.error(
1052+
"check_for_index_attempt_cleanup - Lock not owned on completion: "
1053+
f"tenant={tenant_id}"
1054+
)
1055+
1056+
1057+
# light worker
1058+
@shared_task(
1059+
name=OnyxCeleryTask.CLEANUP_INDEX_ATTEMPT,
1060+
bind=True,
1061+
)
1062+
def cleanup_index_attempt_task(
1063+
self: Task, *, index_attempt_ids: list[int], tenant_id: str
1064+
) -> None:
1065+
"""Clean up an index attempt"""
1066+
start = time.monotonic()
1067+
1068+
try:
1069+
with get_session_with_current_tenant() as db_session:
1070+
cleanup_index_attempts(db_session, index_attempt_ids)
1071+
1072+
finally:
1073+
elapsed = time.monotonic() - start
1074+
1075+
task_logger.info(
1076+
f"cleanup_index_attempt_task completed: tenant_id={tenant_id} "
1077+
f"index_attempt_ids={index_attempt_ids} "
1078+
f"elapsed={elapsed:.2f}"
1079+
)
1080+
1081+
9901082
class DocumentProcessingBatch(BaseModel):
9911083
"""Data structure for a document processing batch."""
9921084

backend/onyx/background/indexing/checkpointing_utils.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from sqlalchemy.orm import Session
77

88
from onyx.configs.constants import FileOrigin
9+
from onyx.configs.constants import NUM_DAYS_TO_KEEP_CHECKPOINTS
910
from onyx.connectors.interfaces import BaseConnector
1011
from onyx.connectors.interfaces import CheckpointedConnector
1112
from onyx.connectors.models import ConnectorCheckpoint
@@ -163,16 +164,16 @@ def get_latest_valid_checkpoint(
163164

164165

165166
def get_index_attempts_with_old_checkpoints(
166-
db_session: Session, days_to_keep: int = 7
167+
db_session: Session, days_to_keep: int = NUM_DAYS_TO_KEEP_CHECKPOINTS
167168
) -> list[IndexAttempt]:
168169
"""Get all index attempts with checkpoints older than the specified number of days.
169170
170171
Args:
171172
db_session: The database session
172-
days_to_keep: Number of days to keep checkpoints for (default: 7)
173+
days_to_keep: Number of days to keep checkpoints for (default: NUM_DAYS_TO_KEEP_CHECKPOINTS)
173174
174175
Returns:
175-
Number of checkpoints deleted
176+
List of IndexAttempt objects with old checkpoints
176177
"""
177178
cutoff_date = get_db_current_time(db_session) - timedelta(days=days_to_keep)
178179

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
from datetime import timedelta
2+
3+
from sqlalchemy.orm import Session
4+
5+
from onyx.configs.constants import NUM_DAYS_TO_KEEP_INDEX_ATTEMPTS
6+
from onyx.db.engine.time_utils import get_db_current_time
7+
from onyx.db.models import IndexAttempt
8+
9+
10+
def get_old_index_attempts(
11+
db_session: Session, days_to_keep: int = NUM_DAYS_TO_KEEP_INDEX_ATTEMPTS
12+
) -> list[IndexAttempt]:
13+
"""Get all index attempts older than the specified number of days."""
14+
cutoff_date = get_db_current_time(db_session) - timedelta(days=days_to_keep)
15+
return (
16+
db_session.query(IndexAttempt)
17+
.filter(IndexAttempt.time_created < cutoff_date)
18+
.all()
19+
)
20+
21+
22+
def cleanup_index_attempts(db_session: Session, index_attempt_ids: list[int]) -> None:
23+
"""Clean up multiple index attempts"""
24+
db_session.query(IndexAttempt).filter(
25+
IndexAttempt.id.in_(index_attempt_ids)
26+
).delete(synchronize_session=False)
27+
db_session.commit()

backend/onyx/configs/constants.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -326,7 +326,7 @@ class OnyxCeleryQueues:
326326
CONNECTOR_DELETION = "connector_deletion"
327327
LLM_MODEL_UPDATE = "llm_model_update"
328328
CHECKPOINT_CLEANUP = "checkpoint_cleanup"
329-
329+
INDEX_ATTEMPT_CLEANUP = "index_attempt_cleanup"
330330
# Heavy queue
331331
CONNECTOR_PRUNING = "connector_pruning"
332332
CONNECTOR_DOC_PERMISSIONS_SYNC = "connector_doc_permissions_sync"
@@ -354,6 +354,7 @@ class OnyxRedisLocks:
354354
CHECK_PRUNE_BEAT_LOCK = "da_lock:check_prune_beat"
355355
CHECK_INDEXING_BEAT_LOCK = "da_lock:check_indexing_beat"
356356
CHECK_CHECKPOINT_CLEANUP_BEAT_LOCK = "da_lock:check_checkpoint_cleanup_beat"
357+
CHECK_INDEX_ATTEMPT_CLEANUP_BEAT_LOCK = "da_lock:check_index_attempt_cleanup_beat"
357358
CHECK_CONNECTOR_DOC_PERMISSIONS_SYNC_BEAT_LOCK = (
358359
"da_lock:check_connector_doc_permissions_sync_beat"
359360
)
@@ -455,6 +456,10 @@ class OnyxCeleryTask:
455456
CHECK_FOR_CHECKPOINT_CLEANUP = "check_for_checkpoint_cleanup"
456457
CLEANUP_CHECKPOINT = "cleanup_checkpoint"
457458

459+
# Connector index attempt cleanup
460+
CHECK_FOR_INDEX_ATTEMPT_CLEANUP = "check_for_index_attempt_cleanup"
461+
CLEANUP_INDEX_ATTEMPT = "cleanup_index_attempt"
462+
458463
MONITOR_BACKGROUND_PROCESSES = "monitor_background_processes"
459464
MONITOR_CELERY_QUEUES = "monitor_celery_queues"
460465
MONITOR_PROCESS_MEMORY = "monitor_process_memory"
@@ -512,3 +517,8 @@ class OnyxCeleryTask:
512517
class OnyxCallTypes(str, Enum):
513518
FIREFLIES = "FIREFLIES"
514519
GONG = "GONG"
520+
521+
522+
NUM_DAYS_TO_KEEP_CHECKPOINTS = 7
523+
# checkpoints are queried based on index attempts, so we need to keep index attempts for one more day
524+
NUM_DAYS_TO_KEEP_INDEX_ATTEMPTS = NUM_DAYS_TO_KEEP_CHECKPOINTS + 1

backend/onyx/db/connector_credential_pair.py

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -116,12 +116,13 @@ def get_connector_credential_pairs_for_user(
116116
eager_load_credential: bool = False,
117117
eager_load_user: bool = False,
118118
include_user_files: bool = False,
119+
order_by_desc: bool = False,
120+
source: DocumentSource | None = None,
119121
) -> list[ConnectorCredentialPair]:
120122
if eager_load_user:
121123
assert (
122124
eager_load_credential
123125
), "eager_load_credential must be True if eager_load_user is True"
124-
125126
stmt = select(ConnectorCredentialPair).distinct()
126127

127128
if eager_load_connector:
@@ -134,12 +135,21 @@ def get_connector_credential_pairs_for_user(
134135
stmt = stmt.options(load_opts)
135136

136137
stmt = _add_user_filters(stmt, user, get_editable)
138+
139+
if source:
140+
stmt = stmt.join(ConnectorCredentialPair.connector).where(
141+
Connector.source == source.value
142+
)
143+
137144
if ids:
138145
stmt = stmt.where(ConnectorCredentialPair.id.in_(ids))
139146

140147
if not include_user_files:
141148
stmt = stmt.where(ConnectorCredentialPair.is_user_file != True) # noqa: E712
142149

150+
if order_by_desc:
151+
stmt = stmt.order_by(desc(ConnectorCredentialPair.id))
152+
143153
return list(db_session.scalars(stmt).unique().all())
144154

145155

@@ -153,16 +163,20 @@ def get_connector_credential_pairs_for_user_parallel(
153163
eager_load_connector: bool = False,
154164
eager_load_credential: bool = False,
155165
eager_load_user: bool = False,
166+
order_by_desc: bool = False,
167+
source: DocumentSource | None = None,
156168
) -> list[ConnectorCredentialPair]:
157169
with get_session_with_current_tenant() as db_session:
158170
return get_connector_credential_pairs_for_user(
159-
db_session,
160-
user,
161-
get_editable,
162-
ids,
163-
eager_load_connector,
164-
eager_load_credential,
165-
eager_load_user,
171+
db_session=db_session,
172+
user=user,
173+
get_editable=get_editable,
174+
ids=ids,
175+
eager_load_connector=eager_load_connector,
176+
eager_load_credential=eager_load_credential,
177+
eager_load_user=eager_load_user,
178+
order_by_desc=order_by_desc,
179+
source=source,
166180
)
167181

168182

backend/onyx/db/federated.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from sqlalchemy.orm import Session
99

1010
from onyx.configs.constants import FederatedConnectorSource
11+
from onyx.db.engine.sql_engine import get_session_with_current_tenant
1112
from onyx.db.models import DocumentSet
1213
from onyx.db.models import FederatedConnector
1314
from onyx.db.models import FederatedConnector__DocumentSet
@@ -39,6 +40,11 @@ def fetch_all_federated_connectors(db_session: Session) -> list[FederatedConnect
3940
return list(result.scalars().all())
4041

4142

43+
def fetch_all_federated_connectors_parallel() -> list[FederatedConnector]:
44+
with get_session_with_current_tenant() as db_session:
45+
return fetch_all_federated_connectors(db_session)
46+
47+
4248
def validate_federated_connector_credentials(
4349
source: FederatedConnectorSource,
4450
credentials: dict[str, Any],

0 commit comments

Comments (0)