1
+ from datetime import datetime
2
+ from datetime import timezone
3
+
1
4
import redis
2
5
from celery import Celery
3
6
from celery import shared_task
8
11
9
12
from danswer .background .celery .apps .app_base import task_logger
10
13
from danswer .background .celery .celery_redis import RedisConnectorDeletion
14
+ from danswer .background .celery .celery_redis import RedisConnectorIndexing
15
+ from danswer .background .celery .celery_redis import RedisConnectorPruning
16
+ from danswer .background .celery .celery_redis import RedisConnectorStop
17
+ from danswer .background .celery .tasks .shared .RedisConnectorDeletionFenceData import (
18
+ RedisConnectorDeletionFenceData ,
19
+ )
11
20
from danswer .configs .app_configs import JOB_TIMEOUT
12
21
from danswer .configs .constants import CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT
13
22
from danswer .configs .constants import DanswerRedisLocks
14
23
from danswer .db .connector_credential_pair import get_connector_credential_pair_from_id
15
24
from danswer .db .connector_credential_pair import get_connector_credential_pairs
16
25
from danswer .db .engine import get_session_with_tenant
17
26
from danswer .db .enums import ConnectorCredentialPairStatus
27
+ from danswer .db .search_settings import get_all_search_settings
18
28
from danswer .redis .redis_pool import get_redis_client
19
29
20
30
31
class TaskDependencyError(RuntimeError):
    """Signals that connector deletion cannot proceed right now.

    Raised when dependent tasks (e.g. indexing or pruning) are still running
    and would interfere with deleting the connector; the caller is expected
    to react by setting a stop signal so those tasks exit quickly.
    """
21
36
@shared_task (
22
37
name = "check_for_connector_deletion_task" ,
23
38
soft_time_limit = JOB_TIMEOUT ,
@@ -37,17 +52,30 @@ def check_for_connector_deletion_task(self: Task, *, tenant_id: str | None) -> N
37
52
if not lock_beat .acquire (blocking = False ):
38
53
return
39
54
55
+ # collect cc_pair_ids
40
56
cc_pair_ids : list [int ] = []
41
57
with get_session_with_tenant (tenant_id ) as db_session :
42
58
cc_pairs = get_connector_credential_pairs (db_session )
43
59
for cc_pair in cc_pairs :
44
60
cc_pair_ids .append (cc_pair .id )
45
61
62
+ # try running cleanup on the cc_pair_ids
46
63
for cc_pair_id in cc_pair_ids :
47
64
with get_session_with_tenant (tenant_id ) as db_session :
48
- try_generate_document_cc_pair_cleanup_tasks (
49
- self .app , cc_pair_id , db_session , r , lock_beat , tenant_id
50
- )
65
+ rcs = RedisConnectorStop (cc_pair_id )
66
+ try :
67
+ try_generate_document_cc_pair_cleanup_tasks (
68
+ self .app , cc_pair_id , db_session , r , lock_beat , tenant_id
69
+ )
70
+ except TaskDependencyError as e :
71
+ # this means we wanted to start deleting but dependent tasks were running
72
+ # Leave a stop signal to clear indexing and pruning tasks more quickly
73
+ task_logger .info (str (e ))
74
+ r .set (rcs .fence_key , cc_pair_id )
75
+ else :
76
+ # clear the stop signal if it exists ... no longer needed
77
+ r .delete (rcs .fence_key )
78
+
51
79
except SoftTimeLimitExceeded :
52
80
task_logger .info (
53
81
"Soft time limit exceeded, task is being terminated gracefully."
@@ -70,6 +98,10 @@ def try_generate_document_cc_pair_cleanup_tasks(
70
98
"""Returns an int if syncing is needed. The int represents the number of sync tasks generated.
71
99
Note that syncing can still be required even if the number of sync tasks generated is zero.
72
100
Returns None if no syncing is required.
101
+
102
+ Will raise TaskDependencyError if dependent tasks such as indexing and pruning are
103
+ still running. In our case, the caller reacts by setting a stop signal in Redis to
104
+ exit those tasks as quickly as possible.
73
105
"""
74
106
75
107
lock_beat .reacquire ()
@@ -90,28 +122,63 @@ def try_generate_document_cc_pair_cleanup_tasks(
90
122
if cc_pair .status != ConnectorCredentialPairStatus .DELETING :
91
123
return None
92
124
93
- # add tasks to celery and build up the task set to monitor in redis
94
- r .delete (rcd .taskset_key )
95
-
96
- # Add all documents that need to be updated into the queue
97
- task_logger .info (
98
- f"RedisConnectorDeletion.generate_tasks starting. cc_pair_id={ cc_pair .id } "
125
+ # set a basic fence to start
126
+ fence_value = RedisConnectorDeletionFenceData (
127
+ num_tasks = None ,
128
+ submitted = datetime .now (timezone .utc ),
99
129
)
100
- tasks_generated = rcd .generate_tasks (app , db_session , r , lock_beat , tenant_id )
101
- if tasks_generated is None :
130
+ r .set (rcd .fence_key , fence_value .model_dump_json ())
131
+
132
+ try :
133
+ # do not proceed if connector indexing or connector pruning are running
134
+ search_settings_list = get_all_search_settings (db_session )
135
+ for search_settings in search_settings_list :
136
+ rci = RedisConnectorIndexing (cc_pair_id , search_settings .id )
137
+ if r .get (rci .fence_key ):
138
+ raise TaskDependencyError (
139
+ f"Connector deletion - Delayed (indexing in progress): "
140
+ f"cc_pair={ cc_pair_id } "
141
+ f"search_settings={ search_settings .id } "
142
+ )
143
+
144
+ rcp = RedisConnectorPruning (cc_pair_id )
145
+ if r .get (rcp .fence_key ):
146
+ raise TaskDependencyError (
147
+ f"Connector deletion - Delayed (pruning in progress): "
148
+ f"cc_pair={ cc_pair_id } "
149
+ )
150
+
151
+ # add tasks to celery and build up the task set to monitor in redis
152
+ r .delete (rcd .taskset_key )
153
+
154
+ # Add all documents that need to be updated into the queue
155
+ task_logger .info (
156
+ f"RedisConnectorDeletion.generate_tasks starting. cc_pair={ cc_pair_id } "
157
+ )
158
+ tasks_generated = rcd .generate_tasks (app , db_session , r , lock_beat , tenant_id )
159
+ if tasks_generated is None :
160
+ raise ValueError ("RedisConnectorDeletion.generate_tasks returned None" )
161
+ except TaskDependencyError :
162
+ r .delete (rcd .fence_key )
163
+ raise
164
+ except Exception :
165
+ task_logger .exception ("Unexpected exception" )
166
+ r .delete (rcd .fence_key )
102
167
return None
168
+ else :
169
+ # Currently we are allowing the sync to proceed with 0 tasks.
170
+ # It's possible for sets/groups to be generated initially with no entries
171
+ # and they still need to be marked as up to date.
172
+ # if tasks_generated == 0:
173
+ # return 0
103
174
104
- # Currently we are allowing the sync to proceed with 0 tasks.
105
- # It's possible for sets/groups to be generated initially with no entries
106
- # and they still need to be marked as up to date.
107
- # if tasks_generated == 0:
108
- # return 0
175
+ task_logger .info (
176
+ f"RedisConnectorDeletion.generate_tasks finished. "
177
+ f"cc_pair={ cc_pair_id } tasks_generated={ tasks_generated } "
178
+ )
109
179
110
- task_logger .info (
111
- f"RedisConnectorDeletion.generate_tasks finished. "
112
- f"cc_pair_id={ cc_pair .id } tasks_generated={ tasks_generated } "
113
- )
180
+ # set this only after all tasks have been added
181
+ fence_value .num_tasks = tasks_generated
182
+ r .set (rcd .fence_key , fence_value .model_dump_json ())
114
183
115
- # set this only after all tasks have been added
116
- r .set (rcd .fence_key , tasks_generated )
117
184
return tasks_generated
0 commit comments