Skip to content

Commit b07cda1

Browse files
rkuo-danswerRichard Kuo (Onyx)
andauthored
fix provisioning and don't spawn tasks which could result in a race condition (onyx-dot-app#4604)
Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
1 parent fdb5328 commit b07cda1

File tree

2 files changed

+17
-30
lines changed

2 files changed

+17
-30
lines changed

backend/onyx/background/celery/tasks/tenant_provisioning/tasks.py

Lines changed: 17 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,8 @@
1414
from ee.onyx.server.tenants.schema_management import create_schema_if_not_exists
1515
from ee.onyx.server.tenants.schema_management import get_current_alembic_version
1616
from onyx.background.celery.apps.app_base import task_logger
17-
from onyx.configs.app_configs import JOB_TIMEOUT
1817
from onyx.configs.app_configs import TARGET_AVAILABLE_TENANTS
19-
from onyx.configs.constants import OnyxCeleryPriority
18+
from onyx.configs.constants import ONYX_CLOUD_TENANT_ID
2019
from onyx.configs.constants import OnyxCeleryQueues
2120
from onyx.configs.constants import OnyxCeleryTask
2221
from onyx.configs.constants import OnyxRedisLocks
@@ -39,7 +38,8 @@
3938
name=OnyxCeleryTask.CLOUD_CHECK_AVAILABLE_TENANTS,
4039
queue=OnyxCeleryQueues.MONITORING,
4140
ignore_result=True,
42-
soft_time_limit=JOB_TIMEOUT,
41+
soft_time_limit=_TENANT_PROVISIONING_SOFT_TIME_LIMIT,
42+
time_limit=_TENANT_PROVISIONING_TIME_LIMIT,
4343
trail=False,
4444
bind=True,
4545
)
@@ -55,7 +55,7 @@ def check_available_tenants(self: Task) -> None:
5555
)
5656
return
5757

58-
r = get_redis_client()
58+
r = get_redis_client(tenant_id=ONYX_CLOUD_TENANT_ID)
5959
lock_check: RedisLock = r.lock(
6060
OnyxRedisLocks.CHECK_AVAILABLE_TENANTS_LOCK,
6161
timeout=_TENANT_PROVISIONING_SOFT_TIME_LIMIT,
@@ -71,32 +71,28 @@ def check_available_tenants(self: Task) -> None:
7171
try:
7272
# Get the current count of available tenants
7373
with get_session_with_shared_schema() as db_session:
74-
available_tenants_count = db_session.query(AvailableTenant).count()
74+
num_available_tenants = db_session.query(AvailableTenant).count()
7575

7676
# Get the target number of available tenants
77-
target_available_tenants = getattr(
77+
num_minimum_available_tenants = getattr(
7878
TARGET_AVAILABLE_TENANTS, "value", DEFAULT_TARGET_AVAILABLE_TENANTS
7979
)
8080

8181
# Calculate how many new tenants we need to provision
82-
tenants_to_provision = max(
83-
0, target_available_tenants - available_tenants_count
84-
)
82+
if num_available_tenants < num_minimum_available_tenants:
83+
tenants_to_provision = num_minimum_available_tenants - num_available_tenants
84+
else:
85+
tenants_to_provision = 0
8586

8687
task_logger.info(
87-
f"Available tenants: {available_tenants_count}, "
88-
f"Target: {target_available_tenants}, "
88+
f"Available tenants: {num_available_tenants}, "
89+
f"Target minimum available tenants: {num_minimum_available_tenants}, "
8990
f"To provision: {tenants_to_provision}"
9091
)
9192

92-
# Trigger pre-provisioning tasks for each tenant needed
93-
for _ in range(tenants_to_provision):
94-
from celery import current_app
95-
96-
current_app.send_task(
97-
OnyxCeleryTask.PRE_PROVISION_TENANT,
98-
priority=OnyxCeleryPriority.LOW,
99-
)
93+
# just provision one tenant each time we run this ... increase if needed.
94+
if tenants_to_provision > 0:
95+
pre_provision_tenant()
10096

10197
except Exception:
10298
task_logger.exception("Error in check_available_tenants task")
@@ -105,15 +101,7 @@ def check_available_tenants(self: Task) -> None:
105101
lock_check.release()
106102

107103

108-
@shared_task(
109-
name=OnyxCeleryTask.PRE_PROVISION_TENANT,
110-
ignore_result=True,
111-
soft_time_limit=_TENANT_PROVISIONING_SOFT_TIME_LIMIT,
112-
time_limit=_TENANT_PROVISIONING_TIME_LIMIT,
113-
queue=OnyxCeleryQueues.MONITORING,
114-
bind=True,
115-
)
116-
def pre_provision_tenant(self: Task) -> None:
104+
def pre_provision_tenant() -> None:
117105
"""
118106
Pre-provision a new tenant and store it in the NewAvailableTenant table.
119107
This function fully sets up the tenant with all necessary configurations,
@@ -122,7 +110,7 @@ def pre_provision_tenant(self: Task) -> None:
122110
# The MULTI_TENANT check is now done at the caller level (check_available_tenants)
123111
# rather than inside this function
124112

125-
r = get_redis_client()
113+
r = get_redis_client(tenant_id=ONYX_CLOUD_TENANT_ID)
126114
lock_provision: RedisLock = r.lock(
127115
OnyxRedisLocks.PRE_PROVISION_TENANT_LOCK,
128116
timeout=_TENANT_PROVISIONING_SOFT_TIME_LIMIT,

backend/onyx/configs/constants.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -406,7 +406,6 @@ class OnyxCeleryTask:
406406
)
407407

408408
# Tenant pre-provisioning
409-
PRE_PROVISION_TENANT = f"{ONYX_CLOUD_CELERY_TASK_PREFIX}_pre_provision_tenant"
410409
UPDATE_USER_FILE_FOLDER_METADATA = "update_user_file_folder_metadata"
411410

412411
CHECK_FOR_CONNECTOR_DELETION = "check_for_connector_deletion_task"

0 commit comments

Comments
 (0)