From f47cc6ea08958652c547262da95b0c9ce337e10f Mon Sep 17 00:00:00 2001 From: Weves Date: Thu, 8 May 2025 15:38:37 -0700 Subject: [PATCH 1/9] Remove some ee imports --- .../background/celery/tasks/cloud/tasks.py | 104 ++++++++++ .../tasks/doc_permission_syncing/tasks.py | 0 .../group_sync_utils.py | 0 .../tasks/external_group_syncing/tasks.py | 6 +- backend/ee/onyx/configs/app_configs.py | 2 - backend/ee/onyx/main.py | 2 + backend/ee/onyx/server/documents/cc_pair.py | 177 ++++++++++++++++++ .../onyx/server/middleware/tenant_tracking.py | 2 +- .../server/tenants/anonymous_users_api.py | 2 +- backend/onyx/auth/users.py | 2 +- .../background/celery/tasks/shared/tasks.py | 95 ---------- backend/onyx/configs/constants.py | 1 + backend/onyx/server/documents/cc_pair.py | 154 --------------- 13 files changed, 290 insertions(+), 257 deletions(-) create mode 100644 backend/ee/onyx/background/celery/tasks/cloud/tasks.py rename backend/{ => ee}/onyx/background/celery/tasks/doc_permission_syncing/tasks.py (100%) rename backend/{ => ee}/onyx/background/celery/tasks/external_group_syncing/group_sync_utils.py (100%) rename backend/{ => ee}/onyx/background/celery/tasks/external_group_syncing/tasks.py (99%) create mode 100644 backend/ee/onyx/server/documents/cc_pair.py diff --git a/backend/ee/onyx/background/celery/tasks/cloud/tasks.py b/backend/ee/onyx/background/celery/tasks/cloud/tasks.py new file mode 100644 index 00000000000..752373397fd --- /dev/null +++ b/backend/ee/onyx/background/celery/tasks/cloud/tasks.py @@ -0,0 +1,104 @@ +import time + +from celery import shared_task +from celery import Task +from celery.exceptions import SoftTimeLimitExceeded +from redis.lock import Lock as RedisLock + +from ee.onyx.server.tenants.product_gating import get_gated_tenants +from onyx.background.celery.apps.app_base import task_logger +from onyx.background.celery.tasks.beat_schedule import BEAT_EXPIRES_DEFAULT +from onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT +from onyx.configs.constants import ONYX_CLOUD_TENANT_ID +from onyx.configs.constants import OnyxCeleryPriority +from onyx.configs.constants import OnyxCeleryTask +from onyx.configs.constants import OnyxRedisLocks +from onyx.db.engine import get_all_tenant_ids +from onyx.redis.redis_pool import get_redis_client +from onyx.redis.redis_pool import redis_lock_dump +from shared_configs.configs import IGNORED_SYNCING_TENANT_LIST + + +@shared_task( + name=OnyxCeleryTask.CLOUD_BEAT_TASK_GENERATOR, + ignore_result=True, + trail=False, + bind=True, +) +def cloud_beat_task_generator( + self: Task, + task_name: str, + queue: str = OnyxCeleryTask.DEFAULT, + priority: int = OnyxCeleryPriority.MEDIUM, + expires: int = BEAT_EXPIRES_DEFAULT, +) -> bool | None: + """a lightweight task used to kick off individual beat tasks per tenant.""" + time_start = time.monotonic() + + redis_client = get_redis_client(tenant_id=ONYX_CLOUD_TENANT_ID) + + lock_beat: RedisLock = redis_client.lock( + f"{OnyxRedisLocks.CLOUD_BEAT_TASK_GENERATOR_LOCK}:{task_name}", + timeout=CELERY_GENERIC_BEAT_LOCK_TIMEOUT, + ) + + # these tasks should never overlap + if not lock_beat.acquire(blocking=False): + return None + + last_lock_time = time.monotonic() + tenant_ids: list[str] = [] + num_processed_tenants = 0 + + try: + tenant_ids = get_all_tenant_ids() + gated_tenants = get_gated_tenants() + for tenant_id in tenant_ids: + if tenant_id in gated_tenants: + continue + + current_time = time.monotonic() + if current_time - last_lock_time >= (CELERY_GENERIC_BEAT_LOCK_TIMEOUT / 4): + lock_beat.reacquire() + last_lock_time = current_time + + # needed in the cloud + if IGNORED_SYNCING_TENANT_LIST and tenant_id in IGNORED_SYNCING_TENANT_LIST: + continue + + self.app.send_task( + task_name, + kwargs=dict( + tenant_id=tenant_id, + ), + queue=queue, + priority=priority, + expires=expires, + ignore_result=True, + ) + + num_processed_tenants += 1 + except SoftTimeLimitExceeded: + task_logger.info( + "Soft time limit exceeded, task is being terminated gracefully." + ) + except Exception: + task_logger.exception("Unexpected exception during cloud_beat_task_generator") + finally: + if not lock_beat.owned(): + task_logger.error( + "cloud_beat_task_generator - Lock not owned on completion" + ) + redis_lock_dump(lock_beat, redis_client) + else: + lock_beat.release() + + time_elapsed = time.monotonic() - time_start + task_logger.info( + f"cloud_beat_task_generator finished: " + f"task={task_name} " + f"num_processed_tenants={num_processed_tenants} " + f"num_tenants={len(tenant_ids)} " + f"elapsed={time_elapsed:.2f}" + ) + return True diff --git a/backend/onyx/background/celery/tasks/doc_permission_syncing/tasks.py b/backend/ee/onyx/background/celery/tasks/doc_permission_syncing/tasks.py similarity index 100% rename from backend/onyx/background/celery/tasks/doc_permission_syncing/tasks.py rename to backend/ee/onyx/background/celery/tasks/doc_permission_syncing/tasks.py diff --git a/backend/onyx/background/celery/tasks/external_group_syncing/group_sync_utils.py b/backend/ee/onyx/background/celery/tasks/external_group_syncing/group_sync_utils.py similarity index 100% rename from backend/onyx/background/celery/tasks/external_group_syncing/group_sync_utils.py rename to backend/ee/onyx/background/celery/tasks/external_group_syncing/group_sync_utils.py diff --git a/backend/onyx/background/celery/tasks/external_group_syncing/tasks.py b/backend/ee/onyx/background/celery/tasks/external_group_syncing/tasks.py similarity index 99% rename from backend/onyx/background/celery/tasks/external_group_syncing/tasks.py rename to backend/ee/onyx/background/celery/tasks/external_group_syncing/tasks.py index b76161d018f..906b27ea245 100644 --- a/backend/onyx/background/celery/tasks/external_group_syncing/tasks.py +++ b/backend/ee/onyx/background/celery/tasks/external_group_syncing/tasks.py @@ -14,6 +14,9 @@ from redis import Redis from redis.lock import Lock as RedisLock +from ee.onyx.background.celery.tasks.external_group_syncing.group_sync_utils import ( + mark_all_relevant_cc_pairs_as_external_group_synced, +) from ee.onyx.db.connector_credential_pair import get_all_auto_sync_cc_pairs from ee.onyx.db.connector_credential_pair import get_cc_pairs_by_source from ee.onyx.db.external_perm import ExternalUserGroup @@ -26,9 +29,6 @@ from onyx.background.celery.apps.app_base import task_logger from onyx.background.celery.celery_redis import celery_find_task from onyx.background.celery.celery_redis import celery_get_unacked_task_ids -from onyx.background.celery.tasks.external_group_syncing.group_sync_utils import ( - mark_all_relevant_cc_pairs_as_external_group_synced, -) from onyx.background.error_logging import emit_background_error from onyx.configs.app_configs import JOB_TIMEOUT from onyx.configs.constants import CELERY_EXTERNAL_GROUP_SYNC_LOCK_TIMEOUT diff --git a/backend/ee/onyx/configs/app_configs.py b/backend/ee/onyx/configs/app_configs.py index dcb9eba5cff..2771d3c9262 100644 --- a/backend/ee/onyx/configs/app_configs.py +++ b/backend/ee/onyx/configs/app_configs.py @@ -116,6 +116,4 @@ HUBSPOT_TRACKING_URL = os.environ.get("HUBSPOT_TRACKING_URL") -ANONYMOUS_USER_COOKIE_NAME = "onyx_anonymous_user" - GATED_TENANTS_KEY = "gated_tenants" diff --git a/backend/ee/onyx/main.py b/backend/ee/onyx/main.py index 8ecd6b2588d..036adce064b 100644 --- a/backend/ee/onyx/main.py +++ b/backend/ee/onyx/main.py @@ -10,6 +10,7 @@ from ee.onyx.configs.app_configs import OPENID_CONFIG_URL from ee.onyx.server.analytics.api import router as analytics_router from ee.onyx.server.auth_check import check_ee_router_auth +from ee.onyx.server.documents.cc_pair import router as ee_document_cc_pair_router from ee.onyx.server.enterprise_settings.api import ( admin_router as enterprise_settings_admin_router, ) @@ -167,6 +168,7 @@ def get_application() -> FastAPI: include_router_with_global_prefix_prepended(application, chat_router) include_router_with_global_prefix_prepended(application, standard_answer_router) include_router_with_global_prefix_prepended(application, ee_oauth_router) + include_router_with_global_prefix_prepended(application, ee_document_cc_pair_router) # Enterprise-only global settings include_router_with_global_prefix_prepended( diff --git a/backend/ee/onyx/server/documents/cc_pair.py b/backend/ee/onyx/server/documents/cc_pair.py new file mode 100644 index 00000000000..927a5cad551 --- /dev/null +++ b/backend/ee/onyx/server/documents/cc_pair.py @@ -0,0 +1,177 @@ +from datetime import datetime +from http import HTTPStatus + +from fastapi import APIRouter +from fastapi import Depends +from fastapi import HTTPException +from sqlalchemy.orm import Session + +from ee.onyx.background.celery.tasks.doc_permission_syncing.tasks import ( + try_creating_permissions_sync_task, +) +from ee.onyx.background.celery.tasks.external_group_syncing.tasks import ( + try_creating_external_group_sync_task, +) +from onyx.auth.users import current_curator_or_admin_user +from onyx.background.celery.versioned_apps.client import app as client_app +from onyx.db.connector_credential_pair import ( + get_connector_credential_pair_from_id_for_user, +) +from onyx.db.engine import get_session +from onyx.db.models import User +from onyx.redis.redis_connector import RedisConnector +from onyx.redis.redis_pool import get_redis_client +from onyx.server.models import StatusResponse +from onyx.utils.logger import setup_logger +from shared_configs.contextvars import get_current_tenant_id + +logger = setup_logger() +router = APIRouter(prefix="/manage") + + +@router.get("/admin/cc-pair/{cc_pair_id}/sync-permissions") +def get_cc_pair_latest_sync( + cc_pair_id: int, + user: User = Depends(current_curator_or_admin_user), + db_session: Session = Depends(get_session), +) -> datetime | None: + cc_pair = get_connector_credential_pair_from_id_for_user( + cc_pair_id=cc_pair_id, + db_session=db_session, + user=user, + get_editable=False, + ) + if not cc_pair: + raise HTTPException( + status_code=400, + detail="cc_pair not found for current user's permissions", + ) + + return cc_pair.last_time_perm_sync + + +@router.post("/admin/cc-pair/{cc_pair_id}/sync-permissions") +def sync_cc_pair( + cc_pair_id: int, + user: User = Depends(current_curator_or_admin_user), + db_session: Session = Depends(get_session), +) -> StatusResponse[list[int]]: + """Triggers permissions sync on a particular cc_pair immediately""" + tenant_id = get_current_tenant_id() + + cc_pair = get_connector_credential_pair_from_id_for_user( + cc_pair_id=cc_pair_id, + db_session=db_session, + user=user, + get_editable=False, + ) + if not cc_pair: + raise HTTPException( + status_code=400, + detail="Connection not found for current user's permissions", + ) + + r = get_redis_client() + + redis_connector = RedisConnector(tenant_id, cc_pair_id) + if redis_connector.permissions.fenced: + raise HTTPException( + status_code=HTTPStatus.CONFLICT, + detail="Permissions sync task already in progress.", + ) + + logger.info( + f"Permissions sync cc_pair={cc_pair_id} " + f"connector_id={cc_pair.connector_id} " + f"credential_id={cc_pair.credential_id} " + f"{cc_pair.connector.name} connector." + ) + payload_id = try_creating_permissions_sync_task( + client_app, cc_pair_id, r, tenant_id + ) + if not payload_id: + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Permissions sync task creation failed.", + ) + + logger.info(f"Permissions sync queued: cc_pair={cc_pair_id} id={payload_id}") + + return StatusResponse( + success=True, + message="Successfully created the permissions sync task.", + ) + + +@router.get("/admin/cc-pair/{cc_pair_id}/sync-groups") +def get_cc_pair_latest_group_sync( + cc_pair_id: int, + user: User = Depends(current_curator_or_admin_user), + db_session: Session = Depends(get_session), +) -> datetime | None: + cc_pair = get_connector_credential_pair_from_id_for_user( + cc_pair_id=cc_pair_id, + db_session=db_session, + user=user, + get_editable=False, + ) + if not cc_pair: + raise HTTPException( + status_code=400, + detail="cc_pair not found for current user's permissions", + ) + + return cc_pair.last_time_external_group_sync + + +@router.post("/admin/cc-pair/{cc_pair_id}/sync-groups") +def sync_cc_pair_groups( + cc_pair_id: int, + user: User = Depends(current_curator_or_admin_user), + db_session: Session = Depends(get_session), +) -> StatusResponse[list[int]]: + """Triggers group sync on a particular cc_pair immediately""" + tenant_id = get_current_tenant_id() + + cc_pair = get_connector_credential_pair_from_id_for_user( + cc_pair_id=cc_pair_id, + db_session=db_session, + user=user, + get_editable=False, + ) + if not cc_pair: + raise HTTPException( + status_code=400, + detail="Connection not found for current user's permissions", + ) + + r = get_redis_client() + + redis_connector = RedisConnector(tenant_id, cc_pair_id) + if redis_connector.external_group_sync.fenced: + raise HTTPException( + status_code=HTTPStatus.CONFLICT, + detail="External group sync task already in progress.", + ) + + logger.info( + f"External group sync cc_pair={cc_pair_id} " + f"connector_id={cc_pair.connector_id} " + f"credential_id={cc_pair.credential_id} " + f"{cc_pair.connector.name} connector." + ) + payload_id = try_creating_external_group_sync_task( + client_app, cc_pair_id, r, tenant_id + ) + if not payload_id: + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="External group sync task creation failed.", + ) + + logger.info(f"External group sync queued: cc_pair={cc_pair_id} id={payload_id}") + + return StatusResponse( + success=True, + message="Successfully created the external group sync task.", + ) diff --git a/backend/ee/onyx/server/middleware/tenant_tracking.py b/backend/ee/onyx/server/middleware/tenant_tracking.py index 390711f6f38..fc088fd0112 100644 --- a/backend/ee/onyx/server/middleware/tenant_tracking.py +++ b/backend/ee/onyx/server/middleware/tenant_tracking.py @@ -8,8 +8,8 @@ from fastapi import Response from ee.onyx.auth.users import decode_anonymous_user_jwt_token -from ee.onyx.configs.app_configs import ANONYMOUS_USER_COOKIE_NAME from onyx.auth.api_key import extract_tenant_from_api_key_header +from onyx.configs.constants import ANONYMOUS_USER_COOKIE_NAME from onyx.configs.constants import TENANT_ID_COOKIE_NAME from onyx.db.engine import is_valid_schema_name from onyx.redis.redis_pool import retrieve_auth_token_data_from_redis diff --git a/backend/ee/onyx/server/tenants/anonymous_users_api.py b/backend/ee/onyx/server/tenants/anonymous_users_api.py index 0dccc0916b0..653bb66b5b2 100644 --- a/backend/ee/onyx/server/tenants/anonymous_users_api.py +++ b/backend/ee/onyx/server/tenants/anonymous_users_api.py @@ -5,7 +5,6 @@ from sqlalchemy.exc import IntegrityError from ee.onyx.auth.users import generate_anonymous_user_jwt_token -from ee.onyx.configs.app_configs import ANONYMOUS_USER_COOKIE_NAME from ee.onyx.server.tenants.anonymous_user_path import get_anonymous_user_path from ee.onyx.server.tenants.anonymous_user_path import ( get_tenant_id_for_anonymous_user_path, @@ -17,6 +16,7 @@ from onyx.auth.users import current_admin_user from onyx.auth.users import optional_user from onyx.auth.users import User +from onyx.configs.constants import ANONYMOUS_USER_COOKIE_NAME from onyx.configs.constants import FASTAPI_USERS_AUTH_COOKIE_NAME from onyx.db.engine import get_session_with_shared_schema from onyx.utils.logger import setup_logger diff --git a/backend/onyx/auth/users.py b/backend/onyx/auth/users.py index 1b12a7baa42..90fb87869d9 100644 --- a/backend/onyx/auth/users.py +++ b/backend/onyx/auth/users.py @@ -56,7 +56,6 @@ from pydantic import BaseModel from sqlalchemy.ext.asyncio import AsyncSession -from ee.onyx.configs.app_configs import ANONYMOUS_USER_COOKIE_NAME from onyx.auth.api_key import get_hashed_api_key_from_request from onyx.auth.email_utils import send_forgot_password_email from onyx.auth.email_utils import send_user_verification_email @@ -77,6 +76,7 @@ from onyx.configs.app_configs import USER_AUTH_SECRET from onyx.configs.app_configs import VALID_EMAIL_DOMAINS from onyx.configs.app_configs import WEB_DOMAIN +from onyx.configs.constants import ANONYMOUS_USER_COOKIE_NAME from onyx.configs.constants import AuthType from onyx.configs.constants import DANSWER_API_KEY_DUMMY_EMAIL_DOMAIN from onyx.configs.constants import DANSWER_API_KEY_PREFIX diff --git a/backend/onyx/background/celery/tasks/shared/tasks.py b/backend/onyx/background/celery/tasks/shared/tasks.py index 5b151ec27d7..8ce47473e28 100644 --- a/backend/onyx/background/celery/tasks/shared/tasks.py +++ b/backend/onyx/background/celery/tasks/shared/tasks.py @@ -7,20 +7,13 @@ from celery import Task from celery.exceptions import SoftTimeLimitExceeded from redis import Redis -from redis.lock import Lock as RedisLock from tenacity import RetryError -from ee.onyx.server.tenants.product_gating import get_gated_tenants from onyx.access.access import get_access_for_document from onyx.background.celery.apps.app_base import task_logger -from onyx.background.celery.tasks.beat_schedule import BEAT_EXPIRES_DEFAULT from onyx.background.celery.tasks.shared.RetryDocumentIndex import RetryDocumentIndex -from onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT from onyx.configs.constants import ONYX_CELERY_BEAT_HEARTBEAT_KEY -from onyx.configs.constants import ONYX_CLOUD_TENANT_ID -from onyx.configs.constants import OnyxCeleryPriority from onyx.configs.constants import OnyxCeleryTask -from onyx.configs.constants import OnyxRedisLocks from onyx.db.document import delete_document_by_connector_credential_pair__no_commit from onyx.db.document import delete_documents_complete__no_commit from onyx.db.document import fetch_chunk_count_for_document @@ -29,16 +22,13 @@ from onyx.db.document import mark_document_as_modified from onyx.db.document import mark_document_as_synced from onyx.db.document_set import fetch_document_sets_for_document -from onyx.db.engine import get_all_tenant_ids from onyx.db.engine import get_session_with_current_tenant from onyx.db.search_settings import get_active_search_settings from onyx.document_index.factory import get_default_document_index from onyx.document_index.interfaces import VespaDocumentFields from onyx.httpx.httpx_pool import HttpxPool from onyx.redis.redis_pool import get_redis_client -from onyx.redis.redis_pool import redis_lock_dump from onyx.server.documents.models import ConnectorCredentialPairIdentifier -from shared_configs.configs import IGNORED_SYNCING_TENANT_LIST DOCUMENT_BY_CC_PAIR_CLEANUP_MAX_RETRIES = 3 @@ -272,91 +262,6 @@ def document_by_cc_pair_cleanup_task( return True -@shared_task( - name=OnyxCeleryTask.CLOUD_BEAT_TASK_GENERATOR, - ignore_result=True, - trail=False, - bind=True, -) -def cloud_beat_task_generator( - self: Task, - task_name: str, - queue: str = OnyxCeleryTask.DEFAULT, - priority: int = OnyxCeleryPriority.MEDIUM, - expires: int = BEAT_EXPIRES_DEFAULT, -) -> bool | None: - """a lightweight task used to kick off individual beat tasks per tenant.""" - time_start = time.monotonic() - - redis_client = get_redis_client(tenant_id=ONYX_CLOUD_TENANT_ID) - - lock_beat: RedisLock = redis_client.lock( - f"{OnyxRedisLocks.CLOUD_BEAT_TASK_GENERATOR_LOCK}:{task_name}", - timeout=CELERY_GENERIC_BEAT_LOCK_TIMEOUT, - ) - - # these tasks should never overlap - if not lock_beat.acquire(blocking=False): - return None - - last_lock_time = time.monotonic() - tenant_ids: list[str] = [] - num_processed_tenants = 0 - - try: - tenant_ids = get_all_tenant_ids() - gated_tenants = get_gated_tenants() - for tenant_id in tenant_ids: - if tenant_id in gated_tenants: - continue - - current_time = time.monotonic() - if current_time - last_lock_time >= (CELERY_GENERIC_BEAT_LOCK_TIMEOUT / 4): - lock_beat.reacquire() - last_lock_time = current_time - - # needed in the cloud - if IGNORED_SYNCING_TENANT_LIST and tenant_id in IGNORED_SYNCING_TENANT_LIST: - continue - - self.app.send_task( - task_name, - kwargs=dict( - tenant_id=tenant_id, - ), - queue=queue, - priority=priority, - expires=expires, - ignore_result=True, - ) - - num_processed_tenants += 1 - except SoftTimeLimitExceeded: - task_logger.info( - "Soft time limit exceeded, task is being terminated gracefully." - ) - except Exception: - task_logger.exception("Unexpected exception during cloud_beat_task_generator") - finally: - if not lock_beat.owned(): - task_logger.error( - "cloud_beat_task_generator - Lock not owned on completion" - ) - redis_lock_dump(lock_beat, redis_client) - else: - lock_beat.release() - - time_elapsed = time.monotonic() - time_start - task_logger.info( - f"cloud_beat_task_generator finished: " - f"task={task_name} " - f"num_processed_tenants={num_processed_tenants} " - f"num_tenants={len(tenant_ids)} " - f"elapsed={time_elapsed:.2f}" - ) - return True - - @shared_task(name=OnyxCeleryTask.CELERY_BEAT_HEARTBEAT, ignore_result=True, bind=True) def celery_beat_heartbeat(self: Task, *, tenant_id: str) -> None: """When this task runs, it writes a key to Redis with a TTL. diff --git a/backend/onyx/configs/constants.py b/backend/onyx/configs/constants.py index 18b69172488..361167c7441 100644 --- a/backend/onyx/configs/constants.py +++ b/backend/onyx/configs/constants.py @@ -24,6 +24,7 @@ "fastapiusersauth" # Currently a constant, but logic allows for configuration ) TENANT_ID_COOKIE_NAME = "onyx_tid" # tenant id - for workaround cases +ANONYMOUS_USER_COOKIE_NAME = "onyx_anonymous_user" NO_AUTH_USER_ID = "__no_auth_user__" NO_AUTH_USER_EMAIL = "anonymous@onyx.app" diff --git a/backend/onyx/server/documents/cc_pair.py b/backend/onyx/server/documents/cc_pair.py index c151ded97cb..c7ef3ceecf8 100644 --- a/backend/onyx/server/documents/cc_pair.py +++ b/backend/onyx/server/documents/cc_pair.py @@ -12,12 +12,6 @@ from onyx.auth.users import current_curator_or_admin_user from onyx.auth.users import current_user from onyx.background.celery.celery_utils import get_deletion_attempt_snapshot -from onyx.background.celery.tasks.doc_permission_syncing.tasks import ( - try_creating_permissions_sync_task, -) -from onyx.background.celery.tasks.external_group_syncing.tasks import ( - try_creating_external_group_sync_task, -) from onyx.background.celery.tasks.pruning.tasks import ( try_creating_prune_generator_task, ) @@ -392,154 +386,6 @@ def prune_cc_pair( ) -@router.get("/admin/cc-pair/{cc_pair_id}/sync-permissions") -def get_cc_pair_latest_sync( - cc_pair_id: int, - user: User = Depends(current_curator_or_admin_user), - db_session: Session = Depends(get_session), -) -> datetime | None: - cc_pair = get_connector_credential_pair_from_id_for_user( - cc_pair_id=cc_pair_id, - db_session=db_session, - user=user, - get_editable=False, - ) - if not cc_pair: - raise HTTPException( - status_code=400, - detail="cc_pair not found for current user's permissions", - ) - - return cc_pair.last_time_perm_sync - - -@router.post("/admin/cc-pair/{cc_pair_id}/sync-permissions") -def sync_cc_pair( - cc_pair_id: int, - user: User = Depends(current_curator_or_admin_user), - db_session: Session = Depends(get_session), -) -> StatusResponse[list[int]]: - """Triggers permissions sync on a particular cc_pair immediately""" - tenant_id = get_current_tenant_id() - - cc_pair = get_connector_credential_pair_from_id_for_user( - cc_pair_id=cc_pair_id, - db_session=db_session, - user=user, - get_editable=False, - ) - if not cc_pair: - raise HTTPException( - status_code=400, - detail="Connection not found for current user's permissions", - ) - - r = get_redis_client() - - redis_connector = RedisConnector(tenant_id, cc_pair_id) - if redis_connector.permissions.fenced: - raise HTTPException( - status_code=HTTPStatus.CONFLICT, - detail="Permissions sync task already in progress.", - ) - - logger.info( - f"Permissions sync cc_pair={cc_pair_id} " - f"connector_id={cc_pair.connector_id} " - f"credential_id={cc_pair.credential_id} " - f"{cc_pair.connector.name} connector." - ) - payload_id = try_creating_permissions_sync_task( - client_app, cc_pair_id, r, tenant_id - ) - if not payload_id: - raise HTTPException( - status_code=HTTPStatus.INTERNAL_SERVER_ERROR, - detail="Permissions sync task creation failed.", - ) - - logger.info(f"Permissions sync queued: cc_pair={cc_pair_id} id={payload_id}") - - return StatusResponse( - success=True, - message="Successfully created the permissions sync task.", - ) - - -@router.get("/admin/cc-pair/{cc_pair_id}/sync-groups") -def get_cc_pair_latest_group_sync( - cc_pair_id: int, - user: User = Depends(current_curator_or_admin_user), - db_session: Session = Depends(get_session), -) -> datetime | None: - cc_pair = get_connector_credential_pair_from_id_for_user( - cc_pair_id=cc_pair_id, - db_session=db_session, - user=user, - get_editable=False, - ) - if not cc_pair: - raise HTTPException( - status_code=400, - detail="cc_pair not found for current user's permissions", - ) - - return cc_pair.last_time_external_group_sync - - -@router.post("/admin/cc-pair/{cc_pair_id}/sync-groups") -def sync_cc_pair_groups( - cc_pair_id: int, - user: User = Depends(current_curator_or_admin_user), - db_session: Session = Depends(get_session), -) -> StatusResponse[list[int]]: - """Triggers group sync on a particular cc_pair immediately""" - tenant_id = get_current_tenant_id() - - cc_pair = get_connector_credential_pair_from_id_for_user( - cc_pair_id=cc_pair_id, - db_session=db_session, - user=user, - get_editable=False, - ) - if not cc_pair: - raise HTTPException( - status_code=400, - detail="Connection not found for current user's permissions", - ) - - r = get_redis_client() - - redis_connector = RedisConnector(tenant_id, cc_pair_id) - if redis_connector.external_group_sync.fenced: - raise HTTPException( - status_code=HTTPStatus.CONFLICT, - detail="External group sync task already in progress.", - ) - - logger.info( - f"External group sync cc_pair={cc_pair_id} " - f"connector_id={cc_pair.connector_id} " - f"credential_id={cc_pair.credential_id} " - f"{cc_pair.connector.name} connector." - ) - payload_id = try_creating_external_group_sync_task( - client_app, cc_pair_id, r, tenant_id - ) - if not payload_id: - raise HTTPException( - status_code=HTTPStatus.INTERNAL_SERVER_ERROR, - detail="External group sync task creation failed.", - ) - - logger.info(f"External group sync queued: cc_pair={cc_pair_id} id={payload_id}") - - return StatusResponse( - success=True, - message="Successfully created the external group sync task.", - ) - - @router.get("/admin/cc-pair/{cc_pair_id}/get-docs-sync-status") def get_docs_sync_status( cc_pair_id: int, From 50b28e0d35630b95cb37528e89563935601cf811 Mon Sep 17 00:00:00 2001 From: Weves Date: Thu, 8 May 2025 17:16:20 -0700 Subject: [PATCH 2/9] more --- .../onyx/background/celery/tasks/tenant_provisioning/tasks.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename backend/{ => ee}/onyx/background/celery/tasks/tenant_provisioning/tasks.py (100%) diff --git a/backend/onyx/background/celery/tasks/tenant_provisioning/tasks.py b/backend/ee/onyx/background/celery/tasks/tenant_provisioning/tasks.py similarity index 100% rename from backend/onyx/background/celery/tasks/tenant_provisioning/tasks.py rename to backend/ee/onyx/background/celery/tasks/tenant_provisioning/tasks.py From 77d2149dd2a489f8b8529d8f915bb4347da27a6a Mon Sep 17 00:00:00 2001 From: Weves Date: Thu, 8 May 2025 18:04:01 -0700 Subject: [PATCH 3/9] Remove all ee imports --- .../background/celery/tasks/cleanup/tasks.py | 2 +- backend/ee/onyx/configs/app_configs.py | 15 --- backend/ee/onyx/db/query_history.py | 9 ++ .../slack/handlers/handle_standard_answers.py | 2 +- backend/ee/onyx/server/manage/models.py | 98 ----------------- .../ee/onyx/server/oauth/confluence_cloud.py | 4 +- backend/ee/onyx/server/oauth/google_drive.py | 4 +- backend/ee/onyx/server/oauth/slack.py | 4 +- backend/ee/onyx/server/query_history/api.py | 2 +- backend/onyx/configs/app_configs.py | 15 +++ .../connectors/confluence/onyx_confluence.py | 4 +- .../connectors/google_utils/google_auth.py | 4 +- backend/onyx/db/tasks.py | 7 +- backend/onyx/onyxbot/slack/listener.py | 11 +- backend/onyx/server/documents/models.py | 13 +-- backend/onyx/server/manage/models.py | 103 +++++++++++++++++- backend/onyx/server/manage/users.py | 14 ++- 17 files changed, 163 insertions(+), 148 deletions(-) delete mode 100644 backend/ee/onyx/server/manage/models.py diff --git a/backend/ee/onyx/background/celery/tasks/cleanup/tasks.py b/backend/ee/onyx/background/celery/tasks/cleanup/tasks.py index fd9a451f7b0..14b5578b0eb 100644 --- a/backend/ee/onyx/background/celery/tasks/cleanup/tasks.py +++ b/backend/ee/onyx/background/celery/tasks/cleanup/tasks.py @@ -3,12 +3,12 @@ from celery import shared_task +from ee.onyx.db.query_history import get_all_query_history_export_tasks from onyx.configs.app_configs import JOB_TIMEOUT from onyx.configs.constants import OnyxCeleryTask from onyx.db.engine import get_session_with_tenant from onyx.db.enums import TaskStatus from onyx.db.tasks import delete_task_with_id -from onyx.db.tasks import get_all_query_history_export_tasks from onyx.utils.logger import setup_logger diff --git a/backend/ee/onyx/configs/app_configs.py b/backend/ee/onyx/configs/app_configs.py index 2771d3c9262..063bbc14912 100644 --- a/backend/ee/onyx/configs/app_configs.py +++ b/backend/ee/onyx/configs/app_configs.py @@ -94,21 +94,6 @@ SUPER_USERS = json.loads(os.environ.get("SUPER_USERS", "[]")) SUPER_CLOUD_API_KEY = os.environ.get("SUPER_CLOUD_API_KEY", "api_key") -OAUTH_SLACK_CLIENT_ID = os.environ.get("OAUTH_SLACK_CLIENT_ID", "") -OAUTH_SLACK_CLIENT_SECRET = os.environ.get("OAUTH_SLACK_CLIENT_SECRET", "") -OAUTH_CONFLUENCE_CLOUD_CLIENT_ID = os.environ.get( - "OAUTH_CONFLUENCE_CLOUD_CLIENT_ID", "" -) -OAUTH_CONFLUENCE_CLOUD_CLIENT_SECRET = os.environ.get( - "OAUTH_CONFLUENCE_CLOUD_CLIENT_SECRET", "" -) -OAUTH_JIRA_CLOUD_CLIENT_ID = os.environ.get("OAUTH_JIRA_CLOUD_CLIENT_ID", "") -OAUTH_JIRA_CLOUD_CLIENT_SECRET = os.environ.get("OAUTH_JIRA_CLOUD_CLIENT_SECRET", "") -OAUTH_GOOGLE_DRIVE_CLIENT_ID = os.environ.get("OAUTH_GOOGLE_DRIVE_CLIENT_ID", "") -OAUTH_GOOGLE_DRIVE_CLIENT_SECRET = os.environ.get( - "OAUTH_GOOGLE_DRIVE_CLIENT_SECRET", "" -) - # The posthog client does not accept empty API keys or hosts however it fails silently # when the capture is called. These defaults prevent Posthog issues from breaking the Onyx app POSTHOG_API_KEY = os.environ.get("POSTHOG_API_KEY") or "FooBar" diff --git a/backend/ee/onyx/db/query_history.py b/backend/ee/onyx/db/query_history.py index bddef18a4d9..244787c11ce 100644 --- a/backend/ee/onyx/db/query_history.py +++ b/backend/ee/onyx/db/query_history.py @@ -15,10 +15,13 @@ from sqlalchemy.sql.expression import literal from sqlalchemy.sql.expression import UnaryExpression +from ee.onyx.background.task_name_builders import QUERY_HISTORY_TASK_NAME_PREFIX from onyx.configs.constants import QAFeedbackType from onyx.db.models import ChatMessage from onyx.db.models import ChatMessageFeedback from onyx.db.models import ChatSession +from onyx.db.models import TaskQueueState +from onyx.db.tasks import get_all_tasks_with_prefix def _build_filter_conditions( @@ -171,3 +174,9 @@ def fetch_chat_sessions_eagerly_by_time( chat_sessions = query.all() return chat_sessions + + +def get_all_query_history_export_tasks( + db_session: Session, +) -> list[TaskQueueState]: + return get_all_tasks_with_prefix(db_session, QUERY_HISTORY_TASK_NAME_PREFIX) diff --git a/backend/ee/onyx/onyxbot/slack/handlers/handle_standard_answers.py b/backend/ee/onyx/onyxbot/slack/handlers/handle_standard_answers.py index 3f7b0893453..c2149f7a94a 100644 --- a/backend/ee/onyx/onyxbot/slack/handlers/handle_standard_answers.py +++ b/backend/ee/onyx/onyxbot/slack/handlers/handle_standard_answers.py @@ -7,7 +7,6 @@ from ee.onyx.db.standard_answer import fetch_standard_answer_categories_by_names from ee.onyx.db.standard_answer import find_matching_standard_answers -from ee.onyx.server.manage.models import StandardAnswer as PydanticStandardAnswer from onyx.configs.constants import MessageType from onyx.configs.onyxbot_configs import DANSWER_REACT_EMOJI from onyx.db.chat import create_chat_session @@ -24,6 +23,7 @@ from onyx.onyxbot.slack.models import SlackMessageInfo from onyx.onyxbot.slack.utils import respond_in_thread_or_channel from onyx.onyxbot.slack.utils import update_emote_react +from onyx.server.manage.models import StandardAnswer as PydanticStandardAnswer from onyx.utils.logger import OnyxLoggingAdapter from onyx.utils.logger import setup_logger diff --git a/backend/ee/onyx/server/manage/models.py b/backend/ee/onyx/server/manage/models.py deleted file mode 100644 index 75e94d3aa65..00000000000 --- a/backend/ee/onyx/server/manage/models.py +++ /dev/null @@ -1,98 +0,0 @@ -import re -from typing import Any - -from pydantic import BaseModel -from pydantic import field_validator -from pydantic import model_validator - -from onyx.db.models import StandardAnswer as StandardAnswerModel -from onyx.db.models import StandardAnswerCategory as StandardAnswerCategoryModel - - -class StandardAnswerCategoryCreationRequest(BaseModel): - name: str - - -class StandardAnswerCategory(BaseModel): - id: int - name: str - - @classmethod - def from_model( - cls, standard_answer_category: StandardAnswerCategoryModel - ) -> "StandardAnswerCategory": - return cls( - id=standard_answer_category.id, - name=standard_answer_category.name, - ) - - -class StandardAnswer(BaseModel): - id: int - keyword: str - answer: str - categories: list[StandardAnswerCategory] - match_regex: bool - match_any_keywords: bool - - @classmethod - def from_model(cls, standard_answer_model: StandardAnswerModel) -> "StandardAnswer": - return cls( - id=standard_answer_model.id, - keyword=standard_answer_model.keyword, - answer=standard_answer_model.answer, - match_regex=standard_answer_model.match_regex, - match_any_keywords=standard_answer_model.match_any_keywords, - categories=[ - StandardAnswerCategory.from_model(standard_answer_category_model) - for standard_answer_category_model in standard_answer_model.categories - ], - ) - - -class StandardAnswerCreationRequest(BaseModel): - keyword: str - answer: str - categories: list[int] - match_regex: bool - match_any_keywords: bool - - @field_validator("categories", mode="before") - @classmethod - def validate_categories(cls, value: list[int]) -> list[int]: - if len(value) < 1: - raise ValueError( - "At least one category must be attached to a standard answer" - ) - return value - - @model_validator(mode="after") - def validate_only_match_any_if_not_regex(self) -> Any: - if self.match_regex and self.match_any_keywords: - raise ValueError( - "Can only match any keywords in keyword mode, not regex mode" - ) - - return self - - @model_validator(mode="after") - def validate_keyword_if_regex(self) -> Any: - if not self.match_regex: - # no validation for keywords - return self - - try: - re.compile(self.keyword) - return self - except re.error as err: - if isinstance(err.pattern, bytes): - raise ValueError( - f'invalid regex pattern r"{err.pattern.decode()}" in `keyword`: {err.msg}' - ) - else: - pattern = f'r"{err.pattern}"' if err.pattern is not None else "" - raise ValueError( - " ".join( - ["invalid regex pattern", pattern, f"in `keyword`: {err.msg}"] - ) - ) diff --git a/backend/ee/onyx/server/oauth/confluence_cloud.py b/backend/ee/onyx/server/oauth/confluence_cloud.py index af83720806d..acea51444d9 100644 --- a/backend/ee/onyx/server/oauth/confluence_cloud.py +++ b/backend/ee/onyx/server/oauth/confluence_cloud.py @@ -14,11 +14,11 @@ from pydantic import ValidationError from sqlalchemy.orm import Session -from ee.onyx.configs.app_configs import OAUTH_CONFLUENCE_CLOUD_CLIENT_ID -from ee.onyx.configs.app_configs import OAUTH_CONFLUENCE_CLOUD_CLIENT_SECRET from ee.onyx.server.oauth.api_router import router from onyx.auth.users import current_admin_user from onyx.configs.app_configs import DEV_MODE +from onyx.configs.app_configs import OAUTH_CONFLUENCE_CLOUD_CLIENT_ID +from onyx.configs.app_configs import OAUTH_CONFLUENCE_CLOUD_CLIENT_SECRET from onyx.configs.app_configs import WEB_DOMAIN from onyx.configs.constants import DocumentSource from onyx.connectors.confluence.utils import CONFLUENCE_OAUTH_TOKEN_URL diff --git a/backend/ee/onyx/server/oauth/google_drive.py b/backend/ee/onyx/server/oauth/google_drive.py index 07a27fd9284..023f1c73964 100644 --- a/backend/ee/onyx/server/oauth/google_drive.py +++ b/backend/ee/onyx/server/oauth/google_drive.py @@ -11,11 +11,11 @@ from pydantic import BaseModel from sqlalchemy.orm import Session -from ee.onyx.configs.app_configs import OAUTH_GOOGLE_DRIVE_CLIENT_ID -from ee.onyx.configs.app_configs import OAUTH_GOOGLE_DRIVE_CLIENT_SECRET from ee.onyx.server.oauth.api_router import router from onyx.auth.users import current_admin_user from onyx.configs.app_configs import DEV_MODE +from onyx.configs.app_configs import OAUTH_GOOGLE_DRIVE_CLIENT_ID +from onyx.configs.app_configs import OAUTH_GOOGLE_DRIVE_CLIENT_SECRET from onyx.configs.app_configs import WEB_DOMAIN from onyx.configs.constants import DocumentSource from onyx.connectors.google_utils.google_auth import get_google_oauth_creds diff --git a/backend/ee/onyx/server/oauth/slack.py b/backend/ee/onyx/server/oauth/slack.py index e8c5c3063f8..19498830105 100644 --- a/backend/ee/onyx/server/oauth/slack.py +++ b/backend/ee/onyx/server/oauth/slack.py @@ -9,11 +9,11 @@ from pydantic import BaseModel from sqlalchemy.orm import Session -from ee.onyx.configs.app_configs import OAUTH_SLACK_CLIENT_ID -from ee.onyx.configs.app_configs import OAUTH_SLACK_CLIENT_SECRET from ee.onyx.server.oauth.api_router import router from onyx.auth.users import current_admin_user from onyx.configs.app_configs import DEV_MODE +from onyx.configs.app_configs import OAUTH_SLACK_CLIENT_ID +from onyx.configs.app_configs import OAUTH_SLACK_CLIENT_SECRET from onyx.configs.app_configs import WEB_DOMAIN from onyx.configs.constants import DocumentSource from onyx.db.credentials import create_credential diff --git a/backend/ee/onyx/server/query_history/api.py b/backend/ee/onyx/server/query_history/api.py index 0915a23d95e..3ed43ed25b4 100644 --- a/backend/ee/onyx/server/query_history/api.py +++ b/backend/ee/onyx/server/query_history/api.py @@ -11,6 +11,7 @@ from sqlalchemy.orm import Session from ee.onyx.db.query_history import fetch_chat_sessions_eagerly_by_time +from ee.onyx.db.query_history import get_all_query_history_export_tasks from ee.onyx.db.query_history import get_page_of_chat_sessions from ee.onyx.db.query_history import get_total_filtered_chat_sessions_count from ee.onyx.server.query_history.models import ChatSessionMinimal @@ -39,7 +40,6 @@ from onyx.db.models import ChatSession from onyx.db.models import User from onyx.db.pg_file_store import get_query_history_export_files -from onyx.db.tasks import get_all_query_history_export_tasks from onyx.db.tasks import get_task_with_id from onyx.file_store.file_store import get_default_file_store from onyx.server.documents.models import PaginatedReturn diff --git a/backend/onyx/configs/app_configs.py b/backend/onyx/configs/app_configs.py index 26241566ad0..6ae845af04e 100644 --- a/backend/onyx/configs/app_configs.py +++ b/backend/onyx/configs/app_configs.py @@ -674,6 +674,21 @@ def get_current_tz_offset() -> int: "CONTROL_PLANE_API_BASE_URL", "http://localhost:8082" ) +OAUTH_SLACK_CLIENT_ID = os.environ.get("OAUTH_SLACK_CLIENT_ID", "") +OAUTH_SLACK_CLIENT_SECRET = os.environ.get("OAUTH_SLACK_CLIENT_SECRET", "") +OAUTH_CONFLUENCE_CLOUD_CLIENT_ID = os.environ.get( + "OAUTH_CONFLUENCE_CLOUD_CLIENT_ID", "" +) +OAUTH_CONFLUENCE_CLOUD_CLIENT_SECRET = os.environ.get( + "OAUTH_CONFLUENCE_CLOUD_CLIENT_SECRET", "" +) +OAUTH_JIRA_CLOUD_CLIENT_ID = os.environ.get("OAUTH_JIRA_CLOUD_CLIENT_ID", "") +OAUTH_JIRA_CLOUD_CLIENT_SECRET = os.environ.get("OAUTH_JIRA_CLOUD_CLIENT_SECRET", "") +OAUTH_GOOGLE_DRIVE_CLIENT_ID = os.environ.get("OAUTH_GOOGLE_DRIVE_CLIENT_ID", "") +OAUTH_GOOGLE_DRIVE_CLIENT_SECRET = os.environ.get( + "OAUTH_GOOGLE_DRIVE_CLIENT_SECRET", "" +) + # JWT configuration JWT_ALGORITHM = "HS256" diff --git a/backend/onyx/connectors/confluence/onyx_confluence.py b/backend/onyx/connectors/confluence/onyx_confluence.py index f364c2d5802..050380a5f28 100644 --- a/backend/onyx/connectors/confluence/onyx_confluence.py +++ b/backend/onyx/connectors/confluence/onyx_confluence.py @@ -16,9 +16,9 @@ from redis import Redis from requests import HTTPError -from ee.onyx.configs.app_configs import OAUTH_CONFLUENCE_CLOUD_CLIENT_ID -from ee.onyx.configs.app_configs import OAUTH_CONFLUENCE_CLOUD_CLIENT_SECRET from onyx.configs.app_configs import CONFLUENCE_CONNECTOR_USER_PROFILES_OVERRIDE +from onyx.configs.app_configs import OAUTH_CONFLUENCE_CLOUD_CLIENT_ID +from onyx.configs.app_configs import OAUTH_CONFLUENCE_CLOUD_CLIENT_SECRET from onyx.connectors.confluence.models import ConfluenceUser from onyx.connectors.confluence.user_profile_override import ( process_confluence_user_profiles_override, diff --git a/backend/onyx/connectors/google_utils/google_auth.py b/backend/onyx/connectors/google_utils/google_auth.py index 40210e6f332..21d2c6987ea 100644 --- a/backend/onyx/connectors/google_utils/google_auth.py +++ b/backend/onyx/connectors/google_utils/google_auth.py @@ -5,8 +5,8 @@ from google.oauth2.credentials import Credentials as OAuthCredentials # type: ignore from google.oauth2.service_account import Credentials as ServiceAccountCredentials # type: ignore -from ee.onyx.configs.app_configs import OAUTH_GOOGLE_DRIVE_CLIENT_ID -from ee.onyx.configs.app_configs import OAUTH_GOOGLE_DRIVE_CLIENT_SECRET +from onyx.configs.app_configs import OAUTH_GOOGLE_DRIVE_CLIENT_ID +from onyx.configs.app_configs import OAUTH_GOOGLE_DRIVE_CLIENT_SECRET from onyx.configs.constants import DocumentSource from onyx.connectors.google_utils.shared_constants import ( DB_CREDENTIALS_AUTHENTICATION_METHOD, diff --git a/backend/onyx/db/tasks.py b/backend/onyx/db/tasks.py index 9513dfe3ca0..395ee3941ff 100644 --- a/backend/onyx/db/tasks.py +++ b/backend/onyx/db/tasks.py @@ -6,7 +6,6 @@ from sqlalchemy.orm import Session from sqlalchemy.sql import delete -from ee.onyx.background.task_name_builders import QUERY_HISTORY_TASK_NAME_PREFIX from onyx.configs.app_configs import JOB_TIMEOUT from onyx.db.engine import get_db_current_time from onyx.db.models import TaskQueueState @@ -84,13 +83,13 @@ def delete_task_with_id( db_session.commit() -def get_all_query_history_export_tasks( - db_session: Session, +def get_all_tasks_with_prefix( + db_session: Session, task_name_prefix: str ) -> list[TaskQueueState]: return list( db_session.scalars( select(TaskQueueState).where( - TaskQueueState.task_name.like(f"{QUERY_HISTORY_TASK_NAME_PREFIX}_%") + TaskQueueState.task_name.like(f"{task_name_prefix}_%") ) ) ) diff --git a/backend/onyx/onyxbot/slack/listener.py b/backend/onyx/onyxbot/slack/listener.py index 99a9036f674..91b8c9219d6 100644 --- a/backend/onyx/onyxbot/slack/listener.py +++ b/backend/onyx/onyxbot/slack/listener.py @@ -25,7 +25,6 @@ from slack_sdk.socket_mode.response import SocketModeResponse from sqlalchemy.orm import Session -from ee.onyx.server.tenants.product_gating import get_gated_tenants from onyx.chat.models import ThreadMessage from onyx.configs.app_configs import DEV_MODE from onyx.configs.app_configs import POD_NAME @@ -96,6 +95,7 @@ from onyx.redis.redis_pool import get_redis_client from onyx.server.manage.models import SlackBotTokens from onyx.utils.logger import setup_logger +from onyx.utils.variable_functionality import fetch_ee_implementation_or_noop from onyx.utils.variable_functionality import set_is_ee_based_on_env_variable from shared_configs.configs import DISALLOWED_SLACK_BOT_TENANT_LIST from shared_configs.configs import MODEL_SERVER_HOST @@ -264,10 +264,17 @@ def acquire_tenants(self) -> None: - If a tenant in self.tenant_ids no longer has Slack bots, remove it (and release the lock in this scope). """ + # tenants that are disabled (e.g. their trial is over and haven't subscribed) + # for non-cloud, this will return an empty set + gated_tenants = fetch_ee_implementation_or_noop( + "onyx.server.tenants.product_gating", + "get_gated_tenants", + set(), + ) all_tenants = [ tenant_id for tenant_id in get_all_tenant_ids() - if tenant_id not in get_gated_tenants() + if tenant_id not in gated_tenants ] token: Token[str | None] diff --git a/backend/onyx/server/documents/models.py b/backend/onyx/server/documents/models.py index 365094b35fb..ac52f4532c4 100644 --- a/backend/onyx/server/documents/models.py +++ b/backend/onyx/server/documents/models.py @@ -9,8 +9,6 @@ from pydantic import BaseModel from pydantic import Field -from ee.onyx.server.query_history.models import ChatSessionMinimal -from onyx.background.indexing.models import IndexAttemptErrorPydantic from onyx.configs.app_configs import MASK_CREDENTIAL_PREFIX from onyx.configs.constants import DocumentSource from onyx.connectors.models import InputType @@ -23,8 +21,6 @@ from onyx.db.models import IndexAttempt from onyx.db.models import IndexingStatus from onyx.db.models import TaskStatus -from onyx.server.models import FullUserSnapshot -from onyx.server.models import InvitedUserSnapshot from onyx.server.utils import mask_credential_dict from onyx.utils.variable_functionality import fetch_ee_implementation_or_noop @@ -193,14 +189,7 @@ def from_index_attempt_db_model( # These are the types currently supported by the pagination hook # More api endpoints can be refactored and be added here for use with the pagination hook -PaginatedType = TypeVar( - "PaginatedType", - IndexAttemptSnapshot, - FullUserSnapshot, - InvitedUserSnapshot, - ChatSessionMinimal, - IndexAttemptErrorPydantic, -) +PaginatedType = TypeVar("PaginatedType", bound=BaseModel) class PaginatedReturn(BaseModel, Generic[PaginatedType]): diff --git a/backend/onyx/server/manage/models.py b/backend/onyx/server/manage/models.py index d1f4e2f7f04..e7091d18dc1 100644 --- a/backend/onyx/server/manage/models.py +++ b/backend/onyx/server/manage/models.py @@ -1,5 +1,7 @@ +import re from datetime import datetime from enum import Enum +from typing import Any from typing import TYPE_CHECKING from pydantic import BaseModel @@ -8,7 +10,6 @@ from pydantic import field_validator from pydantic import model_validator -from ee.onyx.server.manage.models import StandardAnswerCategory from onyx.auth.schemas import UserRole from onyx.configs.app_configs import TRACK_EXTERNAL_IDP_EXPIRY from onyx.configs.constants import AuthType @@ -17,6 +18,8 @@ from onyx.db.models import ChannelConfig from onyx.db.models import SlackBot as SlackAppModel from onyx.db.models import SlackChannelConfig as SlackChannelConfigModel +from onyx.db.models import StandardAnswer as StandardAnswerModel +from onyx.db.models import StandardAnswerCategory as StandardAnswerCategoryModel from onyx.db.models import User from onyx.onyxbot.slack.config import VALID_SLACK_FILTERS from onyx.server.features.persona.models import FullPersonaSnapshot @@ -234,7 +237,7 @@ class SlackChannelConfig(BaseModel): persona: PersonaSnapshot | None channel_config: ChannelConfig # XXX this is going away soon - standard_answer_categories: list[StandardAnswerCategory] + standard_answer_categories: list["StandardAnswerCategory"] enable_auto_filters: bool is_default: bool @@ -307,3 +310,99 @@ class AllUsersResponse(BaseModel): class SlackChannel(BaseModel): id: str name: str + + +""" +Standard Answer Models + +ee only, but needs to be here since it's imported by non-ee models. +""" + + +class StandardAnswerCategoryCreationRequest(BaseModel): + name: str + + +class StandardAnswerCategory(BaseModel): + id: int + name: str + + @classmethod + def from_model( + cls, standard_answer_category: StandardAnswerCategoryModel + ) -> "StandardAnswerCategory": + return cls( + id=standard_answer_category.id, + name=standard_answer_category.name, + ) + + +class StandardAnswer(BaseModel): + id: int + keyword: str + answer: str + categories: list[StandardAnswerCategory] + match_regex: bool + match_any_keywords: bool + + @classmethod + def from_model(cls, standard_answer_model: StandardAnswerModel) -> "StandardAnswer": + return cls( + id=standard_answer_model.id, + keyword=standard_answer_model.keyword, + answer=standard_answer_model.answer, + match_regex=standard_answer_model.match_regex, + match_any_keywords=standard_answer_model.match_any_keywords, + categories=[ + StandardAnswerCategory.from_model(standard_answer_category_model) + for standard_answer_category_model in standard_answer_model.categories + ], + ) + + +class StandardAnswerCreationRequest(BaseModel): + keyword: str + answer: str + categories: list[int] + match_regex: bool + match_any_keywords: bool + + @field_validator("categories", mode="before") + @classmethod + def validate_categories(cls, value: list[int]) -> list[int]: + if len(value) < 1: + raise ValueError( + "At least one category must be attached to a standard answer" + ) + return value + + @model_validator(mode="after") + def validate_only_match_any_if_not_regex(self) -> Any: + if self.match_regex and self.match_any_keywords: + raise ValueError( + "Can only match any keywords in keyword mode, not regex mode" + ) + + return self + + @model_validator(mode="after") + def validate_keyword_if_regex(self) -> Any: + if not self.match_regex: + # no validation for keywords + return self + + try: + re.compile(self.keyword) + return self + except re.error as err: + if isinstance(err.pattern, bytes): + raise ValueError( + f'invalid regex pattern r"{err.pattern.decode()}" in `keyword`: {err.msg}' + ) + else: + pattern = f'r"{err.pattern}"' if err.pattern is not None else "" + raise ValueError( + " ".join( + ["invalid regex pattern", pattern, f"in `keyword`: {err.msg}"] + ) + ) diff --git a/backend/onyx/server/manage/users.py b/backend/onyx/server/manage/users.py index a179128dc6a..c570c7c8d87 100644 --- a/backend/onyx/server/manage/users.py +++ b/backend/onyx/server/manage/users.py @@ -21,7 +21,6 @@ from sqlalchemy import update from sqlalchemy.orm import Session -from ee.onyx.configs.app_configs import SUPER_USERS from onyx.auth.email_utils import send_user_email_invite from onyx.auth.invited_users import get_invited_users from onyx.auth.invited_users import write_invited_users @@ -72,6 +71,9 @@ from onyx.server.utils import BasicAuthenticationError from onyx.utils.logger import setup_logger from onyx.utils.variable_functionality import fetch_ee_implementation_or_noop +from onyx.utils.variable_functionality import ( + fetch_versioned_implementation_with_fallback, +) from shared_configs.configs import MULTI_TENANT from shared_configs.contextvars import get_current_tenant_id @@ -649,11 +651,19 @@ def verify_user_logged_in( "onyx.server.tenants.user_mapping", "get_tenant_invitation", None )(user.email) + super_users_list = cast( + list[str], + fetch_versioned_implementation_with_fallback( + "onyx.configs.app_configs", + "SUPER_USERS", + [], + ), + ) user_info = UserInfo.from_model( user, current_token_created_at=token_created_at, expiry_length=SESSION_EXPIRE_TIME_SECONDS, - is_cloud_superuser=user.email in SUPER_USERS, + is_cloud_superuser=user.email in super_users_list, team_name=team_name, tenant_info=TenantInfo( new_tenant=new_tenant, From 3a9dcf8459cf7d39dd159594c50d495f115bbd15 Mon Sep 17 00:00:00 2001 From: Weves Date: Thu, 8 May 2025 18:12:31 -0700 Subject: [PATCH 4/9] Fix --- backend/ee/onyx/server/manage/standard_answer.py | 8 ++++---- backend/ee/onyx/server/query_and_chat/models.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/backend/ee/onyx/server/manage/standard_answer.py b/backend/ee/onyx/server/manage/standard_answer.py index 5ac1a8fb716..56ce2cee65d 100644 --- a/backend/ee/onyx/server/manage/standard_answer.py +++ b/backend/ee/onyx/server/manage/standard_answer.py @@ -12,13 +12,13 @@ from ee.onyx.db.standard_answer import remove_standard_answer from ee.onyx.db.standard_answer import update_standard_answer from ee.onyx.db.standard_answer import update_standard_answer_category -from ee.onyx.server.manage.models import StandardAnswer -from ee.onyx.server.manage.models import StandardAnswerCategory -from ee.onyx.server.manage.models import StandardAnswerCategoryCreationRequest -from ee.onyx.server.manage.models import StandardAnswerCreationRequest from onyx.auth.users import current_admin_user from onyx.db.engine import get_session from onyx.db.models import User +from onyx.server.manage.models import StandardAnswer +from onyx.server.manage.models import StandardAnswerCategory +from onyx.server.manage.models import StandardAnswerCategoryCreationRequest +from onyx.server.manage.models import StandardAnswerCreationRequest router = APIRouter(prefix="/manage") diff --git a/backend/ee/onyx/server/query_and_chat/models.py b/backend/ee/onyx/server/query_and_chat/models.py index 8dba5af9bf6..d674e9ecb51 100644 --- a/backend/ee/onyx/server/query_and_chat/models.py +++ b/backend/ee/onyx/server/query_and_chat/models.py @@ -6,7 +6,6 @@ from pydantic import Field from pydantic import model_validator -from ee.onyx.server.manage.models import StandardAnswer from onyx.chat.models import CitationInfo from onyx.chat.models import PersonaOverrideConfig from onyx.chat.models import QADocsResponse @@ -19,6 +18,7 @@ from onyx.context.search.models import RerankingDetails from onyx.context.search.models import RetrievalDetails from onyx.context.search.models import SavedSearchDoc +from onyx.server.manage.models import StandardAnswer class StandardAnswerRequest(BaseModel): From ba06632d09a4de028c74766f2ec1042d4517f92c Mon Sep 17 00:00:00 2001 From: Weves Date: Thu, 8 May 2025 19:32:33 -0700 Subject: [PATCH 5/9] Autodiscover --- backend/ee/onyx/background/celery/apps/heavy.py | 2 ++ backend/onyx/background/celery/apps/beat.py | 6 ++++++ backend/onyx/background/celery/apps/heavy.py | 2 -- backend/onyx/background/celery/apps/light.py | 1 - backend/onyx/background/celery/apps/monitoring.py | 1 - 5 files changed, 8 insertions(+), 4 deletions(-) diff --git a/backend/ee/onyx/background/celery/apps/heavy.py b/backend/ee/onyx/background/celery/apps/heavy.py index fcb9adb183b..46b049cfb0c 100644 --- a/backend/ee/onyx/background/celery/apps/heavy.py +++ b/backend/ee/onyx/background/celery/apps/heavy.py @@ -133,6 +133,8 @@ def export_query_history_task(self: Task, *, start: datetime, end: datetime) -> celery_app.autodiscover_tasks( [ + "ee.onyx.background.celery.tasks.doc_permission_syncing", + "ee.onyx.background.celery.tasks.external_group_syncing", "ee.onyx.background.celery.tasks.cleanup", ] ) diff --git a/backend/onyx/background/celery/apps/beat.py b/backend/onyx/background/celery/apps/beat.py index f08170b7cae..2b3daf65bf2 100644 --- a/backend/onyx/background/celery/apps/beat.py +++ b/backend/onyx/background/celery/apps/beat.py @@ -256,3 +256,9 @@ def on_setup_logging( celery_app.conf.beat_scheduler = DynamicTenantScheduler celery_app.conf.task_default_base = app_base.TenantAwareTask + +celery_app.autodiscover_tasks( + [ + "ee.onyx.background.celery.tasks.cloud", + ] +) diff --git a/backend/onyx/background/celery/apps/heavy.py b/backend/onyx/background/celery/apps/heavy.py index 428345d093d..5582faad407 100644 --- a/backend/onyx/background/celery/apps/heavy.py +++ b/backend/onyx/background/celery/apps/heavy.py @@ -94,7 +94,5 @@ def on_setup_logging( celery_app.autodiscover_tasks( [ "onyx.background.celery.tasks.pruning", - "onyx.background.celery.tasks.doc_permission_syncing", - "onyx.background.celery.tasks.external_group_syncing", ] ) diff --git a/backend/onyx/background/celery/apps/light.py b/backend/onyx/background/celery/apps/light.py index c4aca82574e..39cca1dec6f 100644 --- a/backend/onyx/background/celery/apps/light.py +++ b/backend/onyx/background/celery/apps/light.py @@ -113,6 +113,5 @@ def on_setup_logging( "onyx.background.celery.tasks.doc_permission_syncing", "onyx.background.celery.tasks.user_file_folder_sync", "onyx.background.celery.tasks.indexing", - "onyx.background.celery.tasks.tenant_provisioning", ] ) diff --git a/backend/onyx/background/celery/apps/monitoring.py b/backend/onyx/background/celery/apps/monitoring.py index cd42f72f176..edaa01225d2 100644 --- a/backend/onyx/background/celery/apps/monitoring.py +++ b/backend/onyx/background/celery/apps/monitoring.py @@ -92,6 +92,5 @@ def on_setup_logging( celery_app.autodiscover_tasks( [ "onyx.background.celery.tasks.monitoring", - "onyx.background.celery.tasks.tenant_provisioning", ] ) From 966a7542857d6ffc3e2d4ced76c02c8a3b5c99cf Mon Sep 17 00:00:00 2001 From: Weves Date: Thu, 8 May 2025 19:34:14 -0700 Subject: [PATCH 6/9] fix --- backend/onyx/onyxbot/slack/listener.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/onyx/onyxbot/slack/listener.py b/backend/onyx/onyxbot/slack/listener.py index 91b8c9219d6..96b69641c00 100644 --- a/backend/onyx/onyxbot/slack/listener.py +++ b/backend/onyx/onyxbot/slack/listener.py @@ -270,7 +270,7 @@ def acquire_tenants(self) -> None: "onyx.server.tenants.product_gating", "get_gated_tenants", set(), - ) + )() all_tenants = [ tenant_id for tenant_id in get_all_tenant_ids() From 63b8e05f580b1bf511b39e7d87c5ae125cf35945 Mon Sep 17 00:00:00 2001 From: Weves Date: Thu, 8 May 2025 19:36:10 -0700 Subject: [PATCH 7/9] Fix typing --- backend/ee/onyx/server/documents/cc_pair.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/ee/onyx/server/documents/cc_pair.py b/backend/ee/onyx/server/documents/cc_pair.py index 927a5cad551..7c35889c7d8 100644 --- a/backend/ee/onyx/server/documents/cc_pair.py +++ b/backend/ee/onyx/server/documents/cc_pair.py @@ -55,7 +55,7 @@ def sync_cc_pair( cc_pair_id: int, user: User = Depends(current_curator_or_admin_user), db_session: Session = Depends(get_session), -) -> StatusResponse[list[int]]: +) -> StatusResponse[None]: """Triggers permissions sync on a particular cc_pair immediately""" tenant_id = get_current_tenant_id() @@ -129,7 +129,7 @@ def sync_cc_pair_groups( cc_pair_id: int, user: User = Depends(current_curator_or_admin_user), db_session: Session = Depends(get_session), -) -> StatusResponse[list[int]]: +) -> StatusResponse[None]: """Triggers group sync on a particular cc_pair immediately""" tenant_id = get_current_tenant_id() From d321d7f3f9bd126411095d0c46c855cc3cf7f384 Mon Sep 17 00:00:00 2001 From: Weves Date: Thu, 8 May 2025 19:51:25 -0700 Subject: [PATCH 8/9] More celery task stuff --- backend/ee/onyx/background/celery/apps/light.py | 8 ++++++++ .../ee/onyx/background/celery/apps/monitoring.py | 7 +++++++ backend/ee/onyx/background/celery/apps/primary.py | 8 ++++++++ backend/onyx/background/celery/apps/primary.py | 2 -- .../onyx/background/celery/versioned_apps/light.py | 14 +++++--------- .../background/celery/versioned_apps/monitoring.py | 14 +++++--------- 6 files changed, 33 insertions(+), 20 deletions(-) create mode 100644 backend/ee/onyx/background/celery/apps/light.py create mode 100644 backend/ee/onyx/background/celery/apps/monitoring.py diff --git a/backend/ee/onyx/background/celery/apps/light.py b/backend/ee/onyx/background/celery/apps/light.py new file mode 100644 index 00000000000..1930a8d2923 --- /dev/null +++ b/backend/ee/onyx/background/celery/apps/light.py @@ -0,0 +1,8 @@ +from onyx.background.celery.apps.monitoring import celery_app + +celery_app.autodiscover_tasks( + [ + "ee.onyx.background.celery.tasks.doc_permission_syncing", + "ee.onyx.background.celery.tasks.external_group_syncing", + ] +) diff --git a/backend/ee/onyx/background/celery/apps/monitoring.py b/backend/ee/onyx/background/celery/apps/monitoring.py new file mode 100644 index 00000000000..c7e1e4685b0 --- /dev/null +++ b/backend/ee/onyx/background/celery/apps/monitoring.py @@ -0,0 +1,7 @@ +from onyx.background.celery.apps.monitoring import celery_app + +celery_app.autodiscover_tasks( + [ + "ee.onyx.background.celery.tasks.tenant_provisioning", + ] +) diff --git a/backend/ee/onyx/background/celery/apps/primary.py b/backend/ee/onyx/background/celery/apps/primary.py index 733252d32db..4d4016281a6 100644 --- a/backend/ee/onyx/background/celery/apps/primary.py +++ b/backend/ee/onyx/background/celery/apps/primary.py @@ -128,3 +128,11 @@ def autogenerate_usage_report_task(*, tenant_id: str) -> None: user_id=None, period=None, ) + + +celery_app.autodiscover_tasks( + [ + "ee.onyx.background.celery.tasks.doc_permission_syncing", + "ee.onyx.background.celery.tasks.external_group_syncing", + ] +) diff --git a/backend/onyx/background/celery/apps/primary.py b/backend/onyx/background/celery/apps/primary.py index 6c7e2d4e757..954f8392c8c 100644 --- a/backend/onyx/background/celery/apps/primary.py +++ b/backend/onyx/background/celery/apps/primary.py @@ -289,8 +289,6 @@ def stop(self, worker: Any) -> None: "onyx.background.celery.tasks.connector_deletion", "onyx.background.celery.tasks.indexing", "onyx.background.celery.tasks.periodic", - "onyx.background.celery.tasks.doc_permission_syncing", - "onyx.background.celery.tasks.external_group_syncing", "onyx.background.celery.tasks.pruning", "onyx.background.celery.tasks.shared", "onyx.background.celery.tasks.vespa", diff --git a/backend/onyx/background/celery/versioned_apps/light.py b/backend/onyx/background/celery/versioned_apps/light.py index 42834aad19a..20fff856c61 100644 --- a/backend/onyx/background/celery/versioned_apps/light.py +++ b/backend/onyx/background/celery/versioned_apps/light.py @@ -4,15 +4,11 @@ from celery import Celery +from onyx.utils.variable_functionality import fetch_versioned_implementation from onyx.utils.variable_functionality import set_is_ee_based_on_env_variable set_is_ee_based_on_env_variable() - - -def get_app() -> Celery: - from onyx.background.celery.apps.light import celery_app - - return celery_app - - -app = get_app() +app: Celery = fetch_versioned_implementation( + "onyx.background.celery.apps.light", + "celery_app", +) diff --git a/backend/onyx/background/celery/versioned_apps/monitoring.py b/backend/onyx/background/celery/versioned_apps/monitoring.py index c586ad02be0..d9e78985731 100644 --- a/backend/onyx/background/celery/versioned_apps/monitoring.py +++ b/backend/onyx/background/celery/versioned_apps/monitoring.py @@ -2,15 +2,11 @@ from celery import Celery +from onyx.utils.variable_functionality import fetch_versioned_implementation from onyx.utils.variable_functionality import set_is_ee_based_on_env_variable set_is_ee_based_on_env_variable() - - -def get_app() -> Celery: - from onyx.background.celery.apps.monitoring import celery_app - - return celery_app - - -app = get_app() +app: Celery = fetch_versioned_implementation( + "onyx.background.celery.apps.monitoring", + "celery_app", +) From 999afce11da00c0bd6c41282e8a07bdf109769fb Mon Sep 17 00:00:00 2001 From: Weves Date: Sun, 11 May 2025 14:10:22 -0700 Subject: [PATCH 9/9] Fix import --- backend/ee/onyx/background/celery/apps/light.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/ee/onyx/background/celery/apps/light.py b/backend/ee/onyx/background/celery/apps/light.py index 1930a8d2923..ea30b7d6e7a 100644 --- a/backend/ee/onyx/background/celery/apps/light.py +++ b/backend/ee/onyx/background/celery/apps/light.py @@ -1,4 +1,4 @@ -from onyx.background.celery.apps.monitoring import celery_app +from onyx.background.celery.apps.light import celery_app celery_app.autodiscover_tasks( [