diff --git a/backend/onyx/connectors/google_drive/doc_conversion.py b/backend/onyx/connectors/google_drive/doc_conversion.py index 29aa750ee9..21e68f7b0a 100644 --- a/backend/onyx/connectors/google_drive/doc_conversion.py +++ b/backend/onyx/connectors/google_drive/doc_conversion.py @@ -19,6 +19,7 @@ from onyx.connectors.google_drive.section_extraction import HEADING_DELIMITER from onyx.connectors.google_utils.resources import get_drive_service from onyx.connectors.google_utils.resources import get_google_docs_service +from onyx.connectors.google_utils.resources import GoogleDocsService from onyx.connectors.google_utils.resources import GoogleDriveService from onyx.connectors.models import ConnectorFailure from onyx.connectors.models import Document @@ -36,7 +37,6 @@ from onyx.file_processing.extract_file_text import xlsx_to_text from onyx.file_processing.file_validation import is_valid_image_type from onyx.file_processing.image_utils import store_image_and_create_section -from onyx.utils.lazy import lazy_eval from onyx.utils.logger import setup_logger from onyx.utils.variable_functionality import ( fetch_versioned_implementation_with_fallback, @@ -146,7 +146,9 @@ def _download_and_extract_sections_basic( # For other file types, download the file # Use the correct API call for downloading files - response_call = lazy_eval(lambda: download_request(service, file_id)) + # lazy evaluation to only download the file if necessary + def response_call() -> bytes: + return download_request(service, file_id) # Process based on mime type if mime_type == "text/plain": @@ -418,13 +420,14 @@ def _convert_drive_item_to_document( Main entry point for converting a Google Drive file => Document object. """ sections: list[TextSection | ImageSection] = [] + # Only construct these services when needed - drive_service = lazy_eval( - lambda: get_drive_service(creds, user_email=retriever_email) - ) - docs_service = lazy_eval( - lambda: get_google_docs_service(creds, user_email=retriever_email) - ) + def _get_drive_service() -> GoogleDriveService: + return get_drive_service(creds, user_email=retriever_email) + + def _get_docs_service() -> GoogleDocsService: + return get_google_docs_service(creds, user_email=retriever_email) + doc_id = "unknown" try: @@ -438,14 +441,14 @@ def _convert_drive_item_to_document( try: # get_document_sections is the advanced approach for Google Docs doc_sections = get_document_sections( - docs_service=docs_service(), + docs_service=_get_docs_service(), doc_id=file.get("id", ""), ) if doc_sections: sections = cast(list[TextSection | ImageSection], doc_sections) if any(SMART_CHIP_CHAR in section.text for section in doc_sections): basic_sections = _download_and_extract_sections_basic( - file, drive_service(), allow_images + file, _get_drive_service(), allow_images ) sections = align_basic_advanced(basic_sections, doc_sections) @@ -470,7 +473,7 @@ def _convert_drive_item_to_document( # If we don't have sections yet, use the basic extraction method if not sections: sections = _download_and_extract_sections_basic( - file, drive_service(), allow_images + file, _get_drive_service(), allow_images ) # If we still don't have any sections, skip this file diff --git a/backend/onyx/utils/lazy.py b/backend/onyx/utils/lazy.py deleted file mode 100644 index 5df9207ad3..0000000000 --- a/backend/onyx/utils/lazy.py +++ /dev/null @@ -1,13 +0,0 @@ -from collections.abc import Callable -from functools import lru_cache -from typing import TypeVar - -R = TypeVar("R") - - -def lazy_eval(func: Callable[[], R]) -> Callable[[], R]: - @lru_cache(maxsize=1) - def lazy_func() -> R: - return func() - - return lazy_func