Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 14 additions & 11 deletions backend/onyx/connectors/google_drive/doc_conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from onyx.connectors.google_drive.section_extraction import HEADING_DELIMITER
from onyx.connectors.google_utils.resources import get_drive_service
from onyx.connectors.google_utils.resources import get_google_docs_service
from onyx.connectors.google_utils.resources import GoogleDocsService
from onyx.connectors.google_utils.resources import GoogleDriveService
from onyx.connectors.models import ConnectorFailure
from onyx.connectors.models import Document
Expand All @@ -36,7 +37,6 @@
from onyx.file_processing.extract_file_text import xlsx_to_text
from onyx.file_processing.file_validation import is_valid_image_type
from onyx.file_processing.image_utils import store_image_and_create_section
from onyx.utils.lazy import lazy_eval
from onyx.utils.logger import setup_logger
from onyx.utils.variable_functionality import (
fetch_versioned_implementation_with_fallback,
Expand Down Expand Up @@ -146,7 +146,9 @@ def _download_and_extract_sections_basic(

# For other file types, download the file
# Use the correct API call for downloading files
response_call = lazy_eval(lambda: download_request(service, file_id))
# lazy evaluation to only download the file if necessary
def response_call() -> bytes:
return download_request(service, file_id)

# Process based on mime type
if mime_type == "text/plain":
Expand Down Expand Up @@ -418,13 +420,14 @@ def _convert_drive_item_to_document(
Main entry point for converting a Google Drive file => Document object.
"""
sections: list[TextSection | ImageSection] = []

# Only construct these services when needed
drive_service = lazy_eval(
lambda: get_drive_service(creds, user_email=retriever_email)
)
docs_service = lazy_eval(
lambda: get_google_docs_service(creds, user_email=retriever_email)
)
def _get_drive_service() -> GoogleDriveService:
return get_drive_service(creds, user_email=retriever_email)

def _get_docs_service() -> GoogleDocsService:
return get_google_docs_service(creds, user_email=retriever_email)

doc_id = "unknown"

try:
Expand All @@ -438,14 +441,14 @@ def _convert_drive_item_to_document(
try:
# get_document_sections is the advanced approach for Google Docs
doc_sections = get_document_sections(
docs_service=docs_service(),
docs_service=_get_docs_service(),
doc_id=file.get("id", ""),
)
if doc_sections:
sections = cast(list[TextSection | ImageSection], doc_sections)
if any(SMART_CHIP_CHAR in section.text for section in doc_sections):
basic_sections = _download_and_extract_sections_basic(
file, drive_service(), allow_images
file, _get_drive_service(), allow_images
)
sections = align_basic_advanced(basic_sections, doc_sections)

Expand All @@ -470,7 +473,7 @@ def _convert_drive_item_to_document(
# If we don't have sections yet, use the basic extraction method
if not sections:
sections = _download_and_extract_sections_basic(
file, drive_service(), allow_images
file, _get_drive_service(), allow_images
)

# If we still don't have any sections, skip this file
Expand Down
13 changes: 0 additions & 13 deletions backend/onyx/utils/lazy.py

This file was deleted.