Remove hardcoded image extraction flag for PDFs

emerzon · Weves · commit 82724826ce30 · 2025-07-01T13:57:36.000-07:00
Previously, PDF extraction always passed `extract_images=True`, so embedded images were extracted regardless of configuration.
This change makes PDF extraction respect the "Enable Image Extraction and Analysis" workspace configuration instead.
diff --git a/backend/onyx/file_processing/extract_file_text.py b/backend/onyx/file_processing/extract_file_text.py
@@ -28,6 +28,7 @@
 
 from onyx.configs.constants import FileOrigin
 from onyx.configs.constants import ONYX_METADATA_FILENAME
+from onyx.configs.llm_configs import get_image_extraction_and_analysis_enabled
 from onyx.file_processing.html_utils import parse_html_page_basic
 from onyx.file_processing.unstructured import get_unstructured_api_key
 from onyx.file_processing.unstructured import unstructured_to_text
@@ -533,7 +534,7 @@ def extract_text_and_images(
         if extension == ".pdf":
             file.seek(0)
             text_content, pdf_metadata, images = read_pdf_file(
-                file, pdf_pass, extract_images=True
+                file, pdf_pass, extract_images=get_image_extraction_and_analysis_enabled()
             )
             return ExtractionResult(
                 text_content=text_content, embedded_images=images, metadata=pdf_metadata