File tree Expand file tree Collapse file tree 1 file changed +20
-0
lines changed
backend/onyx/connectors/sharepoint Expand file tree Collapse file tree 1 file changed +20
-0
lines changed Original file line number Diff line number Diff line change 59
59
from onyx .file_processing .extract_file_text import get_file_ext
60
60
from onyx .file_processing .file_validation import EXCLUDED_IMAGE_TYPES
61
61
from onyx .file_processing .image_utils import store_image_and_create_section
62
+ from onyx .utils .b64 import get_image_type_from_bytes
62
63
from onyx .utils .logger import setup_logger
63
64
64
65
logger = setup_logger ()
@@ -395,6 +396,25 @@ def _convert_driveitem_to_document_with_permissions(
395
396
else :
396
397
# Note: we don't process Onyx metadata for connectors like Drive & SharePoint, but could
397
398
def _store_embedded_image (img_data : bytes , img_name : str ) -> None :
399
+ try :
400
+ mime_type = get_image_type_from_bytes (img_data )
401
+ except ValueError :
402
+ logger .debug (
403
+ "Skipping embedded image with unknown format for %s" ,
404
+ driveitem .name ,
405
+ )
406
+ return
407
+
408
+ # Of the MIME types get_image_type_from_bytes can return, only image/gif is in
409
+ # EXCLUDED_IMAGE_TYPES, so in practice this check filters out GIFs.
410
+ if mime_type in EXCLUDED_IMAGE_TYPES :
411
+ logger .debug (
412
+ "Skipping embedded image of excluded type %s for %s" ,
413
+ mime_type ,
414
+ driveitem .name ,
415
+ )
416
+ return
417
+
398
418
image_section , _ = store_image_and_create_section (
399
419
image_data = img_data ,
400
420
file_id = f"{ driveitem .id } _img_{ len (sections )} " ,
You can’t perform that action at this time.
0 commit comments