File tree Expand file tree Collapse file tree 1 file changed +11
-2
lines changed
backend/onyx/file_processing Expand file tree Collapse file tree 1 file changed +11
-2
lines changed Original file line number Diff line number Diff line change @@ -313,8 +313,17 @@ def docx_to_text_and_images(
313
313
try :
314
314
doc = docx .Document (file )
315
315
except BadZipFile as e :
316
- logger .warning (f"Failed to extract text from { file_name or 'docx file' } : { e } " )
317
- return "" , []
316
+ logger .warning (
317
+ f"Failed to extract docx { file_name or 'docx file' } : { e } . Attempting to read as text file."
318
+ )
319
+
320
+ # May be an invalid docx, but still a valid text file
321
+ file .seek (0 )
322
+ encoding = detect_encoding (file )
323
+ text_content_raw , _ = read_text_file (
324
+ file , encoding = encoding , ignore_onyx_metadata = False
325
+ )
326
+ return text_content_raw or "" , []
318
327
319
328
# Grab text from paragraphs
320
329
for paragraph in doc .paragraphs :
You can’t perform that action at this time.
0 commit comments