Skip to content

Commit e2b3a6e

Browse files
authored
fix: drive external links (#5079)
1 parent 4f04b09 commit e2b3a6e

File tree

1 file changed

+12
-2
lines changed

1 file changed

+12
-2
lines changed

backend/onyx/file_processing/extract_file_text.py

Lines changed: 12 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -341,8 +341,18 @@ def docx_to_text_and_images(
341 341

342 342
for rel_id, rel in doc.part.rels.items():
343 343
if "image" in rel.reltype:
344-
# image is typically in rel.target_part.blob
345-
image_bytes = rel.target_part.blob
344+
# Skip images that are linked rather than embedded (TargetMode="External")
345+
if getattr(rel, "is_external", False):
346+
continue
347+
348+
try:
349+
# image is typically in rel.target_part.blob
350+
image_bytes = rel.target_part.blob
351+
except ValueError:
352+
# Safeguard against relationships that lack an internal target_part
353+
# (e.g., external relationships or other anomalies)
354+
continue
355+
346 356
image_name = rel.target_part.partname
347 357
# store
348 358
embedded_images.append((image_bytes, os.path.basename(str(image_name))))

0 commit comments

Comments
 (0)