Skip to content

Commit 18f13bd

Browse files
committed
Fix pre-commit to exclude .venv directory from lazy import check
1 parent 600cec7 commit 18f13bd

File tree

3 files changed

+17
-1
lines changed

3 files changed

+17
-1
lines changed

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ repos:
4343
name: Check lazy imports are not directly imported
4444
entry: python3 backend/scripts/check_lazy_imports.py
4545
language: system
46-
files: ^backend/.*\.py$
46+
files: ^backend/(?!\.venv/).*\.py$
4747
pass_filenames: false
4848

4949
# We would like to have a mypy pre-commit hook, but due to the fact that

backend/onyx/connectors/sharepoint/connector.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,8 @@
5757
from onyx.file_processing.extract_file_text import ACCEPTED_IMAGE_FILE_EXTENSIONS
5858
from onyx.file_processing.extract_file_text import extract_text_and_images
5959
from onyx.file_processing.extract_file_text import get_file_ext
60+
from onyx.file_processing.extract_file_text import is_accepted_file_ext
61+
from onyx.file_processing.extract_file_text import OnyxExtensionType
6062
from onyx.file_processing.file_validation import EXCLUDED_IMAGE_TYPES
6163
from onyx.file_processing.image_utils import store_image_and_create_section
6264
from onyx.utils.b64 import get_image_type_from_bytes
@@ -1441,6 +1443,12 @@ def _load_from_checkpoint(
14411443
)
14421444
for driveitem in driveitems:
14431445
driveitem_extension = get_file_ext(driveitem.name)
1446+
if not is_accepted_file_ext(driveitem_extension, OnyxExtensionType.All):
1447+
logger.warning(
1448+
f"Skipping {driveitem.web_url} as it is not a supported file type"
1449+
)
1450+
continue
1451+
14441452
# Only yield empty documents if they are PDFs or images
14451453
should_yield_if_empty = (
14461454
driveitem_extension in ACCEPTED_IMAGE_FILE_EXTENSIONS
@@ -1464,6 +1472,10 @@ def _load_from_checkpoint(
14641472
TextSection(link=driveitem.web_url, text="")
14651473
]
14661474
yield doc
1475+
else:
1476+
logger.warning(
1477+
f"Skipping {driveitem.web_url} as it is empty and not a PDF or image"
1478+
)
14671479
except Exception as e:
14681480
logger.warning(
14691481
f"Failed to process driveitem {driveitem.web_url}: {e}"

backend/scripts/check_lazy_imports.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,10 @@ def find_python_files(
104104
if ignore_directories is None:
105105
ignore_directories = set()
106106

107+
# Always ignore virtual environment and bytecode cache directories
108+
venv_dirs = {".venv", "venv", ".env", "env", "__pycache__"}
109+
ignore_directories = ignore_directories.union(venv_dirs)
110+
107111
python_files = []
108112
for file_path in backend_dir.glob("**/*.py"):
109113
# Skip test files (they can contain test imports)

0 commit comments

Comments
 (0)