Skip to content

Commit a4607a0

Browse files
committed
.
1 parent 0a5c7d8 commit a4607a0

File tree

4 files changed

+611
-0
lines changed

4 files changed

+611
-0
lines changed

.pre-commit-config.yaml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,15 @@ repos:
3737
additional_dependencies:
3838
- prettier
3939

40+
- repo: local
41+
hooks:
42+
- id: check-lazy-imports
43+
name: Check lazy imports are not directly imported
44+
entry: python backend/scripts/check_lazy_imports.py
45+
language: system
46+
files: ^backend/.*\.py$
47+
pass_filenames: false
48+
4049
# We would like to have a mypy pre-commit hook, but due to the fact that
4150
# pre-commit runs in its own isolated environment, we would need to install
4251
# and keep in sync all dependencies so mypy has access to the appropriate type
Lines changed: 204 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,204 @@
1+
"""
2+
Pre-commit hook to ensure modules in lazy_import_registry.py are only imported lazily.
3+
4+
This script prevents direct imports of modules that should be lazily loaded,
5+
helping maintain memory optimization and preventing import-time dependencies.
6+
"""
7+
8+
import logging
9+
import re
10+
import sys
11+
from pathlib import Path
12+
from typing import Dict
13+
from typing import List
14+
from typing import Set
15+
16+
# Root logging setup: INFO and above go to a console stream handler, each
# record prefixed with timestamp, logger name and level.
logging.basicConfig(
    handlers=[logging.StreamHandler()],
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

# Module-level logger used for all progress and violation reporting below.
logger = logging.getLogger(__name__)
24+
25+
26+
def get_lazy_modules(registry_path: Path) -> Dict[str, str]:
    """
    Read the lazy-import registry and collect its LazyModule declarations.

    A declaration is recognised when it has the form
    ``lazy_<name>: "<hint>" = LazyModule("<module>")`` (either quote style).

    Returns:
        Dict keyed by lazy variable name with the wrapped module name as value,
        e.g., {"lazy_vertexai": "vertexai"}. An empty dict is returned when the
        registry cannot be read.
    """
    # Groups: (1) lazy variable, (2) quoted annotation, (3) LazyModule argument
    declaration_re = (
        r'(lazy_\w+):\s*["\']([^"\']+)["\']\s*=\s*LazyModule\(["\']([^"\']+)["\']\)'
    )

    try:
        registry_source = registry_path.read_text(encoding="utf-8")
        return {
            found.group(1): found.group(3)
            for found in re.finditer(declaration_re, registry_source)
        }
    except FileNotFoundError:
        print(f"Error: Could not find lazy import registry at {registry_path}")
    except Exception as e:
        print(f"Error reading registry file: {e}")

    # Reached only when one of the handlers above ran.
    return {}
56+
57+
58+
def find_direct_imports(file_path: Path, protected_modules: Set[str]) -> List[tuple]:
    """
    Find direct imports of protected modules in a given file.

    The file is scanned line by line with regular expressions (it is not
    parsed), so only single-line ``import`` / ``from`` statements are
    recognised. Imports nested under an ``if TYPE_CHECKING:`` block are allowed
    and skipped; ``if not TYPE_CHECKING:`` is NOT treated as such a block
    because its body executes at runtime.

    Args:
        file_path: Path to Python file to check
        protected_modules: Set of module names that should only be imported lazily

    Returns:
        List of (line_number, line_content) tuples for violations. Line numbers
        are 1-based and each offending line is reported at most once, even if
        it matches several protected modules. An empty list is returned when
        the file cannot be read.
    """
    violations: List[tuple] = []

    # Keep the try body to the one call that can actually fail.
    try:
        content = file_path.read_text(encoding="utf-8")
    except (OSError, UnicodeError) as e:
        print(f"Error reading {file_path}: {e}")
        return violations

    # Build the patterns once per file instead of once per line.
    import_patterns = []
    for module in protected_modules:
        escaped = re.escape(module)
        import_patterns.extend(
            (
                # import module / import module.sub / import os, module
                re.compile(rf"^import\s+(?:[^#]*?,\s*)?{escaped}(?:\s|$|\.|,)"),
                # from module import ... / from module.sub import ...
                re.compile(rf"^from\s+{escaped}(?:\s|\.|$)"),
                # from ... import module (less common but possible)
                re.compile(rf"^from\s+[\w.]+\s+import\s+.*\b{escaped}\b"),
            )
        )

    # Opening line of a positive TYPE_CHECKING guard, e.g. "if TYPE_CHECKING:"
    # or "if typing.TYPE_CHECKING:".
    type_checking_start = re.compile(r"^if\s+\(?\s*(?:\w+\.)*TYPE_CHECKING\b")

    in_type_checking = False
    type_checking_indent = 0

    for line_num, line in enumerate(content.split("\n"), 1):
        stripped = line.strip()

        # Blank lines and comments neither open nor close a block.
        if not stripped or stripped.startswith("#"):
            continue

        indent = len(line) - len(line.lstrip())

        if type_checking_start.match(stripped):
            in_type_checking = True
            # Remember the guard's indentation to know when the block ends.
            type_checking_indent = indent
            continue

        if in_type_checking:
            if indent > type_checking_indent:
                # Still inside the TYPE_CHECKING block: imports are allowed.
                continue
            # Code at the same or lesser indentation closes the block.
            in_type_checking = False

        # any() stops at the first hit, so a line is recorded only once.
        if any(pattern.match(stripped) for pattern in import_patterns):
            violations.append((line_num, line))

    return violations
122+
123+
124+
def main() -> int:
    """
    Check the Python files under backend/ for direct imports of lazy modules.

    The set of protected modules is read from the lazy import registry. Every
    ``*.py`` file below backend/ is scanned except the registry itself, files
    in hidden or ``__pycache__`` directories, and test files.

    Returns:
        Exit code: 1 if at least one violation was found, otherwise 0
        (including when the registry is missing or declares no lazy modules).
    """
    backend_dir = Path(__file__).parent.parent  # Go up from scripts/ to backend/
    registry_path = backend_dir / "onyx" / "lazy_handling" / "lazy_import_registry.py"

    # Get lazy modules from registry
    lazy_modules = get_lazy_modules(registry_path)

    if not lazy_modules:
        logger.info("No lazy modules found in registry or error reading registry")
        return 0

    # Sorted so that log output is stable from run to run.
    protected_modules = set(lazy_modules.values())
    ordered_modules = sorted(protected_modules)
    logger.info(
        f"Checking for direct imports of lazy modules: {', '.join(ordered_modules)}"
    )

    # Find all Python files in backend (excluding the registry itself)
    python_files = []
    for file_path in backend_dir.glob("**/*.py"):
        # Skip the registry file itself
        try:
            if file_path.samefile(registry_path):
                continue
        except OSError:
            # Handle case where files don't exist or can't be compared
            if file_path == registry_path:
                continue

        # Filter on the path *relative to backend/*. backend_dir may be
        # absolute, and a checkout living under e.g. "~/.cache/" or "tests/"
        # must not cause every file to be skipped.
        rel_parts = file_path.relative_to(backend_dir).parts

        # Skip __pycache__ and other non-source directories
        if any(part.startswith(".") or part == "__pycache__" for part in rel_parts):
            continue

        # Skip test files (they can contain test imports): anything in a
        # tests directory or with a test-style filename.
        if (
            "tests" in rel_parts
            or file_path.name.startswith("test_")
            or file_path.name.endswith("_test.py")
        ):
            continue

        python_files.append(file_path)

    violations_found = False

    # Check each Python file
    for file_path in python_files:
        violations = find_direct_imports(file_path, protected_modules)
        if not violations:
            continue

        violations_found = True
        rel_path = file_path.relative_to(backend_dir)
        logger.info(f"\n❌ Direct import violations found in {rel_path}:")

        for line_num, line in violations:
            logger.info(f" Line {line_num}: {line.strip()}")

        # Suggest fix: name the registry variable for each module that is
        # mentioned on one of the offending lines.
        for module in ordered_modules:
            if any(module in line for _, line in violations):
                lazy_var = next(
                    var for var, mod in lazy_modules.items() if mod == module
                )
                logger.info(
                    f" 💡 Use: from onyx.lazy_handling.lazy_import_registry import {lazy_var}"
                )

    if violations_found:
        logger.info(
            "\n🚫 Found direct imports of lazy modules. Please use the lazy imports from the registry."
        )
        return 1

    logger.info("✅ All lazy modules are properly imported through the registry!")
    return 0
201+
202+
203+
# Run the check only when executed as a script; main()'s return value
# (0 = clean, 1 = violations found) becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())

backend/tests/unit/scripts/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)