Skip to content

Commit dce7e7f

Browse files
committed
feat: enhance file modification tracking and add tests for edge cases
1 parent 388aee8 commit dce7e7f

File tree

6 files changed

+1163
-25
lines changed

6 files changed

+1163
-25
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ LibreChat is configured to use the code interpreter API by default.
3535
To configure LibreChat to use the local code interpreter, set the following environment variables in LibreChat:
3636

3737
```ini
38-
# LIBRECHAT_CODE_API_KEY=... currently not needed
38+
LIBRECHAT_CODE_API_KEY=<any-value-here>
3939
LIBRECHAT_CODE_BASEURL=http(s)://host:port/v1/librechat # for local testing, point this at the Docker host, e.g. http://host.docker.internal:8000/v1/librechat
4040
```
4141

app/services/docker_executor.py

Lines changed: 128 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import docker
22
from loguru import logger
33
from pathlib import Path
4-
from typing import Dict, List, Optional, Any, Literal
4+
from typing import Dict, List, Optional, Any, Literal, Set, Tuple
55
import time
66
from docker.errors import APIError, ImageNotFound
77
import asyncio
@@ -15,6 +15,7 @@
1515
from app.utils.generate_id import generate_id
1616
import aiodocker
1717
import json
18+
import os
1819

1920
from ..shared.config import get_settings
2021
from .database import db_manager
@@ -30,6 +31,16 @@ class ContainerMetrics:
3031
cpu_usage: float = 0.0
3132

3233

34+
@dataclass
class FileState:
    """
    Snapshot of a single file, used to detect changes between two scans.

    Attributes:
        path: Location of the file on disk (``_scan_directory`` stores the
            walked root joined with the filename).
        size: File size in bytes, taken from ``stat().st_size``.
        mtime: Last-modification timestamp, taken from ``stat().st_mtime``.
        md5_hash: Hex MD5 digest of the file content.
        exists: Whether the file was present when the snapshot was taken;
            defaults to True.
    """

    path: Path
    size: int
    mtime: float
    md5_hash: str
    exists: bool = True
42+
43+
3344
class DockerExecutor:
3445
"""Executes code in Docker containers with file management."""
3546

@@ -96,6 +107,94 @@ def _file_lock(self, path: Path):
96107
lock_file.close()
97108
lock_path.unlink(missing_ok=True)
98109

110+
def _scan_directory(self, directory: Path) -> Dict[str, FileState]:
    """
    Recursively scan a directory and collect the state of every file in it.

    Args:
        directory: Base directory to walk.

    Returns:
        A dictionary mapping each file's path relative to ``directory``
        ("/"-separated) to its FileState. The dictionary is empty when
        ``directory`` does not exist. Files ending in ``.lock`` and files
        that cannot be stat'ed or read are left out.
    """
    file_states: Dict[str, FileState] = {}

    if not directory.exists():
        logger.warning(f"Directory {directory} does not exist")
        return file_states

    # Hash in fixed-size chunks so a large output file is never held in
    # memory in full just to compute its digest.
    chunk_size = 1024 * 1024

    for root, _, files in os.walk(directory):
        root_path = Path(root)

        for filename in files:
            # Skip lock files (presumably the ones created by _file_lock)
            if filename.endswith('.lock'):
                continue

            file_path = root_path / filename

            # Dictionary key: path relative to the scanned directory, with
            # forward slashes so it can be embedded in "<session_id>/<rel_path>".
            rel_path = file_path.relative_to(directory).as_posix()

            try:
                stat = file_path.stat()
                size = stat.st_size
                mtime = stat.st_mtime

                # Calculate MD5 hash for content comparison
                digest = hashlib.md5()
                with file_path.open('rb') as fh:
                    while chunk := fh.read(chunk_size):
                        digest.update(chunk)
                md5_hash = digest.hexdigest()
            except OSError as e:
                # Covers PermissionError / FileNotFoundError as before, plus
                # any other I/O failure: one unreadable file must not abort
                # the whole scan.
                logger.warning(f"Error scanning file {file_path}: {str(e)}")
                continue

            file_states[rel_path] = FileState(
                path=file_path,
                size=size,
                mtime=mtime,
                md5_hash=md5_hash
            )
            logger.debug(f"Scanned file: {rel_path}, size: {size}, hash: {md5_hash}")

    return file_states
163+
164+
def _find_changed_files(self,
                        before_states: Dict[str, FileState],
                        after_states: Dict[str, FileState]) -> Set[str]:
    """
    Diff two directory snapshots and report the new or modified files.

    A file is treated as modified when its size or its MD5 hash differs
    between the two snapshots; the modification time is not consulted.
    Files present only in ``before_states`` are logged as deleted but are
    not included in the result.

    Returns the set of relative paths that are new or modified.
    """
    changed_files = set()

    for rel_path, after_state in after_states.items():
        # Absent from the earlier snapshot: the file is new
        if rel_path not in before_states:
            logger.info(f"New file detected: {rel_path}")
            changed_files.add(rel_path)
            continue

        before_state = before_states[rel_path]
        same_content = (before_state.size == after_state.size and
                        before_state.md5_hash == after_state.md5_hash)
        if same_content:
            logger.info(f"Unchanged file: {rel_path}, size={after_state.size}, hash={after_state.md5_hash}")
            continue

        logger.info(f"Modified file detected: {rel_path}, before={before_state.size}:{before_state.md5_hash}, after={after_state.size}:{after_state.md5_hash}")
        changed_files.add(rel_path)

    # Deletions are only reported in the log
    for rel_path in before_states:
        if rel_path in after_states:
            continue
        logger.info(f"File deleted: {rel_path}")

    logger.info(f"Before scan: {len(before_states)} files, After scan: {len(after_states)} files, Changed: {len(changed_files)} files")

    return changed_files
197+
99198
async def _update_container_metrics(self, container) -> None:
100199
"""Update metrics for a running container."""
101200
try:
@@ -200,6 +299,11 @@ async def execute(
200299
logger.info(f"Session directory contents: {list(session_path.glob('*'))}")
201300
logger.info(f"Code to execute: {code}")
202301

302+
# Scan directory before execution to track file state
303+
logger.info(f"Scanning directory {session_path} before code execution")
304+
before_file_states = self._scan_directory(session_path)
305+
logger.info(f"Found {len(before_file_states)} files before execution")
306+
203307
async with self._container_semaphore:
204308
try:
205309
# Ensure the image is available
@@ -333,32 +437,46 @@ async def execute(
333437
if exec_inspect["ExitCode"] != 0:
334438
return {"stdout": "", "stderr": output_text, "status": "error", "files": []}
335439

336-
# List files in the session directory
440+
# Scan directory after execution to detect changes
441+
logger.info(f"Scanning directory {session_path} after code execution")
442+
after_file_states = self._scan_directory(session_path)
443+
logger.info(f"Found {len(after_file_states)} files after execution")
444+
445+
# Identify changed files
446+
changed_file_paths = self._find_changed_files(before_file_states, after_file_states)
447+
logger.info(f"Detected {len(changed_file_paths)} changed files: {changed_file_paths}")
448+
449+
# Process only new or modified files
337450
output_files = []
338451
existing_filenames = {file["name"] for file in (files or [])}
339452
logger.info(f"Existing filenames: {existing_filenames}")
340-
logger.info(f"Scanning directory {session_path} for created files")
341-
for file_path in session_path.glob("*"):
342-
if file_path.is_file() and file_path.name not in existing_filenames:
453+
454+
for rel_path in changed_file_paths:
455+
file_path = session_path / rel_path
456+
if file_path.is_file():
343457
file_id = generate_id()
344458
file_size = file_path.stat().st_size
345-
logger.info(f"Found new file: {file_path}, size: {file_size}")
459+
logger.info(f"Processing changed file: {file_path}, size: {file_size}")
346460

347461
# Calculate file metadata
348462
content_type, _ = mimetypes.guess_type(file_path.name) or ("application/octet-stream", None)
349463
etag = hashlib.md5(str(file_path.stat().st_mtime).encode()).hexdigest()
350464

351465
# Prepare file data for database
466+
# Use directory structure in filepath if present
467+
filepath = f"{session_id}/{rel_path}"
468+
filename = Path(rel_path).name
469+
352470
file_data = {
353471
"id": file_id,
354472
"session_id": session_id,
355-
"filename": file_path.name,
356-
"filepath": session_id + "/" + file_path.name,
473+
"filename": filename, # This is used by the API to convert to FileRef.name
474+
"filepath": filepath,
357475
"size": file_size,
358476
"content_type": content_type,
359-
"original_filename": file_path.name,
477+
"original_filename": filename,
360478
"etag": etag,
361-
"name": f"{session_id}/{file_id}/{file_path.name}",
479+
"name": f"{session_id}/{file_id}/{filename}", # Full path for storage/reference
362480
}
363481
logger.info(f"Saving file metadata to database: {file_data}")
364482

pyproject.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ readme = "README.md"
66
requires-python = ">=3.11"
77
dependencies = [
88
"fastapi[standard]>=0.115.8",
9-
# "uvicorn>=0.27.1",
109
"python-multipart>=0.0.9",
1110
"pydantic>=2.10.6",
1211
"pydantic-settings>=2.8.0",

0 commit comments

Comments
 (0)