Skip to content

Commit a9a2791

Browse files
authored
Find files tool (#881)
1 parent a204611 commit a9a2791

File tree

4 files changed

+118
-0
lines changed

4 files changed

+118
-0
lines changed

src/codegen/extensions/langchain/tools.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
from codegen.extensions.tools.replacement_edit import replacement_edit
2424
from codegen.extensions.tools.reveal_symbol import reveal_symbol
2525
from codegen.extensions.tools.search import search
26+
from codegen.extensions.tools.search_files_by_name import search_files_by_name
2627
from codegen.extensions.tools.semantic_edit import semantic_edit
2728
from codegen.extensions.tools.semantic_search import semantic_search
2829
from codegen.sdk.core.codebase import Codebase
@@ -1024,3 +1025,30 @@ def _run(
10241025
result = perform_reflection(context_summary=context_summary, findings_so_far=findings_so_far, current_challenges=current_challenges, reflection_focus=reflection_focus, codebase=self.codebase)
10251026

10261027
return result.render()
1028+
1029+
1030+
class SearchFilesByNameInput(BaseModel):
    """Input for searching files by name pattern."""

    # The only argument the tool takes: a glob matched against file names.
    pattern: str = Field(
        ...,
        description="Glob pattern to search for (e.g. '*.py', 'test_*.py')",
    )
1034+
1035+
1036+
class SearchFilesByNameTool(BaseTool):
    """Tool wrapper that exposes filename search over a codebase."""

    name: ClassVar[str] = "search_files_by_name"
    description: ClassVar[str] = """
Search for files and directories by glob pattern across the active codebase. This is useful when you need to:
- Find specific file types (e.g., '*.py', '*.tsx')
- Locate configuration files (e.g., 'package.json', 'requirements.txt')
- Find files with specific names (e.g., 'README.md', 'Dockerfile')
"""
    args_schema: ClassVar[type[BaseModel]] = SearchFilesByNameInput
    # Excluded from serialization; only used to run the search.
    codebase: Codebase = Field(exclude=True)

    def __init__(self, codebase: Codebase):
        """Bind the tool to the codebase it will search."""
        super().__init__(codebase=codebase)

    def _run(self, pattern: str) -> str:
        """Run the filename search for ``pattern`` and return the rendered observation."""
        observation = search_files_by_name(self.codebase, pattern)
        return observation.render()

src/codegen/extensions/tools/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
from .reveal_symbol import reveal_symbol
2323
from .run_codemod import run_codemod
2424
from .search import search
25+
from .search_files_by_name import search_files_by_name
2526
from .semantic_edit import semantic_edit
2627
from .semantic_search import semantic_search
2728
from .view_file import view_file
@@ -52,6 +53,7 @@
5253
"run_codemod",
5354
# Search operations
5455
"search",
56+
"search_files_by_name",
5557
# Edit operations
5658
"semantic_edit",
5759
"semantic_search",
src/codegen/extensions/tools/search_files_by_name.py

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
import shutil
2+
import subprocess
3+
from typing import ClassVar
4+
5+
from pydantic import Field
6+
7+
from codegen.extensions.tools.observation import Observation
8+
from codegen.sdk.core.codebase import Codebase
9+
from codegen.shared.logging.get_logger import get_logger
10+
11+
logger = get_logger(__name__)
12+
13+
14+
class SearchFilesByNameResultObservation(Observation):
    """Response from searching files by filename pattern."""

    # Summary template; its placeholders name the ``total`` property and the ``pattern`` field.
    str_template: ClassVar[str] = "Found {total} files matching pattern: {pattern}"

    pattern: str = Field(description="The glob pattern that was searched for")
    files: list[str] = Field(description="List of matching file paths")

    @property
    def total(self) -> int:
        """Number of paths in ``files``."""
        match_count = len(self.files)
        return match_count
29+
30+
31+
def search_files_by_name(
    codebase: Codebase,
    pattern: str,
) -> SearchFilesByNameResultObservation:
    """Search for files by name pattern in the codebase.

    Uses ``fd`` in glob mode when it is available on PATH and falls back to
    ``find -name`` otherwise. The command runs with ``codebase.repo_path`` as
    its working directory, so the returned paths are relative to that
    directory.

    Args:
        codebase: The codebase to search in
        pattern: Glob pattern to search for (e.g. "*.py", "test_*.py")

    Returns:
        An observation with ``status="success"`` and the matching paths, or
        ``status="error"`` (with an empty ``files`` list and an ``error``
        message) if the search command could not be run or failed.
    """
    try:
        if shutil.which("fd") is None:
            logger.warning("fd is not installed, falling back to find")
            # Give find an explicit "." start path: GNU find defaults to the
            # current directory, but BSD/macOS find rejects the call without it.
            command = ["find", ".", "-name", pattern]
        else:
            logger.info(f"Searching for files with pattern: {pattern}")
            # "--" ends option parsing so a pattern that begins with "-" is
            # treated as the search pattern rather than as an fd flag.
            command = ["fd", "-g", "--", pattern]

        raw_output = subprocess.check_output(
            command,
            cwd=codebase.repo_path,
            timeout=30,
        )

        # Replace undecodable bytes instead of failing the whole search on a
        # single file name that is not valid UTF-8.
        lines = raw_output.decode("utf-8", errors="replace").split("\n")

        # find always prefixes results with "./" and some fd versions do too;
        # normalise both so callers get plain repo-relative paths.
        files = [line.removeprefix("./") for line in lines if line]

        return SearchFilesByNameResultObservation(
            status="success",
            pattern=pattern,
            files=files,
        )

    except Exception as e:
        # Tool boundary: report any failure (missing binary, timeout, non-zero
        # exit status) as an error observation instead of raising.
        return SearchFilesByNameResultObservation(
            status="error",
            error=f"Error searching files: {e!s}",
            pattern=pattern,
            files=[],
        )

tests/unit/codegen/extensions/test_tools.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
replacement_edit,
1616
reveal_symbol,
1717
run_codemod,
18+
search_files_by_name,
1819
semantic_edit,
1920
semantic_search,
2021
view_file,
@@ -282,6 +283,21 @@ def test_move_symbol(codebase):
282283
assert result.target_file == "src/target.py"
283284

284285

286+
def test_search_files_by_name(codebase):
    """Test searching files by name."""
    create_file(codebase, "src/main.py", "print('hello')")
    create_file(codebase, "src/target.py", "print('world')")

    # A wildcard pattern should return both Python files.
    wildcard_result = search_files_by_name(codebase, "*.py")
    assert wildcard_result.status == "success"
    assert len(wildcard_result.files) == 2
    for expected_path in ("src/main.py", "src/target.py"):
        assert expected_path in wildcard_result.files

    # An exact file name should return only that file.
    exact_result = search_files_by_name(codebase, "main.py")
    assert exact_result.status == "success"
    assert len(exact_result.files) == 1
    assert "src/main.py" in exact_result.files
299+
300+
285301
def test_reveal_symbol(codebase):
286302
"""Test revealing symbol relationships."""
287303
result = reveal_symbol(

0 commit comments

Comments
 (0)