Skip to content

Commit b53f132

Browse files
committed
feat: add codebase indexing service and improve configuration handling
- Add new CodebaseIndexService for providing project context to AI models
- Rename getAIInstructions to getAdditionalInstructions for clarity
- Update launch configuration with proposed API enablement
- Apply linting fixes across service files
- Update dependencies in package-lock.json

The codebase indexing service enables better AI-generated commit messages by analyzing project structure and key files.
1 parent 869eb03 commit b53f132

File tree

6 files changed

+199
-14
lines changed

6 files changed

+199
-14
lines changed

.vscode/launch.json

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,15 +9,22 @@
99
"name": "Run Extension",
1010
"type": "extensionHost",
1111
"request": "launch",
12-
"args": ["--extensionDevelopmentPath=${workspaceFolder}"],
12+
"args": [
13+
"--extensionDevelopmentPath=${workspaceFolder}",
14+
"--enable-proposed-api=hitclaw.diffy-explain-ai"
15+
],
1316
"outFiles": ["${workspaceFolder}/dist/**/*.js"],
1417
"preLaunchTask": "${defaultBuildTask}"
1518
},
1619
{
1720
"name": "Run Extension (Disable Other Extentions)",
1821
"type": "extensionHost",
1922
"request": "launch",
20-
"args": ["--extensionDevelopmentPath=${workspaceFolder}", "--disable-extensions"],
23+
"args": [
24+
"--extensionDevelopmentPath=${workspaceFolder}",
25+
"--disable-extensions",
26+
"--enable-proposed-api=hitclaw.diffy-explain-ai"
27+
],
2128
"outFiles": ["${workspaceFolder}/dist/**/*.js"],
2229
"preLaunchTask": "${defaultBuildTask}"
2330
},

package-lock.json

Lines changed: 7 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
import { countTokens } from "gpt-tokenizer";
2+
import * as vscode from "vscode";
3+
import { sendToOutput } from "../utils/log";
4+
import WorkspaceService from "./WorkspaceService";
5+
6+
/**
 * Singleton service that reads a configured list of workspace files and
 * formats their contents as a context section for AI prompts.
 *
 * Files are visited in configuration order. A file is skipped (and the loop
 * moves on to the next entry) when it is missing, is not a regular file, is
 * larger than the configured size limit, is empty, or would push the combined
 * token count past `MAX_TOTAL_TOKENS`.
 */
export default class CodebaseIndexService {
  static _instance: CodebaseIndexService;
  private readonly MAX_TOTAL_TOKENS = 5000; // Maximum tokens for all indexed files combined
  private readonly KB_TO_BYTES = 1024;

  private constructor() {}

  /**
   * Returns the shared instance, creating it on first use.
   * @returns {CodebaseIndexService} The instance of the class.
   */
  public static getInstance(): CodebaseIndexService {
    if (!CodebaseIndexService._instance) {
      CodebaseIndexService._instance = new CodebaseIndexService();
    }
    return CodebaseIndexService._instance;
  }

  /**
   * Builds the project-context text from the files listed in the workspace configuration.
   * @returns {Promise<string | null>} The formatted context, or null when the feature is
   * disabled, no workspace folder is available, no files are configured, or none of the
   * configured files could be indexed.
   */
  async getCodebaseContext(): Promise<string | null> {
    const workspaceService = WorkspaceService.getInstance();

    // Feature toggle: nothing is read from disk unless the user opted in.
    if (!workspaceService.getEnableCodebaseContext()) {
      return null;
    }

    const workspaceFolder = workspaceService.getCurrentWorkspace();
    if (!workspaceFolder) {
      sendToOutput("No workspace folder found for codebase indexing");
      return null;
    }

    const indexedFiles = workspaceService.getIndexedFiles();
    const maxFileSizeKB = workspaceService.getMaxIndexedFileSize();
    const maxFileSizeBytes = maxFileSizeKB * this.KB_TO_BYTES;

    if (!indexedFiles || indexedFiles.length === 0) {
      sendToOutput("No files configured for indexing");
      return null;
    }

    sendToOutput(`Starting codebase indexing with max file size: ${maxFileSizeKB}KB`);
    sendToOutput(`Files to index: ${indexedFiles.join(", ")}`);

    const indexedContent: Array<{
      file: string;
      content: string;
      tokens: number;
    }> = [];
    let totalTokens = 0;

    // Entries are plain workspace-relative paths; no glob expansion is performed.
    for (const relativePath of indexedFiles) {
      try {
        // NOTE(review): the entry is joined to the workspace root without checking that the
        // result stays inside the workspace; confirm whether `..` segments should be rejected.
        const fileUri = vscode.Uri.joinPath(workspaceFolder.uri, relativePath);
        const fileStat = await vscode.workspace.fs.stat(fileUri);

        // `type` is a bitmask (e.g. File | SymbolicLink), so test the File bit
        // rather than comparing for equality. Directories cannot be read as text.
        if ((fileStat.type & vscode.FileType.File) === 0) {
          sendToOutput(`Skipping ${relativePath}: not a regular file`);
          continue;
        }

        // Check the size before reading so oversized files are never loaded.
        if (fileStat.size > maxFileSizeBytes) {
          sendToOutput(
            `Skipping ${relativePath}: file size ${(fileStat.size / this.KB_TO_BYTES).toFixed(
              1,
            )}KB exceeds limit of ${maxFileSizeKB}KB`,
          );
          continue;
        }

        const rawBytes = await vscode.workspace.fs.readFile(fileUri);
        // Trim first so the token count reflects exactly the text that is emitted.
        const content = Buffer.from(rawBytes).toString("utf8").trim();

        if (content.length === 0) {
          sendToOutput(`Skipping ${relativePath}: file is empty`);
          continue;
        }

        const tokenCount = countTokens(content);

        // Skip (rather than stop) so that a later, smaller file can still fit the budget.
        if (totalTokens + tokenCount > this.MAX_TOTAL_TOKENS) {
          sendToOutput(
            `Skipping ${relativePath}: would exceed total token budget (${
              totalTokens + tokenCount
            } > ${this.MAX_TOTAL_TOKENS})`,
          );
          continue;
        }

        indexedContent.push({
          file: relativePath,
          content,
          tokens: tokenCount,
        });

        totalTokens += tokenCount;
        sendToOutput(`Indexed ${relativePath}: ${tokenCount} tokens`);
      } catch (error: unknown) {
        // Only a genuine "not found" is reported as such; every other failure
        // (permissions, provider errors, tokenizer errors) is reported with its cause.
        if (error instanceof vscode.FileSystemError && error.code === "FileNotFound") {
          sendToOutput(`File not found: ${relativePath}`);
        } else {
          const reason = error instanceof Error ? error.message : String(error);
          sendToOutput(`Error reading ${relativePath}: ${reason}`);
        }
      }
    }

    if (indexedContent.length === 0) {
      sendToOutput("No files were successfully indexed");
      return null;
    }

    // One markdown section per file: a heading with the path, then the content in a code fence.
    const contextParts = indexedContent.map(
      (item) => `### ${item.file}\n\`\`\`\n${item.content}\n\`\`\``,
    );

    const formattedContext = `
## PROJECT CONTEXT (${totalTokens} tokens from ${indexedContent.length} files)

The following files provide context about the project structure and dependencies:

${contextParts.join("\n\n")}

Use this context to understand the project's technology stack, dependencies, and architecture when generating commit messages.
`.trim();

    sendToOutput(
      `Codebase indexing complete: ${indexedContent.length} files, ${totalTokens} total tokens`,
    );

    return formattedContext;
  }
}

src/service/GeminiService.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ class GeminiService implements AIService {
3939
increment?: number | undefined;
4040
}>,
4141
): Promise<string | null> {
42-
const instructions = WorkspaceService.getInstance().getAIInstructions();
42+
const instructions = WorkspaceService.getInstance().getAdditionalInstructions();
4343
if (!instructions) {
4444
return null;
4545
}

src/service/OpenAiService.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ class OpenAiService implements AIService {
4949
increment?: number | undefined;
5050
}>,
5151
): Promise<string | null> {
52-
const instructions = WorkspaceService.getInstance().getAIInstructions();
52+
const instructions = WorkspaceService.getInstance().getAdditionalInstructions();
5353
if (!instructions) {
5454
return null;
5555
}

src/service/WorkspaceService.ts

Lines changed: 39 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -127,17 +127,12 @@ export default class WorkspaceService extends EventEmitter {
127127
return value;
128128
}
129129

130-
getAIInstructions() {
131-
const value = this.getConfiguration().get("aiInstructions")
132-
? String(this.getConfiguration().get("aiInstructions"))
130+
getAdditionalInstructions() {
131+
const value = this.getConfiguration().get("additionalInstructions")
132+
? String(this.getConfiguration().get("additionalInstructions"))
133133
: undefined;
134-
if (!value) {
135-
this.showErrorMessage(
136-
"Instructions for AI are absent; please provide them within the Diffy Settings section.",
137-
);
138-
return null;
139-
}
140-
return value;
134+
// Return undefined if empty, this is optional now
135+
return value?.trim() ? value : undefined;
141136
}
142137

143138
getTemp() {
@@ -207,6 +202,40 @@ Return ONLY the commit message, no explanations.`;
207202
return typeof value === "number" ? value : 72;
208203
}
209204

205+
/**
 * Reads the "respectGitignore" setting.
 * @returns {boolean} true only when the stored value is exactly `true`;
 * a missing or non-boolean value yields false.
 */
getRespectGitignore(): boolean {
  return this.getConfiguration().get("respectGitignore") === true;
}
209+
210+
/**
 * Reads the "enableCodebaseContext" setting.
 * @returns {boolean} true only when the stored value is exactly `true`;
 * a missing or non-boolean value yields false.
 */
getEnableCodebaseContext(): boolean {
  return this.getConfiguration().get("enableCodebaseContext") === true;
}
214+
215+
/**
 * Reads the "indexedFiles" setting.
 *
 * When the setting holds an array, only its string entries are returned:
 * settings are user-editable, so other element types can appear and would
 * violate the declared `string[]` return type. When the setting is absent or
 * not an array, a built-in list of common project manifest files is returned.
 * @returns {string[]} The configured file paths, or the default list.
 */
getIndexedFiles(): string[] {
  const configured: unknown = this.getConfiguration().get("indexedFiles");
  if (Array.isArray(configured)) {
    return configured.filter((entry): entry is string => typeof entry === "string");
  }
  // Default indexed files
  return [
    "package.json",
    "README.md",
    "Cargo.toml",
    "go.mod",
    "pom.xml",
    "build.gradle",
    "pyproject.toml",
    "setup.py",
    "composer.json",
  ];
}
233+
234+
/**
 * Reads the "maxIndexedFileSize" setting.
 * @returns {number} The stored value when it is a number, otherwise 50.
 */
getMaxIndexedFileSize(): number {
  const configured = this.getConfiguration().get("maxIndexedFileSize");
  if (typeof configured !== "number") {
    return 50;
  }
  return configured;
}
238+
210239
/**
211240
* This function shows an error message
212241
* @param {string} msg - The message to display.

0 commit comments

Comments
 (0)