Skip to content

Commit 3ffc430

Browse files
feat(parsing): parse logs using regex returned by LLM (#25)
* chore(gitignore): exclude `.idea/` * feat(parsing): adds regex parsing using LLM * chore(package.json): bump version to 0.4.12
1 parent 2a7f6d8 commit 3ffc430

File tree

4 files changed

+482
-3
lines changed

4 files changed

+482
-3
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,6 @@ node_modules
44
*.log
55
.vscode-test/
66
*.vsix
7+
8+
# JetBrains
9+
.idea/

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"name": "traceback",
33
"displayName": "TraceBack",
44
"description": "A VS Code extension that brings telemetry data (traces, logs, and metrics) into your code.",
5-
"version": "0.4.11",
5+
"version": "0.4.12",
66
"publisher": "hyperdrive-eng",
77
"repository": {
88
"type": "git",

src/claudeService.ts

Lines changed: 211 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,13 +25,21 @@ export interface CallerAnalysis {
2525
}>;
2626
}
2727

28+
/**
 * One log-parsing pattern as returned by the `generate_log_regex` tool call.
 *
 * - `pattern`: regular expression source in JavaScript syntax; the prompt asks
 *   the model to use named capture groups.
 * - `description`: human-readable description of what the pattern matches.
 * - `extractionMap`: ties named capture groups to LogEntry fields. The tool
 *   schema in this file lists the keys `severity`, `timestamp`, `message` and
 *   `serviceName`, each holding a capture-group name, and allows extra keys.
 *   NOTE(review): the schema's own description words the direction the other
 *   way round ("capture group names to LogEntry fields") — confirm with the
 *   consumer of this map.
 */
export interface RegexPattern {
29+
pattern: string;
30+
description: string;
31+
extractionMap: Record<string, string>;
32+
}
33+
2834
export class ClaudeService {
2935
private static instance: ClaudeService;
3036
private apiKey: string | undefined;
3137
private apiEndpoint: string = 'https://api.anthropic.com/v1/messages';
3238

3339
// Default model for all tasks except regex generation, which uses haikuModel
3440
private model: string = 'claude-3-7-sonnet-20250219';
41+
// Use Claude Haiku for regex generation for faster responses and lower cost
42+
private haikuModel: string = 'claude-3-haiku-20240307';
3543

3644
private constructor() {
3745
// Load API key from workspace state if available
@@ -89,6 +97,28 @@ export class ClaudeService {
8997
}
9098
}
9199

100+
/**
 * Ask Claude to produce regex patterns capable of parsing the supplied log lines.
 *
 * @param logSamples Sample log lines for the model to analyze
 * @param expectedResults Optional expected parse results, paired by index with the samples
 * @returns The regex patterns (with their extraction maps) produced by the model
 * @throws Error when no API key is configured, or when the underlying Claude call fails
 */
public async generateLogParsingRegex(
  logSamples: string[],
  expectedResults?: Record<string, any>[]
): Promise<RegexPattern[]> {
  const hasApiKey = Boolean(this.apiKey);
  if (!hasApiKey) {
    throw new Error('Claude API key not set. Please set your API key first.');
  }

  let generated: RegexPattern[];
  try {
    generated = await this.callClaudeForRegexPatterns(logSamples, expectedResults);
  } catch (cause) {
    // Log the underlying failure, then surface a generic error to the caller.
    console.error('Error generating regex patterns with Claude:', cause);
    throw new Error('Failed to generate regex patterns with Claude');
  }
  return generated;
}
121+
92122
private async callClaude(logMessage: string, language: string): Promise<LLMLogAnalysis> {
93123
const tools = [{
94124
name: "analyze_log",
@@ -356,6 +386,187 @@ Use the analyze_callers function to return the results in the exact format requi
356386
}
357387
}
358388

389+
/**
 * Call the Claude Messages API (Haiku model) and force it to answer through the
 * `generate_log_regex` tool, then validate the patterns it returns.
 *
 * Only the first 20 samples are sent, to stay within token limits. When
 * `expectedResults` is given, entry `i` is presented to the model as the
 * expected parse of sample `i`.
 *
 * @param logSamples Log lines to derive patterns from
 * @param expectedResults Optional expected parse results, paired by index with the samples
 * @returns Only the patterns that have a pattern, a description and an
 *          extraction map, and whose pattern compiles with `new RegExp`
 * @throws Error when the API key is missing, the HTTP call fails, the body is
 *         not JSON, or the response lacks a tool_use block with a `patterns` array
 */
private async callClaudeForRegexPatterns(
  logSamples: string[],
  expectedResults?: Record<string, any>[]
): Promise<RegexPattern[]> {
  // Guard explicitly instead of asserting non-null when building the headers.
  const apiKey = this.apiKey;
  if (!apiKey) {
    throw new Error('Claude API key not set. Please set your API key first.');
  }

  // Limit number of samples to avoid token limits
  const MAX_SAMPLES = 20;
  const selectedSamples = logSamples.slice(0, MAX_SAMPLES);

  const tools = [{
    name: "generate_log_regex",
    description: "Generate regex patterns for parsing log lines",
    input_schema: {
      type: "object",
      properties: {
        patterns: {
          type: "array",
          items: {
            type: "object",
            properties: {
              pattern: {
                type: "string",
                description: "The regular expression pattern in JavaScript syntax"
              },
              description: {
                type: "string",
                description: "Description of what this pattern matches"
              },
              extractionMap: {
                type: "object",
                description: "Maps regex capture group names to LogEntry fields",
                properties: {
                  severity: {
                    type: "string",
                    description: "Capture group name for severity level"
                  },
                  timestamp: {
                    type: "string",
                    description: "Capture group name for timestamp"
                  },
                  message: {
                    type: "string",
                    description: "Capture group name for message content"
                  },
                  serviceName: {
                    type: "string",
                    description: "Capture group name for service name"
                  },
                },
                additionalProperties: true
              }
            },
            required: ["pattern", "description", "extractionMap"]
          }
        }
      },
      required: ["patterns"]
    }
  }];

  let prompt = `Generate regular expression patterns to parse these log lines into a structured format. We need to extract key components:

1. Severity level (e.g., INFO, DEBUG, ERROR) - if present
2. Message content - the main content of the log (required)
3. Variables - values shown in the log (e.g., "user_id=123") - if present
4. Timestamp - in any format - if present
5. Service name or component - if present

Log samples:
${selectedSamples.map((sample, i) => `${i+1}. ${sample}`).join('\n')}`;

  // Add expected results for some samples if provided
  if (expectedResults && expectedResults.length > 0) {
    prompt += `\n\nFor some log lines, these are examples of the expected parsing results:`;

    const pairCount = Math.min(expectedResults.length, selectedSamples.length);
    for (let i = 0; i < pairCount; i++) {
      prompt += `\n\nLog: ${selectedSamples[i]}\nParsed:`;

      for (const [key, value] of Object.entries(expectedResults[i])) {
        prompt += `\n ${key}: ${JSON.stringify(value)}`;
      }
    }
  }

  prompt += `\n
Create one or more regex patterns that collectively handle these different log formats.

For each pattern:
1. Use JavaScript regex syntax with named capture groups (e.g., "(?<severity>INFO|ERROR)")
2. Include a clear description of what types of logs the pattern matches
3. Include an "extractionMap" that maps regex capture group names to LogEntry field names

The pattern should be comprehensive enough to extract:
- severity: The log severity level if present (INFO, DEBUG, ERROR, etc.)
- timestamp: The timestamp in any format, if present
- message: The main log message content
- serviceName: The name of the service or component generating the log
- Any other relevant fields

Ensure the regex patterns:
- Are compatible with JavaScript's regular expression engine
- Use named capture groups for all extracted fields
- Are flexible enough to handle variations in format
- Are precise enough to avoid false positives
- Collectively cover all the provided log samples
- Handle both structured and unstructured log formats

Return the patterns using the generate_log_regex function.`;

  const request = {
    messages: [{
      role: 'user',
      content: prompt
    }],
    model: this.haikuModel, // Use Haiku for regex generation (faster, cheaper)
    max_tokens: 4000,
    tools: tools,
    tool_choice: {
      type: "tool",
      name: "generate_log_regex"
    }
  };

  try {
    const response = await fetch(this.apiEndpoint, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'X-Api-Key': apiKey,
        'anthropic-version': '2023-06-01'
      },
      body: JSON.stringify(request)
    });

    let responseData;
    try {
      responseData = await response.json();
    } catch (e) {
      throw new Error(`Invalid JSON response: ${e}`);
    }

    if (!response.ok) {
      throw new Error(`Claude API error: ${response.statusText}\nDetails: ${JSON.stringify(responseData)}`);
    }

    // Validate response structure: locate the tool_use block rather than
    // assuming it is the first content block, and require `patterns` to be an
    // array so the loop below cannot fail with an unrelated TypeError.
    const content = responseData ? responseData.content : undefined;
    const toolUse = Array.isArray(content)
      ? content.find((block) => block && block.type === 'tool_use')
      : undefined;
    const candidates = toolUse && toolUse.input ? toolUse.input.patterns : undefined;
    if (!Array.isArray(candidates)) {
      throw new Error('Invalid response format from Claude API: Missing required structure');
    }

    // Collect valid patterns into a fresh array. Removing entries from the
    // array being iterated would skip the element after each removal, and
    // structurally invalid entries must not be returned to the caller.
    const validPatterns: RegexPattern[] = [];
    for (const candidate of candidates) {
      if (!candidate || !candidate.pattern || !candidate.description || !candidate.extractionMap) {
        console.warn('Invalid pattern in Claude response:', candidate);
        continue;
      }

      // Test if the pattern is a valid regex
      try {
        new RegExp(candidate.pattern);
      } catch (error) {
        console.warn(`Invalid regex pattern: ${candidate.pattern}`, error);
        continue;
      }

      validPatterns.push(candidate);
    }

    return validPatterns;
  } catch (error) {
    console.error('Error calling Claude API for regex patterns:', error);
    throw error;
  }
}
569+
359570
/**
360571
* Filter and limit log lines to the most relevant ones for analysis
361572
* @param currentLogLine The log line being analyzed

0 commit comments

Comments
 (0)