@@ -25,13 +25,21 @@ export interface CallerAnalysis {
25
25
} > ;
26
26
}
27
27
28
/**
 * A regular expression proposed for parsing log lines, bundled with the
 * metadata needed to interpret its matches.
 */
export interface RegexPattern {
    /** Regular expression source, written in JavaScript regex syntax. */
    pattern: string;

    /** Human-readable summary of which log lines the pattern matches. */
    description: string;

    /**
     * Associates the pattern's named capture groups with log entry fields
     * (for example severity, timestamp, message, serviceName).
     */
    extractionMap: Record<string, string>;
}
33
+
28
34
export class ClaudeService {
29
35
private static instance : ClaudeService ;
30
36
private apiKey : string | undefined ;
31
37
private apiEndpoint : string = 'https://api.anthropic.com/v1/messages' ;
32
38
33
39
// Default model; regex generation uses the separate haikuModel declared below
34
40
private model : string = 'claude-3-7-sonnet-20250219' ;
41
+ // Use Claude Haiku for regex generation for faster responses and lower cost
42
+ private haikuModel : string = 'claude-3-haiku-20240307' ;
35
43
36
44
private constructor ( ) {
37
45
// Load API key from workspace state if available
@@ -89,6 +97,28 @@ export class ClaudeService {
89
97
}
90
98
}
91
99
100
/**
 * Public entry point for asking Claude to derive log-parsing regex patterns.
 *
 * Verifies that an API key is configured, then delegates the request to
 * callClaudeForRegexPatterns. Any failure of that call is written to the
 * console and replaced with a generic error.
 *
 * @param logSamples Log line samples to analyze
 * @param expectedResults Optional expected parsing results for some samples
 * @returns The regex patterns, each with its extraction map
 * @throws Error when no API key is set, or when pattern generation fails
 */
public async generateLogParsingRegex(
    logSamples: string[],
    expectedResults?: Record<string, any>[]
): Promise<RegexPattern[]> {
    const hasApiKey = Boolean(this.apiKey);
    if (!hasApiKey) {
        throw new Error('Claude API key not set. Please set your API key first.');
    }

    let generated: RegexPattern[];
    try {
        generated = await this.callClaudeForRegexPatterns(logSamples, expectedResults);
    } catch (cause) {
        // The underlying failure is only logged; callers see the generic message.
        console.error('Error generating regex patterns with Claude:', cause);
        throw new Error('Failed to generate regex patterns with Claude');
    }

    return generated;
}
121
+
92
122
private async callClaude ( logMessage : string , language : string ) : Promise < LLMLogAnalysis > {
93
123
const tools = [ {
94
124
name : "analyze_log" ,
@@ -356,6 +386,187 @@ Use the analyze_callers function to return the results in the exact format requi
356
386
}
357
387
}
358
388
389
/**
 * Send the log samples to the Claude messages endpoint and return the regex
 * patterns produced through the forced `generate_log_regex` tool call.
 *
 * At most 20 samples are included in the prompt. When expected results are
 * supplied they are paired with the samples by index; entries that are not
 * objects (e.g. holes in the array) are skipped. The request uses the Haiku
 * model configured on this service.
 *
 * Only patterns that carry a non-empty `pattern` and `description`, an
 * `extractionMap` object, and a `pattern` that compiles with `new RegExp`
 * are returned; every rejected pattern is reported with `console.warn`.
 *
 * @param logSamples Log line samples to analyze
 * @param expectedResults Optional expected parsing results, index-aligned with logSamples
 * @returns The validated regex patterns (possibly an empty array)
 * @throws Error when the API key is missing, the response is not JSON, the
 *         HTTP status is not OK, or the response lacks the expected tool output
 */
private async callClaudeForRegexPatterns(
    logSamples: string[],
    expectedResults?: Record<string, any>[]
): Promise<RegexPattern[]> {
    // Guard here as well so the header below never carries an undefined key.
    const apiKey = this.apiKey;
    if (!apiKey) {
        throw new Error('Claude API key not set. Please set your API key first.');
    }

    // Limit number of samples to avoid token limits
    const MAX_SAMPLES = 20;
    const selectedSamples = logSamples.slice(0, MAX_SAMPLES);

    const tools = [{
        name: "generate_log_regex",
        description: "Generate regex patterns for parsing log lines",
        input_schema: {
            type: "object",
            properties: {
                patterns: {
                    type: "array",
                    items: {
                        type: "object",
                        properties: {
                            pattern: {
                                type: "string",
                                description: "The regular expression pattern in JavaScript syntax"
                            },
                            description: {
                                type: "string",
                                description: "Description of what this pattern matches"
                            },
                            extractionMap: {
                                type: "object",
                                description: "Maps regex capture group names to LogEntry fields",
                                properties: {
                                    severity: {
                                        type: "string",
                                        description: "Capture group name for severity level"
                                    },
                                    timestamp: {
                                        type: "string",
                                        description: "Capture group name for timestamp"
                                    },
                                    message: {
                                        type: "string",
                                        description: "Capture group name for message content"
                                    },
                                    serviceName: {
                                        type: "string",
                                        description: "Capture group name for service name"
                                    },
                                },
                                additionalProperties: true
                            }
                        },
                        required: ["pattern", "description", "extractionMap"]
                    }
                }
            },
            required: ["patterns"]
        }
    }];

    let prompt = `Generate regular expression patterns to parse these log lines into a structured format. We need to extract key components:

1. Severity level (e.g., INFO, DEBUG, ERROR) - if present
2. Message content - the main content of the log (required)
3. Variables - values shown in the log (e.g., "user_id=123") - if present
4. Timestamp - in any format - if present
5. Service name or component - if present

Log samples:
${selectedSamples.map((sample, i) => `${i + 1}. ${sample}`).join('\n')}`;

    // Add expected results for some samples if provided
    if (expectedResults && expectedResults.length > 0) {
        prompt += `\n\nFor some log lines, these are examples of the expected parsing results:`;

        const pairedCount = Math.min(expectedResults.length, selectedSamples.length);
        for (let i = 0; i < pairedCount; i++) {
            const expected = expectedResults[i];
            // Results are index-aligned with samples, so a sample without an
            // expectation shows up as a missing entry; skip it instead of crashing.
            if (expected === null || typeof expected !== 'object') {
                continue;
            }

            prompt += `\n\nLog: ${selectedSamples[i]}\nParsed:`;
            for (const [key, value] of Object.entries(expected)) {
                prompt += `\n  ${key}: ${JSON.stringify(value)}`;
            }
        }
    }

    prompt += `\n
Create one or more regex patterns that collectively handle these different log formats.

For each pattern:
1. Use JavaScript regex syntax with named capture groups (e.g., "(?<severity>INFO|ERROR)")
2. Include a clear description of what types of logs the pattern matches
3. Include an "extractionMap" that maps regex capture group names to LogEntry field names

The pattern should be comprehensive enough to extract:
- severity: The log severity level if present (INFO, DEBUG, ERROR, etc.)
- timestamp: The timestamp in any format, if present
- message: The main log message content
- serviceName: The name of the service or component generating the log
- Any other relevant fields

Ensure the regex patterns:
- Are compatible with JavaScript's regular expression engine
- Use named capture groups for all extracted fields
- Are flexible enough to handle variations in format
- Are precise enough to avoid false positives
- Collectively cover all the provided log samples
- Handle both structured and unstructured log formats

Return the patterns using the generate_log_regex function.`;

    const request = {
        messages: [{
            role: 'user',
            content: prompt
        }],
        model: this.haikuModel, // Use Haiku for regex generation (faster, cheaper)
        max_tokens: 4000,
        tools: tools,
        tool_choice: {
            type: "tool",
            name: "generate_log_regex"
        }
    };

    try {
        const response = await fetch(this.apiEndpoint, {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
                'X-Api-Key': apiKey,
                'anthropic-version': '2023-06-01'
            },
            body: JSON.stringify(request)
        });

        let responseData: any;
        try {
            responseData = await response.json();
        } catch (e) {
            throw new Error(`Invalid JSON response: ${e}`);
        }

        if (!response.ok) {
            throw new Error(`Claude API error: ${response.statusText}\nDetails: ${JSON.stringify(responseData)}`);
        }

        // Validate response structure: locate the tool_use block rather than
        // assuming it is the first content block.
        const content = responseData ? responseData.content : undefined;
        const toolUse = Array.isArray(content)
            ? content.find((block: any) => Boolean(block) && block.type === 'tool_use')
            : undefined;
        if (!toolUse || !toolUse.input || !Array.isArray(toolUse.input.patterns)) {
            throw new Error('Invalid response format from Claude API: Missing required structure');
        }

        // Collect valid patterns into a new array. Removing entries from the
        // array being iterated would make the loop skip the following element.
        const validPatterns: RegexPattern[] = [];
        for (const candidate of toolUse.input.patterns) {
            const hasRequiredFields =
                Boolean(candidate) &&
                typeof candidate.pattern === 'string' && candidate.pattern !== '' &&
                typeof candidate.description === 'string' && candidate.description !== '' &&
                typeof candidate.extractionMap === 'object' && candidate.extractionMap !== null;
            if (!hasRequiredFields) {
                console.warn('Invalid pattern in Claude response:', candidate);
                continue;
            }

            // Test if the pattern is a valid regex
            try {
                new RegExp(candidate.pattern);
            } catch (error) {
                console.warn(`Invalid regex pattern: ${candidate.pattern}`, error);
                continue;
            }

            validPatterns.push(candidate as RegexPattern);
        }

        return validPatterns;
    } catch (error) {
        console.error('Error calling Claude API for regex patterns:', error);
        throw error;
    }
}
569
+
359
570
/**
360
571
* Filter and limit log lines to the most relevant ones for analysis
361
572
* @param currentLogLine The log line being analyzed
0 commit comments