@@ -57,23 +57,35 @@ func (c *Chunk) ID() string {
57
57
}
58
58
59
59
// newChunk creates a new Chunk from related tree-sitter nodes
60
- func newChunk (
60
+ func ( p * Parser ) newChunk (
61
61
node * tree_sitter.Node ,
62
62
source []byte ,
63
63
path string ,
64
64
usedPaths map [string ]bool ,
65
65
fileType FileType ,
66
- comments []* tree_sitter.Node ,
66
+ folded []* tree_sitter.Node ,
67
+ extractor * NamedChunkExtractor ,
67
68
) * Chunk {
68
69
finalPath := resolvePath (path , usedPaths )
69
- startPos , startByte , endPos , endByte := calculateChunkBounds (node , comments )
70
- nodeText := node .Utf8Text (source )
70
+ startPos , startByte , endPos , endByte := calculateChunkBounds (node , folded )
71
+
72
+ // Determine which node to use for the summary
73
+ summaryNode := node
74
+ if extractor != nil && extractor .SummaryNodeQuery != "" {
75
+ // Use the existing executeQuery method to find the summary node
76
+ matches , err := p .executeQuery (extractor .SummaryNodeQuery , node , source )
77
+ if err == nil && len (matches ) > 0 {
78
+ summaryNode = matches [0 ]
79
+ }
80
+ }
81
+
82
+ summaryText := summaryNode .Utf8Text (source )
71
83
fullText := source [startByte :endByte ]
72
84
73
85
return & Chunk {
74
86
Path : finalPath ,
75
87
Type : string (fileType ),
76
- Summary : summarize (nodeText ),
88
+ Summary : summarize (summaryText ),
77
89
Source : string (fullText ),
78
90
StartLine : startPos .Row + 1 ,
79
91
StartColumn : startPos .Column + 1 ,
@@ -101,8 +113,8 @@ func resolvePath(path string, usedPaths map[string]bool) string {
101
113
}
102
114
103
115
// calculateChunkBounds determines the start and end positions for a chunk,
104
- // extending to include any preceding comments
105
- func calculateChunkBounds (node * tree_sitter.Node , comments []* tree_sitter.Node ) (
116
+ // extending to include any preceding folded nodes
117
+ func calculateChunkBounds (node * tree_sitter.Node , folded []* tree_sitter.Node ) (
106
118
startPos tree_sitter.Point , startByte uint ,
107
119
endPos tree_sitter.Point , endByte uint ,
108
120
) {
@@ -111,10 +123,10 @@ func calculateChunkBounds(node *tree_sitter.Node, comments []*tree_sitter.Node)
111
123
endPos = node .EndPosition ()
112
124
endByte = node .EndByte ()
113
125
114
- if len (comments ) > 0 {
115
- firstComment := comments [0 ]
116
- startPos = firstComment .StartPosition ()
117
- startByte = firstComment .StartByte ()
126
+ if len (folded ) > 0 {
127
+ firstFolded := folded [0 ]
128
+ startPos = firstFolded .StartPosition ()
129
+ startByte = firstFolded .StartByte ()
118
130
}
119
131
120
132
return startPos , startByte , endPos , endByte
@@ -147,15 +159,16 @@ func summarize(source string) string {
147
159
type LanguageSpec struct {
148
160
NamedChunks map [string ]NamedChunkExtractor // node types that can be extracted by name
149
161
ExtractChildrenIn []string // node types whose children should be recursively processed
150
- CommentTypes []string // node types that represent comments
151
- IgnoreTypes []string // node types to completely skip
162
+ FoldIntoNextNode []string // node types to fold into next node, e.g., comments
163
+ SkipTypes []string // node types to completely skip
152
164
FileTypeRules []FileTypeRule // language-specific file type classification rules
153
165
}
154
166
155
167
// NamedChunkExtractor defines tree-sitter queries for extracting named code entities
156
168
type NamedChunkExtractor struct {
157
- NameQuery string // query to extract the entity name
158
- ParentNameQuery string // optional query to extract parent entity name for hierarchical paths
169
+ NameQuery string // query to extract the entity name
170
+ ParentNameQuery string // optional query to extract parent entity name for hierarchical paths
171
+ SummaryNodeQuery string // optional query to extract a specific node for the summary instead of the main node
159
172
}
160
173
161
174
// FileTypeRule defines a pattern-based rule for classifying file types
@@ -246,7 +259,6 @@ func (p *Parser) classifyFileType(filePath string) FileType {
246
259
}
247
260
248
261
// extractChunks recursively extracts semantic chunks from an AST node.
249
- // Comments are collected and folded into the next non-comment chunk to improve context.
250
262
func (p * Parser ) extractChunks (
251
263
node * tree_sitter.Node ,
252
264
source []byte ,
@@ -255,31 +267,31 @@ func (p *Parser) extractChunks(
255
267
) []* Chunk {
256
268
var chunks []* Chunk
257
269
usedPaths := map [string ]bool {}
258
- var comments []* tree_sitter.Node
270
+ var folded []* tree_sitter.Node
259
271
260
272
for i := uint (0 ); i < node .ChildCount (); i ++ {
261
273
child := node .Child (i )
262
274
kind := child .Kind ()
263
275
264
- if slices .Contains (p .spec .IgnoreTypes , kind ) {
265
- // Process any preceding comments as standalone chunks
266
- for _ , comment := range comments {
267
- chunks = append (chunks , p .extractNode (comment , source , usedPaths , fileType , nil ))
276
+ if slices .Contains (p .spec .SkipTypes , kind ) {
277
+ // Process any remaining folded nodes as standalone chunks
278
+ for _ , foldedNode := range folded {
279
+ chunks = append (chunks , p .extractNode (foldedNode , source , usedPaths , fileType , nil ))
268
280
}
269
- comments = nil
281
+ folded = nil
270
282
271
283
continue
272
284
}
273
285
274
- if slices .Contains (p .spec .CommentTypes , kind ) {
275
- comments = append (comments , child )
286
+ if slices .Contains (p .spec .FoldIntoNextNode , kind ) {
287
+ folded = append (folded , child )
276
288
continue
277
289
}
278
290
279
- // Process code nodes & fold comments , if any
280
- chunk , path := p .createChunkFromNode (child , source , parentPath , fileType , usedPaths , comments )
291
+ // Process code nodes & folded nodes , if any
292
+ chunk , path := p .createChunkFromNode (child , source , parentPath , fileType , usedPaths , folded )
281
293
chunks = append (chunks , chunk )
282
- comments = nil
294
+ folded = nil
283
295
284
296
// Recursively process children if specified
285
297
if slices .Contains (p .spec .ExtractChildrenIn , kind ) {
@@ -288,9 +300,9 @@ func (p *Parser) extractChunks(
288
300
}
289
301
}
290
302
291
- // Process any remaining comments as standalone chunks
292
- for _ , comment := range comments {
293
- chunks = append (chunks , p .extractNode (comment , source , usedPaths , fileType , nil ))
303
+ // Process any remaining folded nodes as standalone chunks
304
+ for _ , foldedNode := range folded {
305
+ chunks = append (chunks , p .extractNode (foldedNode , source , usedPaths , fileType , nil ))
294
306
}
295
307
296
308
return chunks
@@ -303,21 +315,21 @@ func (p *Parser) createChunkFromNode(
303
315
parentPath string ,
304
316
fileType FileType ,
305
317
usedPaths map [string ]bool ,
306
- comments []* tree_sitter.Node ,
318
+ folded []* tree_sitter.Node ,
307
319
) (* Chunk , string ) {
308
320
kind := node .Kind ()
309
321
extractor , exists := p .spec .NamedChunks [kind ]
310
322
311
323
if exists {
312
324
chunkPath , err := p .buildChunkPath (extractor , node , source , parentPath )
313
325
if err == nil {
314
- chunk := newChunk (node , source , chunkPath , usedPaths , fileType , comments )
326
+ chunk := p . newChunk (node , source , chunkPath , usedPaths , fileType , folded , & extractor )
315
327
return chunk , chunkPath
316
328
}
317
329
}
318
330
319
331
// No named extractor or building chunk path failed, use content-hash
320
- return p .extractNode (node , source , usedPaths , fileType , comments ), parentPath
332
+ return p .extractNode (node , source , usedPaths , fileType , folded ), parentPath
321
333
}
322
334
323
335
// extractNode creates a chunk from a node using content-based hashing for the path
@@ -326,12 +338,12 @@ func (p *Parser) extractNode(
326
338
source []byte ,
327
339
usedPaths map [string ]bool ,
328
340
fileType FileType ,
329
- comments []* tree_sitter.Node ,
341
+ folded []* tree_sitter.Node ,
330
342
) * Chunk {
331
343
nodeSource := node .Utf8Text (source )
332
344
hash := fmt .Sprintf ("%x" , xxhash .Sum64String (nodeSource ))
333
345
334
- return newChunk (node , source , hash , usedPaths , fileType , comments )
346
+ return p . newChunk (node , source , hash , usedPaths , fileType , folded , nil )
335
347
}
336
348
337
349
// buildChunkPath constructs a hierarchical path for a named chunk using tree-sitter queries
0 commit comments