st3v3nmw
diff --git a/‎README.md‎
Lines changed: 2 additions & 2 deletions b/‎README.md‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎cmd/sourcerer/VERSION‎
Lines changed: 1 addition & 1 deletion b/‎cmd/sourcerer/VERSION‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎go.mod‎
Lines changed: 1 addition & 0 deletions b/‎go.mod‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎internal/analyzer/languages.go‎
Lines changed: 11 additions & 1 deletion b/‎internal/analyzer/languages.go‎
Lines changed: 11 additions & 1 deletion
diff --git a/‎internal/parser/go.go‎
Lines changed: 5 additions & 3 deletions b/‎internal/parser/go.go‎
Lines changed: 5 additions & 3 deletions
diff --git a/‎internal/parser/go_test.go‎
Lines changed: 11 additions & 13 deletions b/‎internal/parser/go_test.go‎
Lines changed: 11 additions & 13 deletions
diff --git a/‎internal/parser/markdown.go‎
Lines changed: 1 addition & 1 deletion b/‎internal/parser/markdown.go‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎internal/parser/parser.go‎
Lines changed: 47 additions & 35 deletions b/‎internal/parser/parser.go‎
Lines changed: 47 additions & 35 deletions
diff --git a/‎internal/parser/python.go‎
Lines changed: 59 additions & 0 deletions b/‎internal/parser/python.go‎
Lines changed: 59 additions & 0 deletions
@@ -94,9 +94,9 @@ dramatically reducing token usage and cognitive load.
 Language support requires writing [Tree-sitter queries](https://github.com/st3v3nmw/sourcerer-mcp/blob/main/internal/parser/go.go)
 to identify functions, classes, interfaces, and other code structures for each language.
 
-**Supported:** Go, Markdown
+**Supported:** Go, Markdown, Python
 
-**Planned:** Python, TypeScript, JavaScript
+**Planned:** TypeScript, JavaScript
 
 ## Contributing
 
 
@@ -1 +1 @@
-v0.3.1
+v0.4.0
@@ -13,6 +13,7 @@ require (
 	github.com/tree-sitter-grammars/tree-sitter-markdown v0.5.0
 	github.com/tree-sitter/go-tree-sitter v0.25.0
 	github.com/tree-sitter/tree-sitter-go v0.23.4
+	github.com/tree-sitter/tree-sitter-python v0.23.6
 )
 
 require (
 
@@ -12,6 +12,7 @@ type Language string
 const (
 	Go          Language = "go"
 	Markdown    Language = "markdown"
+	Python      Language = "python"
 	UnknownLang Language = "unknown"
 )
 
@@ -75,5 +76,14 @@ func init() {
 		[]string{".md", ".markdown"},
 		func(workspaceRoot string) (*parser.Parser, error) {
 			return parser.NewMarkdownParser(workspaceRoot)
-		})
+		},
+	)
+
+	languages.register(
+		Python,
+		[]string{".py"},
+		func(workspaceRoot string) (*parser.Parser, error) {
+			return parser.NewPythonParser(workspaceRoot)
+		},
+	)
 }
@@ -39,9 +39,11 @@ var GoSpec = &LanguageSpec{
 			NameQuery: `(const_declaration (const_spec name: (identifier) @name))`,
 		},
 	},
-	CommentTypes: []string{"comment"},
-	IgnoreTypes: []string{
-		"package_clause", // pollutes results with single-line matches
+	FoldIntoNextNode: []string{"comment"},
+	SkipTypes: []string{
+		// These pollute results
+		"package_clause",
+		"import_declaration",
 	},
 	FileTypeRules: []FileTypeRule{
 		{Pattern: "**/*_test.go", Type: FileTypeTests},
 
@@ -38,17 +38,6 @@ func (s *GoParserTestSuite) TestFunctionParsing() {
 			startLine: 1,
 			endLine:   1,
 		},
-		{
-			name:    "Imports Hashing",
-			path:    "44983311c5db2e3",
-			summary: "import (",
-			source: `import (
-	"context"
-	"fmt"
-)`,
-			startLine: 5,
-			endLine:   8,
-		},
 		{
 			name:    "Simple Function",
 			path:    "SimpleFunction",
@@ -513,6 +502,15 @@ var (
 			startLine: 63,
 			endLine:   68,
 		},
+		{
+			name:    "Another Multi Var Declaration",
+			path:    "b9303a3de4b66c8b",
+			summary: "var x, y string",
+			source: `// Another multi var declaration
+var x, y string`,
+			startLine: 70,
+			endLine:   71,
+		},
 		{
 			name:    "Single Variable",
 			path:    "DefaultConfig",
@@ -522,8 +520,8 @@ var DefaultConfig = BasicStruct{
 	Field1: "default",
 	Field2: 42,
 }`,
-			startLine: 70,
-			endLine:   74,
+			startLine: 73,
+			endLine:   77,
 		},
 	}
 
 
@@ -7,7 +7,7 @@ import (
 
 var MarkdownSpec = &LanguageSpec{
 	ExtractChildrenIn: []string{"section"},
-	IgnoreTypes: []string{
+	SkipTypes: []string{
 		// Headings are organizational markers, not containers.
 		"atx_heading", "setext_heading",
 		// We're chunking by section so lower level nodes don't get their own chunks
 
@@ -57,23 +57,35 @@ func (c *Chunk) ID() string {
 }
 
 // newChunk creates a new Chunk from related tree-sitter nodes
-func newChunk(
+func (p *Parser) newChunk(
 	node *tree_sitter.Node,
 	source []byte,
 	path string,
 	usedPaths map[string]bool,
 	fileType FileType,
-	comments []*tree_sitter.Node,
+	folded []*tree_sitter.Node,
+	extractor *NamedChunkExtractor,
 ) *Chunk {
 	finalPath := resolvePath(path, usedPaths)
-	startPos, startByte, endPos, endByte := calculateChunkBounds(node, comments)
-	nodeText := node.Utf8Text(source)
+	startPos, startByte, endPos, endByte := calculateChunkBounds(node, folded)
+
+	// Determine which node to use for the summary
+	summaryNode := node
+	if extractor != nil && extractor.SummaryNodeQuery != "" {
+		// Use the existing executeQuery method to find the summary node
+		matches, err := p.executeQuery(extractor.SummaryNodeQuery, node, source)
+		if err == nil && len(matches) > 0 {
+			summaryNode = matches[0]
+		}
+	}
+
+	summaryText := summaryNode.Utf8Text(source)
 	fullText := source[startByte:endByte]
 
 	return &Chunk{
 		Path:        finalPath,
 		Type:        string(fileType),
-		Summary:     summarize(nodeText),
+		Summary:     summarize(summaryText),
 		Source:      string(fullText),
 		StartLine:   startPos.Row + 1,
 		StartColumn: startPos.Column + 1,
@@ -101,8 +113,8 @@ func resolvePath(path string, usedPaths map[string]bool) string {
 }
 
 // calculateChunkBounds determines the start and end positions for a chunk,
-// extending to include any preceding comments
-func calculateChunkBounds(node *tree_sitter.Node, comments []*tree_sitter.Node) (
+// extending to include any preceding folded nodes
+func calculateChunkBounds(node *tree_sitter.Node, folded []*tree_sitter.Node) (
 	startPos tree_sitter.Point, startByte uint,
 	endPos tree_sitter.Point, endByte uint,
 ) {
@@ -111,10 +123,10 @@ func calculateChunkBounds(node *tree_sitter.Node, comments []*tree_sitter.Node)
 	endPos = node.EndPosition()
 	endByte = node.EndByte()
 
-	if len(comments) > 0 {
-		firstComment := comments[0]
-		startPos = firstComment.StartPosition()
-		startByte = firstComment.StartByte()
+	if len(folded) > 0 {
+		firstFolded := folded[0]
+		startPos = firstFolded.StartPosition()
+		startByte = firstFolded.StartByte()
 	}
 
 	return startPos, startByte, endPos, endByte
@@ -147,15 +159,16 @@ func summarize(source string) string {
 type LanguageSpec struct {
 	NamedChunks       map[string]NamedChunkExtractor // node types that can be extracted by name
 	ExtractChildrenIn []string                       // node types whose children should be recursively processed
-	CommentTypes      []string                       // node types that represent comments
-	IgnoreTypes       []string                       // node types to completely skip
+	FoldIntoNextNode  []string                       // node types to fold into next node, e.g., comments
+	SkipTypes         []string                       // node types to completely skip
 	FileTypeRules     []FileTypeRule                 // language-specific file type classification rules
 }
 
 // NamedChunkExtractor defines tree-sitter queries for extracting named code entities
 type NamedChunkExtractor struct {
-	NameQuery       string // query to extract the entity name
-	ParentNameQuery string // optional query to extract parent entity name for hierarchical paths
+	NameQuery        string // query to extract the entity name
+	ParentNameQuery  string // optional query to extract parent entity name for hierarchical paths
+	SummaryNodeQuery string // optional query to extract a specific node for the summary instead of the main node
 }
 
 // FileTypeRule defines a pattern-based rule for classifying file types
@@ -246,7 +259,6 @@ func (p *Parser) classifyFileType(filePath string) FileType {
 }
 
 // extractChunks recursively extracts semantic chunks from an AST node.
-// Comments are collected and folded into the next non-comment chunk to improve context.
 func (p *Parser) extractChunks(
 	node *tree_sitter.Node,
 	source []byte,
@@ -255,31 +267,31 @@ func (p *Parser) extractChunks(
 ) []*Chunk {
 	var chunks []*Chunk
 	usedPaths := map[string]bool{}
-	var comments []*tree_sitter.Node
+	var folded []*tree_sitter.Node
 
 	for i := uint(0); i < node.ChildCount(); i++ {
 		child := node.Child(i)
 		kind := child.Kind()
 
-		if slices.Contains(p.spec.IgnoreTypes, kind) {
-			// Process any preceding comments as standalone chunks
-			for _, comment := range comments {
-				chunks = append(chunks, p.extractNode(comment, source, usedPaths, fileType, nil))
+		if slices.Contains(p.spec.SkipTypes, kind) {
+			// Process any remaining folded nodes as standalone chunks
+			for _, foldedNode := range folded {
+				chunks = append(chunks, p.extractNode(foldedNode, source, usedPaths, fileType, nil))
 			}
-			comments = nil
+			folded = nil
 
 			continue
 		}
 
-		if slices.Contains(p.spec.CommentTypes, kind) {
-			comments = append(comments, child)
+		if slices.Contains(p.spec.FoldIntoNextNode, kind) {
+			folded = append(folded, child)
 			continue
 		}
 
-		// Process code nodes & fold comments, if any
-		chunk, path := p.createChunkFromNode(child, source, parentPath, fileType, usedPaths, comments)
+		// Process code nodes & folded nodes, if any
+		chunk, path := p.createChunkFromNode(child, source, parentPath, fileType, usedPaths, folded)
 		chunks = append(chunks, chunk)
-		comments = nil
+		folded = nil
 
 		// Recursively process children if specified
 		if slices.Contains(p.spec.ExtractChildrenIn, kind) {
@@ -288,9 +300,9 @@ func (p *Parser) extractChunks(
 		}
 	}
 
-	// Process any remaining comments as standalone chunks
-	for _, comment := range comments {
-		chunks = append(chunks, p.extractNode(comment, source, usedPaths, fileType, nil))
+	// Process any remaining folded nodes as standalone chunks
+	for _, foldedNode := range folded {
+		chunks = append(chunks, p.extractNode(foldedNode, source, usedPaths, fileType, nil))
 	}
 
 	return chunks
@@ -303,21 +315,21 @@ func (p *Parser) createChunkFromNode(
 	parentPath string,
 	fileType FileType,
 	usedPaths map[string]bool,
-	comments []*tree_sitter.Node,
+	folded []*tree_sitter.Node,
 ) (*Chunk, string) {
 	kind := node.Kind()
 	extractor, exists := p.spec.NamedChunks[kind]
 
 	if exists {
 		chunkPath, err := p.buildChunkPath(extractor, node, source, parentPath)
 		if err == nil {
-			chunk := newChunk(node, source, chunkPath, usedPaths, fileType, comments)
+			chunk := p.newChunk(node, source, chunkPath, usedPaths, fileType, folded, &extractor)
 			return chunk, chunkPath
 		}
 	}
 
 	// No named extractor or building chunk path failed, use content-hash
-	return p.extractNode(node, source, usedPaths, fileType, comments), parentPath
+	return p.extractNode(node, source, usedPaths, fileType, folded), parentPath
 }
 
 // extractNode creates a chunk from a node using content-based hashing for the path
@@ -326,12 +338,12 @@ func (p *Parser) extractNode(
 	source []byte,
 	usedPaths map[string]bool,
 	fileType FileType,
-	comments []*tree_sitter.Node,
+	folded []*tree_sitter.Node,
 ) *Chunk {
 	nodeSource := node.Utf8Text(source)
 	hash := fmt.Sprintf("%x", xxhash.Sum64String(nodeSource))
 
-	return newChunk(node, source, hash, usedPaths, fileType, comments)
+	return p.newChunk(node, source, hash, usedPaths, fileType, folded, nil)
 }
 
 // buildChunkPath constructs a hierarchical path for a named chunk using tree-sitter queries
 
@@ -0,0 +1,59 @@
+package parser
+
+import (
+	"fmt"
+
+	tree_sitter "github.com/tree-sitter/go-tree-sitter"
+	tree_sitter_python "github.com/tree-sitter/tree-sitter-python/bindings/go"
+)
+
+// PythonSpec describes how Python sources are chunked: which node kinds get
+// name-based paths, which kinds are folded into the following chunk, which
+// kinds are skipped outright, and how files are classified by their path.
+var PythonSpec = &LanguageSpec{
+	NamedChunks: map[string]NamedChunkExtractor{
+		"function_definition": {
+			NameQuery: `(function_definition name: (identifier) @name)`,
+		},
+		"class_definition": {
+			NameQuery: `(class_definition name: (identifier) @name)`,
+		},
+		// NOTE(review): the Python grammar does not appear to define a
+		// "method_definition" node kind (methods parse as function_definition
+		// nodes inside a class body), so a lookup by node kind would never
+		// select this entry. Kept as-is; confirm and remove if unused.
+		"method_definition": {
+			NameQuery: `
+				(class_definition
+					body: (block
+						(function_definition name: (identifier) @name)))`,
+		},
+		// A decorated definition wraps the real function or class: the chunk is
+		// named after the inner definition, and the summary is derived from the
+		// inner definition node rather than from the whole decorated node.
+		"decorated_definition": {
+			NameQuery: `(decorated_definition definition: [
+				(function_definition name: (identifier) @name)
+				(class_definition name: (identifier) @name)
+			])`,
+			SummaryNodeQuery: `(decorated_definition definition: [
+				(function_definition) @summary
+				(class_definition) @summary
+			])`,
+		},
+	},
+	FoldIntoNextNode: []string{"comment"},
+	SkipTypes: []string{
+		// These pollute results. Python has three import node kinds:
+		// `import x`, `from x import y`, and `from __future__ import z`.
+		"import_statement",
+		"import_from_statement",
+		"future_import_statement",
+	},
+	FileTypeRules: []FileTypeRule{
+		{Pattern: "**/test*.py", Type: FileTypeTests},
+		{Pattern: "**/*_test.py", Type: FileTypeTests},
+		{Pattern: "**/__pycache__/**", Type: FileTypeIgnore},
+		{Pattern: "**/venv/**", Type: FileTypeIgnore},
+		{Pattern: "**/.venv/**", Type: FileTypeIgnore},
+		{Pattern: "**/env/**", Type: FileTypeIgnore},
+		{Pattern: "**/.env/**", Type: FileTypeIgnore},
+		{Pattern: "**/site-packages/**", Type: FileTypeIgnore},
+	},
+}
+
+// NewPythonParser builds a Parser for Python sources rooted at workspaceRoot,
+// backed by the tree-sitter Python grammar and configured with PythonSpec.
+//
+// It returns an error if the grammar cannot be installed on the underlying
+// tree-sitter parser.
+func NewPythonParser(workspaceRoot string) (*Parser, error) {
+	tsParser := tree_sitter.NewParser()
+
+	language := tree_sitter.NewLanguage(tree_sitter_python.Language())
+	if err := tsParser.SetLanguage(language); err != nil {
+		// Release the native parser; it is unusable without a language.
+		tsParser.Close()
+		return nil, fmt.Errorf("setting python language: %w", err)
+	}
+
+	return &Parser{
+		workspaceRoot: workspaceRoot,
+		parser:        tsParser,
+		spec:          PythonSpec,
+	}, nil
+}
Original file line number	Diff line number	Diff line change
`@@ -13,6 +13,7 @@ require (`
`13`	`13`	`github.com/tree-sitter-grammars/tree-sitter-markdown v0.5.0`
`14`	`14`	`github.com/tree-sitter/go-tree-sitter v0.25.0`
`15`	`15`	`github.com/tree-sitter/tree-sitter-go v0.23.4`
	`16`	`+ github.com/tree-sitter/tree-sitter-python v0.23.6`
`16`	`17`	`)`
`17`	`18`
`18`	`19`	`require (`