generated from cloudwego/.github
-
Notifications
You must be signed in to change notification settings - Fork 140
Open
Description
希望增加 Word 文档、Excel 文档支持。我写了一个版本,不知道是否合适。
https://github.com/leapar/eino-ext
package indexing
import (
	"context"
	"fmt"

	"github.com/cloudwego/eino-ext/components/document/loader/file"
	"github.com/cloudwego/eino-ext/components/document/parser/csv"
	"github.com/cloudwego/eino-ext/components/document/parser/doc"
	"github.com/cloudwego/eino-ext/components/document/parser/pdf"
	"github.com/cloudwego/eino-ext/components/document/parser/xlsx"
	"github.com/cloudwego/eino/components/document"
	"github.com/cloudwego/eino/components/document/parser"
)
// newLoader is the component initialization function of node 'FileLoader' in
// graph 'KnowledgeIndexing'.
//
// It constructs one parser per supported document format, registers them by
// file extension in an extension-based parser, and returns a file loader
// configured with that parser. The extension mapping is:
//
//	.pdf         -> PDF parser (constructed with ToPages: true)
//	.txt, .md    -> plain-text parser
//	.doc, .docx  -> doc parser
//	.xlsx        -> xlsx parser
//	.csv         -> csv parser
//
// The plain-text parser is also installed as the FallbackParser.
//
// If any constructor fails, the returned loader is nil and the error is
// wrapped with the name of the step that failed; the underlying error remains
// reachable through errors.Is / errors.As.
//
// NOTE(review): the individual parser constructors are called without ctx
// here; confirm this matches the signatures of the eino-ext version in use.
func newLoader(ctx context.Context) (ldr document.Loader, err error) {
	pdfParser, err := pdf.NewPDFParser(&pdf.Config{ToPages: true})
	if err != nil {
		return nil, fmt.Errorf("create pdf parser: %w", err)
	}

	docParser, err := doc.NewDocParser()
	if err != nil {
		return nil, fmt.Errorf("create doc parser: %w", err)
	}

	csvParser, err := csv.NewCsvParser()
	if err != nil {
		return nil, fmt.Errorf("create csv parser: %w", err)
	}

	xlsxParser, err := xlsx.NewXlsxParser()
	if err != nil {
		return nil, fmt.Errorf("create xlsx parser: %w", err)
	}

	// A single text parser serves .txt, .md and the fallback slot, so all
	// plain-text handling goes through the same instance.
	txtParser := &parser.TextParser{}

	extParser, err := parser.NewExtParser(ctx, &parser.ExtParserConfig{
		FallbackParser: txtParser,
		Parsers: map[string]parser.Parser{
			".pdf":  pdfParser,
			".txt":  txtParser,
			".md":   txtParser,
			".doc":  docParser,
			".docx": docParser,
			".xlsx": xlsxParser,
			".csv":  csvParser,
		},
	})
	if err != nil {
		return nil, fmt.Errorf("create ext parser: %w", err)
	}

	// TODO Modify component configuration here.
	config := &file.FileLoaderConfig{
		Parser: extParser,
	}

	ldr, err = file.NewFileLoader(ctx, config)
	if err != nil {
		return nil, fmt.Errorf("create file loader: %w", err)
	}
	return ldr, nil
}
Metadata
Metadata
Assignees
Labels
No labels