From 2abef737dfa553abc5fedbff861e4783c18e85ec Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Sun, 8 Jun 2025 12:08:18 -0700 Subject: [PATCH 1/4] Refactor CatFile batch implementation and introduce batch-command for git 2.36 --- modules/git/batch.go | 144 ++++++++++++++++-- modules/git/batch_reader.go | 6 +- modules/git/blob_nogogit.go | 11 +- modules/git/git.go | 12 +- .../languagestats/language_stats_nogogit.go | 5 +- modules/git/pipeline/lfs_nogogit.go | 19 +-- modules/git/repo_base_nogogit.go | 47 +----- modules/git/repo_branch_nogogit.go | 12 +- modules/git/repo_commit_nogogit.go | 37 +++-- modules/git/repo_tag_nogogit.go | 11 +- modules/git/repo_tree_nogogit.go | 10 +- modules/git/tree_entry_nogogit.go | 6 +- modules/git/tree_nogogit.go | 8 +- modules/indexer/code/bleve/bleve.go | 9 +- .../code/elasticsearch/elasticsearch.go | 9 +- 15 files changed, 219 insertions(+), 127 deletions(-) diff --git a/modules/git/batch.go b/modules/git/batch.go index f9e1748b548f5..a89215dc95136 100644 --- a/modules/git/batch.go +++ b/modules/git/batch.go @@ -8,40 +8,160 @@ import ( "context" ) -type Batch struct { +type batchCatFile struct { cancel context.CancelFunc Reader *bufio.Reader Writer WriteCloserError } +func (b *batchCatFile) Close() { + if b.cancel != nil { + b.cancel() + b.Reader = nil + b.Writer = nil + b.cancel = nil + } +} + // NewBatch creates a new batch for the given repository, the Close must be invoked before release the batch -func NewBatch(ctx context.Context, repoPath string) (*Batch, error) { +func newBatch(ctx context.Context, repoPath string) (*batchCatFile, error) { // Now because of some insanity with git cat-file not immediately failing if not run in a valid git directory we need to run git rev-parse first! if err := ensureValidGitRepository(ctx, repoPath); err != nil { return nil, err } - var batch Batch - batch.Writer, batch.Reader, batch.cancel = catFileBatch(ctx, repoPath) + var batch batchCatFile + batch.Writer, batch.Reader, batch.cancel = catFileBatch(ctx, repoPath, "--batch") return &batch, nil } -func NewBatchCheck(ctx context.Context, repoPath string) (*Batch, error) { +func newBatchCheck(ctx context.Context, repoPath string) (*batchCatFile, error) { // Now because of some insanity with git cat-file not immediately failing if not run in a valid git directory we need to run git rev-parse first! if err := ensureValidGitRepository(ctx, repoPath); err != nil { return nil, err } - var check Batch + var check batchCatFile check.Writer, check.Reader, check.cancel = catFileBatchCheck(ctx, repoPath) return &check, nil } -func (b *Batch) Close() { - if b.cancel != nil { - b.cancel() - b.Reader = nil - b.Writer = nil - b.cancel = nil +func newBatchCommand(ctx context.Context, repoPath string) (*batchCatFile, error) { + // Now because of some insanity with git cat-file not immediately failing if not run in a valid git directory we need to run git rev-parse first! + if err := ensureValidGitRepository(ctx, repoPath); err != nil { + return nil, err + } + + var check batchCatFile + check.Writer, check.Reader, check.cancel = catFileBatch(ctx, repoPath, "--batch-command") + return &check, nil +} + +type Batch interface { + Write([]byte) (int, error) + WriteCheck([]byte) (int, error) + Reader() *bufio.Reader + CheckReader() *bufio.Reader + Close() +} + +// batchCatFileWithCheck implements the Batch interface using the "cat-file --batch" command and "cat-file --batch-check" command +// ref: https://git-scm.com/docs/git-cat-file#Documentation/git-cat-file.txt---batch +// To align with --batch-command, we creates the two commands both at the same time if git version is lower than 2.36 +type batchCatFileWithCheck struct { + batch *batchCatFile + batchCheck *batchCatFile +} + +var _ Batch = &batchCatFileWithCheck{} + +func newBatchCatFileWithCheck(ctx context.Context, repoPath string) (*batchCatFileWithCheck, error) { + batch, err := newBatch(ctx, repoPath) + if err != nil { + return nil, err + } + + batchCheck, err := newBatchCheck(ctx, repoPath) + if err != nil { + return nil, err + } + + return &batchCatFileWithCheck{ + batch: batch, + batchCheck: batchCheck, + }, nil +} + +func (b *batchCatFileWithCheck) Write(bs []byte) (int, error) { + return b.batch.Writer.Write(bs) +} + +func (b *batchCatFileWithCheck) WriteCheck(bs []byte) (int, error) { + return b.batchCheck.Writer.Write(bs) +} + +func (b *batchCatFileWithCheck) Reader() *bufio.Reader { + return b.batch.Reader +} + +func (b *batchCatFileWithCheck) CheckReader() *bufio.Reader { + return b.batchCheck.Reader +} + +func (b *batchCatFileWithCheck) Close() { + if b.batch != nil { + b.batch.Close() + b.batch = nil + } + if b.batchCheck != nil { + b.batchCheck.Close() + b.batchCheck = nil + } +} + +// batchCommandCatFile implements the Batch interface using the "cat-file --batch-command" command +// ref: https://git-scm.com/docs/git-cat-file#Documentation/git-cat-file.txt---batch-command +type batchCommandCatFile struct { + batch *batchCatFile +} + +func newBatchCommandCatFile(ctx context.Context, repoPath string) (*batchCommandCatFile, error) { + batch, err := newBatchCommand(ctx, repoPath) + if err != nil { + return nil, err + } + + return &batchCommandCatFile{ + batch: batch, + }, nil +} + +func (b *batchCommandCatFile) Write(bs []byte) (int, error) { + return b.batch.Writer.Write(append([]byte("contents "), bs...)) +} + +func (b *batchCommandCatFile) WriteCheck(bs []byte) (int, error) { + return b.batch.Writer.Write(append([]byte("info "), bs...)) +} + +func (b *batchCommandCatFile) Reader() *bufio.Reader { + return b.batch.Reader +} + +func (b *batchCommandCatFile) CheckReader() *bufio.Reader { + return b.batch.Reader +} + +func (b *batchCommandCatFile) Close() { + if b.batch != nil { + b.batch.Close() + b.batch = nil + } +} + +func NewBatch(ctx context.Context, repoPath string) (Batch, error) { + if DefaultFeatures().SupportCatFileBatchCommand { + return newBatchCommandCatFile(ctx, repoPath) } + return newBatchCatFileWithCheck(ctx, repoPath) } diff --git a/modules/git/batch_reader.go b/modules/git/batch_reader.go index 7bbab76bb821c..20e082526d3a8 100644 --- a/modules/git/batch_reader.go +++ b/modules/git/batch_reader.go @@ -87,7 +87,8 @@ func catFileBatchCheck(ctx context.Context, repoPath string) (WriteCloserError, } // catFileBatch opens git cat-file --batch in the provided repo and returns a stdin pipe, a stdout reader and cancel function -func catFileBatch(ctx context.Context, repoPath string) (WriteCloserError, *bufio.Reader, func()) { +// batchArg is the argument to pass to cat-file --batch, e.g. "--batch" or "--batch-command" +func catFileBatch(ctx context.Context, repoPath, batchArg string) (WriteCloserError, *bufio.Reader, func()) { // We often want to feed the commits in order into cat-file --batch, followed by their trees and sub trees as necessary. // so let's create a batch stdin and stdout batchStdinReader, batchStdinWriter := io.Pipe() @@ -109,7 +110,8 @@ func catFileBatch(ctx context.Context, repoPath string) (WriteCloserError, *bufi go func() { stderr := strings.Builder{} - err := NewCommand("cat-file", "--batch"). + err := NewCommand("cat-file"). + AddDynamicArguments(batchArg). Run(ctx, &RunOpts{ Dir: repoPath, Stdin: batchStdinReader, diff --git a/modules/git/blob_nogogit.go b/modules/git/blob_nogogit.go index af3ce376d6a46..d5ee9bdf960d7 100644 --- a/modules/git/blob_nogogit.go +++ b/modules/git/blob_nogogit.go @@ -26,16 +26,17 @@ type Blob struct { // DataAsync gets a ReadCloser for the contents of a blob without reading it all. // Calling the Close function on the result will discard all unread output. func (b *Blob) DataAsync() (io.ReadCloser, error) { - wr, rd, cancel, err := b.repo.CatFileBatch(b.repo.Ctx) + batch, cancel, err := b.repo.CatFileBatch(b.repo.Ctx) if err != nil { return nil, err } - _, err = wr.Write([]byte(b.ID.String() + "\n")) + _, err = batch.Write([]byte(b.ID.String() + "\n")) if err != nil { cancel() return nil, err } + rd := batch.Reader() _, _, size, err := ReadBatchLine(rd) if err != nil { cancel() @@ -67,18 +68,18 @@ func (b *Blob) Size() int64 { return b.size } - wr, rd, cancel, err := b.repo.CatFileBatchCheck(b.repo.Ctx) + batch, cancel, err := b.repo.CatFileBatch(b.repo.Ctx) if err != nil { log.Debug("error whilst reading size for %s in %s. Error: %v", b.ID.String(), b.repo.Path, err) return 0 } defer cancel() - _, err = wr.Write([]byte(b.ID.String() + "\n")) + _, err = batch.WriteCheck([]byte(b.ID.String() + "\n")) if err != nil { log.Debug("error whilst reading size for %s in %s. Error: %v", b.ID.String(), b.repo.Path, err) return 0 } - _, _, b.size, err = ReadBatchLine(rd) + _, _, b.size, err = ReadBatchLine(batch.CheckReader()) if err != nil { log.Debug("error whilst reading size for %s in %s. Error: %v", b.ID.String(), b.repo.Path, err) return 0 diff --git a/modules/git/git.go b/modules/git/git.go index a2ffd6d289880..125e957ebe7bd 100644 --- a/modules/git/git.go +++ b/modules/git/git.go @@ -26,11 +26,12 @@ const RequiredVersion = "2.0.0" // the minimum Git version required type Features struct { gitVersion *version.Version - UsingGogit bool - SupportProcReceive bool // >= 2.29 - SupportHashSha256 bool // >= 2.42, SHA-256 repositories no longer an ‘experimental curiosity’ - SupportedObjectFormats []ObjectFormat // sha1, sha256 - SupportCheckAttrOnBare bool // >= 2.40 + UsingGogit bool + SupportProcReceive bool // >= 2.29 + SupportHashSha256 bool // >= 2.42, SHA-256 repositories no longer an ‘experimental curiosity’ + SupportedObjectFormats []ObjectFormat // sha1, sha256 + SupportCheckAttrOnBare bool // >= 2.40 + SupportCatFileBatchCommand bool // >= 2.36, support `git cat-file --batch-command` } var ( @@ -79,6 +80,7 @@ func loadGitVersionFeatures() (*Features, error) { features.SupportedObjectFormats = append(features.SupportedObjectFormats, Sha256ObjectFormat) } features.SupportCheckAttrOnBare = features.CheckVersionAtLeast("2.40") + features.SupportCatFileBatchCommand = features.CheckVersionAtLeast("2.36") return features, nil } diff --git a/modules/git/languagestats/language_stats_nogogit.go b/modules/git/languagestats/language_stats_nogogit.go index 94cf9fff8c129..7cad15bc85347 100644 --- a/modules/git/languagestats/language_stats_nogogit.go +++ b/modules/git/languagestats/language_stats_nogogit.go @@ -22,20 +22,21 @@ import ( func GetLanguageStats(repo *git.Repository, commitID string) (map[string]int64, error) { // We will feed the commit IDs in order into cat-file --batch, followed by blobs as necessary. // so let's create a batch stdin and stdout - batchStdinWriter, batchReader, cancel, err := repo.CatFileBatch(repo.Ctx) + batch, cancel, err := repo.CatFileBatch(repo.Ctx) if err != nil { return nil, err } defer cancel() writeID := func(id string) error { - _, err := batchStdinWriter.Write([]byte(id + "\n")) + _, err := batch.Write([]byte(id + "\n")) return err } if err := writeID(commitID); err != nil { return nil, err } + batchReader := batch.Reader() shaBytes, typ, size, err := git.ReadBatchLine(batchReader) if typ != "commit" { log.Debug("Unable to get commit for: %s. Err: %v", commitID, err) diff --git a/modules/git/pipeline/lfs_nogogit.go b/modules/git/pipeline/lfs_nogogit.go index c5eed737011a6..f3a2afed5a2cc 100644 --- a/modules/git/pipeline/lfs_nogogit.go +++ b/modules/git/pipeline/lfs_nogogit.go @@ -46,7 +46,7 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err // Next feed the commits in order into cat-file --batch, followed by their trees and sub trees as necessary. // so let's create a batch stdin and stdout - batchStdinWriter, batchReader, cancel, err := repo.CatFileBatch(repo.Ctx) + batch, cancel, err := repo.CatFileBatch(repo.Ctx) if err != nil { return nil, err } @@ -60,17 +60,14 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err fnameBuf := make([]byte, 4096) modeBuf := make([]byte, 40) workingShaBuf := make([]byte, objectID.Type().FullLength()/2) + batchReader := batch.Reader() for scan.Scan() { // Get the next commit ID commitID := scan.Bytes() // push the commit to the cat-file --batch process - _, err := batchStdinWriter.Write(commitID) - if err != nil { - return nil, err - } - _, err = batchStdinWriter.Write([]byte{'\n'}) + _, err := batch.Write(append(commitID, '\n')) if err != nil { return nil, err } @@ -92,7 +89,7 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err if err != nil { return nil, err } - _, err = batchStdinWriter.Write([]byte(id + "\n")) + _, err = batch.Write([]byte(id + "\n")) if err != nil { return nil, err } @@ -107,7 +104,7 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err return nil, err } - if _, err := batchStdinWriter.Write([]byte(curCommit.Tree.ID.String() + "\n")); err != nil { + if _, err := batch.Write([]byte(curCommit.Tree.ID.String() + "\n")); err != nil { return nil, err } curPath = "" @@ -139,11 +136,7 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err return nil, err } if len(trees) > 0 { - _, err := batchStdinWriter.Write(trees[len(trees)-1]) - if err != nil { - return nil, err - } - _, err = batchStdinWriter.Write([]byte("\n")) + _, err := batch.Write(append(trees[len(trees)-1], '\n')) if err != nil { return nil, err } diff --git a/modules/git/repo_base_nogogit.go b/modules/git/repo_base_nogogit.go index 6f9bfd4b434b1..e67b66f76a9ad 100644 --- a/modules/git/repo_base_nogogit.go +++ b/modules/git/repo_base_nogogit.go @@ -7,7 +7,6 @@ package git import ( - "bufio" "context" "path/filepath" @@ -26,10 +25,7 @@ type Repository struct { gpgSettings *GPGSettings batchInUse bool - batch *Batch - - checkInUse bool - check *Batch + batch Batch Ctx context.Context LastCommitCache *LastCommitCache @@ -64,18 +60,18 @@ func OpenRepository(ctx context.Context, repoPath string) (*Repository, error) { } // CatFileBatch obtains a CatFileBatch for this repository -func (repo *Repository) CatFileBatch(ctx context.Context) (WriteCloserError, *bufio.Reader, func(), error) { +func (repo *Repository) CatFileBatch(ctx context.Context) (Batch, func(), error) { if repo.batch == nil { var err error repo.batch, err = NewBatch(ctx, repo.Path) if err != nil { - return nil, nil, nil, err + return nil, nil, err } } if !repo.batchInUse { repo.batchInUse = true - return repo.batch.Writer, repo.batch.Reader, func() { + return repo.batch, func() { repo.batchInUse = false }, nil } @@ -83,34 +79,9 @@ func (repo *Repository) CatFileBatch(ctx context.Context) (WriteCloserError, *bu log.Debug("Opening temporary cat file batch for: %s", repo.Path) tempBatch, err := NewBatch(ctx, repo.Path) if err != nil { - return nil, nil, nil, err + return nil, nil, err } - return tempBatch.Writer, tempBatch.Reader, tempBatch.Close, nil -} - -// CatFileBatchCheck obtains a CatFileBatchCheck for this repository -func (repo *Repository) CatFileBatchCheck(ctx context.Context) (WriteCloserError, *bufio.Reader, func(), error) { - if repo.check == nil { - var err error - repo.check, err = NewBatchCheck(ctx, repo.Path) - if err != nil { - return nil, nil, nil, err - } - } - - if !repo.checkInUse { - repo.checkInUse = true - return repo.check.Writer, repo.check.Reader, func() { - repo.checkInUse = false - }, nil - } - - log.Debug("Opening temporary cat file batch-check for: %s", repo.Path) - tempBatchCheck, err := NewBatchCheck(ctx, repo.Path) - if err != nil { - return nil, nil, nil, err - } - return tempBatchCheck.Writer, tempBatchCheck.Reader, tempBatchCheck.Close, nil + return tempBatch, tempBatch.Close, nil } func (repo *Repository) Close() error { @@ -122,11 +93,7 @@ func (repo *Repository) Close() error { repo.batch = nil repo.batchInUse = false } - if repo.check != nil { - repo.check.Close() - repo.check = nil - repo.checkInUse = false - } + repo.LastCommitCache = nil repo.tagCache = nil return nil diff --git a/modules/git/repo_branch_nogogit.go b/modules/git/repo_branch_nogogit.go index 0d11198523515..902157c0efff5 100644 --- a/modules/git/repo_branch_nogogit.go +++ b/modules/git/repo_branch_nogogit.go @@ -22,18 +22,18 @@ func (repo *Repository) IsObjectExist(name string) bool { return false } - wr, rd, cancel, err := repo.CatFileBatchCheck(repo.Ctx) + batch, cancel, err := repo.CatFileBatch(repo.Ctx) if err != nil { log.Debug("Error writing to CatFileBatchCheck %v", err) return false } defer cancel() - _, err = wr.Write([]byte(name + "\n")) + _, err = batch.WriteCheck([]byte(name + "\n")) if err != nil { log.Debug("Error writing to CatFileBatchCheck %v", err) return false } - sha, _, _, err := ReadBatchLine(rd) + sha, _, _, err := ReadBatchLine(batch.CheckReader()) return err == nil && bytes.HasPrefix(sha, []byte(strings.TrimSpace(name))) } @@ -43,18 +43,18 @@ func (repo *Repository) IsReferenceExist(name string) bool { return false } - wr, rd, cancel, err := repo.CatFileBatchCheck(repo.Ctx) + batch, cancel, err := repo.CatFileBatch(repo.Ctx) if err != nil { log.Debug("Error writing to CatFileBatchCheck %v", err) return false } defer cancel() - _, err = wr.Write([]byte(name + "\n")) + _, err = batch.WriteCheck([]byte(name + "\n")) if err != nil { log.Debug("Error writing to CatFileBatchCheck %v", err) return false } - _, _, _, err = ReadBatchLine(rd) + _, _, _, err = ReadBatchLine(batch.CheckReader()) return err == nil } diff --git a/modules/git/repo_commit_nogogit.go b/modules/git/repo_commit_nogogit.go index 3ead3e22165f4..a2810a66afcfd 100644 --- a/modules/git/repo_commit_nogogit.go +++ b/modules/git/repo_commit_nogogit.go @@ -6,7 +6,6 @@ package git import ( - "bufio" "errors" "io" "strings" @@ -33,16 +32,16 @@ func (repo *Repository) ResolveReference(name string) (string, error) { // GetRefCommitID returns the last commit ID string of given reference (branch or tag). func (repo *Repository) GetRefCommitID(name string) (string, error) { - wr, rd, cancel, err := repo.CatFileBatchCheck(repo.Ctx) + batch, cancel, err := repo.CatFileBatch(repo.Ctx) if err != nil { return "", err } defer cancel() - _, err = wr.Write([]byte(name + "\n")) + _, err = batch.WriteCheck([]byte(name + "\n")) if err != nil { return "", err } - shaBs, _, _, err := ReadBatchLine(rd) + shaBs, _, _, err := ReadBatchLine(batch.CheckReader()) if IsErrNotExist(err) { return "", ErrNotExist{name, ""} } @@ -73,19 +72,19 @@ func (repo *Repository) IsCommitExist(name string) bool { } func (repo *Repository) getCommit(id ObjectID) (*Commit, error) { - wr, rd, cancel, err := repo.CatFileBatch(repo.Ctx) + batch, cancel, err := repo.CatFileBatch(repo.Ctx) if err != nil { return nil, err } defer cancel() - _, _ = wr.Write([]byte(id.String() + "\n")) + _, _ = batch.Write([]byte(id.String() + "\n")) - return repo.getCommitFromBatchReader(wr, rd, id) + return repo.getCommitFromBatchReader(batch, id) } -func (repo *Repository) getCommitFromBatchReader(wr WriteCloserError, rd *bufio.Reader, id ObjectID) (*Commit, error) { - _, typ, size, err := ReadBatchLine(rd) +func (repo *Repository) getCommitFromBatchReader(batch Batch, id ObjectID) (*Commit, error) { + _, typ, size, err := ReadBatchLine(batch.Reader()) if err != nil { if errors.Is(err, io.EOF) || IsErrNotExist(err) { return nil, ErrNotExist{ID: id.String()} @@ -99,11 +98,11 @@ func (repo *Repository) getCommitFromBatchReader(wr WriteCloserError, rd *bufio. case "tag": // then we need to parse the tag // and load the commit - data, err := io.ReadAll(io.LimitReader(rd, size)) + data, err := io.ReadAll(io.LimitReader(batch.Reader(), size)) if err != nil { return nil, err } - _, err = rd.Discard(1) + _, err = batch.Reader().Discard(1) if err != nil { return nil, err } @@ -112,22 +111,22 @@ func (repo *Repository) getCommitFromBatchReader(wr WriteCloserError, rd *bufio. return nil, err } - if _, err := wr.Write([]byte(tag.Object.String() + "\n")); err != nil { + if _, err := batch.Write([]byte(tag.Object.String() + "\n")); err != nil { return nil, err } - commit, err := repo.getCommitFromBatchReader(wr, rd, tag.Object) + commit, err := repo.getCommitFromBatchReader(batch, tag.Object) if err != nil { return nil, err } return commit, nil case "commit": - commit, err := CommitFromReader(repo, id, io.LimitReader(rd, size)) + commit, err := CommitFromReader(repo, id, io.LimitReader(batch.Reader(), size)) if err != nil { return nil, err } - _, err = rd.Discard(1) + _, err = batch.Reader().Discard(1) if err != nil { return nil, err } @@ -135,7 +134,7 @@ func (repo *Repository) getCommitFromBatchReader(wr WriteCloserError, rd *bufio. return commit, nil default: log.Debug("Unknown typ: %s", typ) - if err := DiscardFull(rd, size+1); err != nil { + if err := DiscardFull(batch.Reader(), size+1); err != nil { return nil, err } return nil, ErrNotExist{ @@ -157,16 +156,16 @@ func (repo *Repository) ConvertToGitID(commitID string) (ObjectID, error) { } } - wr, rd, cancel, err := repo.CatFileBatchCheck(repo.Ctx) + batch, cancel, err := repo.CatFileBatch(repo.Ctx) if err != nil { return nil, err } defer cancel() - _, err = wr.Write([]byte(commitID + "\n")) + _, err = batch.WriteCheck([]byte(commitID + "\n")) if err != nil { return nil, err } - sha, _, _, err := ReadBatchLine(rd) + sha, _, _, err := ReadBatchLine(batch.CheckReader()) if err != nil { if IsErrNotExist(err) { return nil, ErrNotExist{commitID, ""} diff --git a/modules/git/repo_tag_nogogit.go b/modules/git/repo_tag_nogogit.go index 3d2b4f52bde55..316b797b73298 100644 --- a/modules/git/repo_tag_nogogit.go +++ b/modules/git/repo_tag_nogogit.go @@ -31,16 +31,16 @@ func (repo *Repository) GetTags(skip, limit int) (tags []string, err error) { // GetTagType gets the type of the tag, either commit (simple) or tag (annotated) func (repo *Repository) GetTagType(id ObjectID) (string, error) { - wr, rd, cancel, err := repo.CatFileBatchCheck(repo.Ctx) + batch, cancel, err := repo.CatFileBatch(repo.Ctx) if err != nil { return "", err } defer cancel() - _, err = wr.Write([]byte(id.String() + "\n")) + _, err = batch.WriteCheck([]byte(id.String() + "\n")) if err != nil { return "", err } - _, typ, _, err := ReadBatchLine(rd) + _, typ, _, err := ReadBatchLine(batch.CheckReader()) if err != nil { if IsErrNotExist(err) { return "", ErrNotExist{ID: id.String()} @@ -95,15 +95,16 @@ func (repo *Repository) getTag(tagID ObjectID, name string) (*Tag, error) { } // The tag is an annotated tag with a message. - wr, rd, cancel, err := repo.CatFileBatch(repo.Ctx) + batch, cancel, err := repo.CatFileBatch(repo.Ctx) if err != nil { return nil, err } defer cancel() - if _, err := wr.Write([]byte(tagID.String() + "\n")); err != nil { + if _, err := batch.Write([]byte(tagID.String() + "\n")); err != nil { return nil, err } + rd := batch.Reader() _, typ, size, err := ReadBatchLine(rd) if err != nil { if errors.Is(err, io.EOF) || IsErrNotExist(err) { diff --git a/modules/git/repo_tree_nogogit.go b/modules/git/repo_tree_nogogit.go index 1954f8516219d..05cd94fd9f49d 100644 --- a/modules/git/repo_tree_nogogit.go +++ b/modules/git/repo_tree_nogogit.go @@ -10,13 +10,15 @@ import ( ) func (repo *Repository) getTree(id ObjectID) (*Tree, error) { - wr, rd, cancel, err := repo.CatFileBatch(repo.Ctx) + batch, cancel, err := repo.CatFileBatch(repo.Ctx) if err != nil { return nil, err } defer cancel() - _, _ = wr.Write([]byte(id.String() + "\n")) + _, _ = batch.Write([]byte(id.String() + "\n")) + + rd := batch.Reader() // ignore the SHA _, typ, size, err := ReadBatchLine(rd) @@ -36,10 +38,10 @@ func (repo *Repository) getTree(id ObjectID) (*Tree, error) { return nil, err } - if _, err := wr.Write([]byte(tag.Object.String() + "\n")); err != nil { + if _, err := batch.Write([]byte(tag.Object.String() + "\n")); err != nil { return nil, err } - commit, err := repo.getCommitFromBatchReader(wr, rd, tag.Object) + commit, err := repo.getCommitFromBatchReader(batch, tag.Object) if err != nil { return nil, err } diff --git a/modules/git/tree_entry_nogogit.go b/modules/git/tree_entry_nogogit.go index 0c0e1835f172d..da4afa8f336ef 100644 --- a/modules/git/tree_entry_nogogit.go +++ b/modules/git/tree_entry_nogogit.go @@ -36,18 +36,18 @@ func (te *TreeEntry) Size() int64 { return te.size } - wr, rd, cancel, err := te.ptree.repo.CatFileBatchCheck(te.ptree.repo.Ctx) + batch, cancel, err := te.ptree.repo.CatFileBatch(te.ptree.repo.Ctx) if err != nil { log.Debug("error whilst reading size for %s in %s. Error: %v", te.ID.String(), te.ptree.repo.Path, err) return 0 } defer cancel() - _, err = wr.Write([]byte(te.ID.String() + "\n")) + _, err = batch.WriteCheck([]byte(te.ID.String() + "\n")) if err != nil { log.Debug("error whilst reading size for %s in %s. Error: %v", te.ID.String(), te.ptree.repo.Path, err) return 0 } - _, _, te.size, err = ReadBatchLine(rd) + _, _, te.size, err = ReadBatchLine(batch.CheckReader()) if err != nil { log.Debug("error whilst reading size for %s in %s. Error: %v", te.ID.String(), te.ptree.repo.Path, err) return 0 diff --git a/modules/git/tree_nogogit.go b/modules/git/tree_nogogit.go index f88788418e27d..eea811c2c9eb8 100644 --- a/modules/git/tree_nogogit.go +++ b/modules/git/tree_nogogit.go @@ -33,13 +33,15 @@ func (t *Tree) ListEntries() (Entries, error) { } if t.repo != nil { - wr, rd, cancel, err := t.repo.CatFileBatch(t.repo.Ctx) + batch, cancel, err := t.repo.CatFileBatch(t.repo.Ctx) if err != nil { return nil, err } defer cancel() - _, _ = wr.Write([]byte(t.ID.String() + "\n")) + rd := batch.Reader() + + _, _ = batch.Write([]byte(t.ID.String() + "\n")) _, typ, sz, err := ReadBatchLine(rd) if err != nil { return nil, err @@ -49,7 +51,7 @@ func (t *Tree) ListEntries() (Entries, error) { if err != nil && err != io.EOF { return nil, err } - _, _ = wr.Write([]byte(treeID + "\n")) + _, _ = batch.Write([]byte(treeID + "\n")) _, typ, sz, err = ReadBatchLine(rd) if err != nil { return nil, err diff --git a/modules/indexer/code/bleve/bleve.go b/modules/indexer/code/bleve/bleve.go index 70f0995a012bc..ccca467a0dd72 100644 --- a/modules/indexer/code/bleve/bleve.go +++ b/modules/indexer/code/bleve/bleve.go @@ -4,7 +4,6 @@ package bleve import ( - "bufio" "context" "fmt" "io" @@ -149,7 +148,7 @@ func NewIndexer(indexDir string) *Indexer { } } -func (b *Indexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserError, batchReader *bufio.Reader, commitSha string, +func (b *Indexer) addUpdate(ctx context.Context, gitBatch git.Batch, commitSha string, update internal.FileUpdate, repo *repo_model.Repository, batch *inner_bleve.FlushingBatch, ) error { // Ignore vendored files in code search @@ -175,10 +174,12 @@ func (b *Indexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserErro return b.addDelete(update.Filename, repo, batch) } - if _, err := batchWriter.Write([]byte(update.BlobSha + "\n")); err != nil { + if _, err := gitBatch.Write([]byte(update.BlobSha + "\n")); err != nil { return err } + batchReader := gitBatch.Reader() + _, _, size, err = git.ReadBatchLine(batchReader) if err != nil { return err @@ -223,7 +224,7 @@ func (b *Indexer) Index(ctx context.Context, repo *repo_model.Repository, sha st defer gitBatch.Close() for _, update := range changes.Updates { - if err := b.addUpdate(ctx, gitBatch.Writer, gitBatch.Reader, sha, update, repo, batch); err != nil { + if err := b.addUpdate(ctx, gitBatch, sha, update, repo, batch); err != nil { return err } } diff --git a/modules/indexer/code/elasticsearch/elasticsearch.go b/modules/indexer/code/elasticsearch/elasticsearch.go index f925ce396a321..76d6064de81f0 100644 --- a/modules/indexer/code/elasticsearch/elasticsearch.go +++ b/modules/indexer/code/elasticsearch/elasticsearch.go @@ -4,7 +4,6 @@ package elasticsearch import ( - "bufio" "context" "fmt" "io" @@ -137,7 +136,7 @@ const ( }` ) -func (b *Indexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserError, batchReader *bufio.Reader, sha string, update internal.FileUpdate, repo *repo_model.Repository) ([]elastic.BulkableRequest, error) { +func (b *Indexer) addUpdate(ctx context.Context, batch git.Batch, sha string, update internal.FileUpdate, repo *repo_model.Repository) ([]elastic.BulkableRequest, error) { // Ignore vendored files in code search if setting.Indexer.ExcludeVendored && analyze.IsVendor(update.Filename) { return nil, nil @@ -160,10 +159,12 @@ func (b *Indexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserErro return []elastic.BulkableRequest{b.addDelete(update.Filename, repo)}, nil } - if _, err := batchWriter.Write([]byte(update.BlobSha + "\n")); err != nil { + if _, err := batch.Write([]byte(update.BlobSha + "\n")); err != nil { return nil, err } + batchReader := batch.Reader() + _, _, size, err = git.ReadBatchLine(batchReader) if err != nil { return nil, err @@ -215,7 +216,7 @@ func (b *Indexer) Index(ctx context.Context, repo *repo_model.Repository, sha st defer batch.Close() for _, update := range changes.Updates { - updateReqs, err := b.addUpdate(ctx, batch.Writer, batch.Reader, sha, update, repo) + updateReqs, err := b.addUpdate(ctx, batch, sha, update, repo) if err != nil { return err } From bb8f1cf9023d9a2889364726547a81a8e69be4a5 Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Sun, 8 Jun 2025 12:18:45 -0700 Subject: [PATCH 2/4] improvements --- modules/git/batch.go | 63 +++++++++++++------------------------------- 1 file changed, 18 insertions(+), 45 deletions(-) diff --git a/modules/git/batch.go b/modules/git/batch.go index a89215dc95136..a11278fe6b1e4 100644 --- a/modules/git/batch.go +++ b/modules/git/batch.go @@ -23,40 +23,6 @@ func (b *batchCatFile) Close() { } } -// NewBatch creates a new batch for the given repository, the Close must be invoked before release the batch -func newBatch(ctx context.Context, repoPath string) (*batchCatFile, error) { - // Now because of some insanity with git cat-file not immediately failing if not run in a valid git directory we need to run git rev-parse first! - if err := ensureValidGitRepository(ctx, repoPath); err != nil { - return nil, err - } - - var batch batchCatFile - batch.Writer, batch.Reader, batch.cancel = catFileBatch(ctx, repoPath, "--batch") - return &batch, nil -} - -func newBatchCheck(ctx context.Context, repoPath string) (*batchCatFile, error) { - // Now because of some insanity with git cat-file not immediately failing if not run in a valid git directory we need to run git rev-parse first! - if err := ensureValidGitRepository(ctx, repoPath); err != nil { - return nil, err - } - - var check batchCatFile - check.Writer, check.Reader, check.cancel = catFileBatchCheck(ctx, repoPath) - return &check, nil -} - -func newBatchCommand(ctx context.Context, repoPath string) (*batchCatFile, error) { - // Now because of some insanity with git cat-file not immediately failing if not run in a valid git directory we need to run git rev-parse first! - if err := ensureValidGitRepository(ctx, repoPath); err != nil { - return nil, err - } - - var check batchCatFile - check.Writer, check.Reader, check.cancel = catFileBatch(ctx, repoPath, "--batch-command") - return &check, nil -} - type Batch interface { Write([]byte) (int, error) WriteCheck([]byte) (int, error) @@ -75,20 +41,22 @@ type batchCatFileWithCheck struct { var _ Batch = &batchCatFileWithCheck{} +// newBatchCatFileWithCheck creates a new batch and a new batch check for the given repository, the Close must be invoked before release the batch func newBatchCatFileWithCheck(ctx context.Context, repoPath string) (*batchCatFileWithCheck, error) { - batch, err := newBatch(ctx, repoPath) - if err != nil { + // Now because of some insanity with git cat-file not immediately failing if not run in a valid git directory we need to run git rev-parse first! + if err := ensureValidGitRepository(ctx, repoPath); err != nil { return nil, err } - batchCheck, err := newBatchCheck(ctx, repoPath) - if err != nil { - return nil, err - } + var batch batchCatFile + batch.Writer, batch.Reader, batch.cancel = catFileBatch(ctx, repoPath, "--batch") + + var check batchCatFile + check.Writer, check.Reader, check.cancel = catFileBatchCheck(ctx, repoPath) return &batchCatFileWithCheck{ - batch: batch, - batchCheck: batchCheck, + batch: &batch, + batchCheck: &check, }, nil } @@ -125,14 +93,19 @@ type batchCommandCatFile struct { batch *batchCatFile } +var _ Batch = &batchCommandCatFile{} + func newBatchCommandCatFile(ctx context.Context, repoPath string) (*batchCommandCatFile, error) { - batch, err := newBatchCommand(ctx, repoPath) - if err != nil { + // Now because of some insanity with git cat-file not immediately failing if not run in a valid git directory we need to run git rev-parse first! + if err := ensureValidGitRepository(ctx, repoPath); err != nil { return nil, err } + var batch batchCatFile + batch.Writer, batch.Reader, batch.cancel = catFileBatch(ctx, repoPath, "--batch-command") + return &batchCommandCatFile{ - batch: batch, + batch: &batch, }, nil } From db4813a2dd4dca94a482a8f41a382bd5f105ae45 Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Sun, 8 Jun 2025 12:51:03 -0700 Subject: [PATCH 3/4] Fix command arg --- modules/git/batch_reader.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/git/batch_reader.go b/modules/git/batch_reader.go index 20e082526d3a8..33f6dd958d0ce 100644 --- a/modules/git/batch_reader.go +++ b/modules/git/batch_reader.go @@ -12,6 +12,7 @@ import ( "strconv" "strings" + "code.gitea.io/gitea/modules/git/internal" "code.gitea.io/gitea/modules/log" "github.com/djherbis/buffer" @@ -111,7 +112,7 @@ func catFileBatch(ctx context.Context, repoPath, batchArg string) (WriteCloserEr go func() { stderr := strings.Builder{} err := NewCommand("cat-file"). - AddDynamicArguments(batchArg). + AddArguments(internal.CmdArg(batchArg)). Run(ctx, &RunOpts{ Dir: repoPath, Stdin: batchStdinReader, From 334eb9ecb6442cc86894bfd5fb60bdbbd3383860 Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Sun, 8 Jun 2025 15:35:03 -0700 Subject: [PATCH 4/4] Fix linkt error --- modules/git/batch.go | 4 ++-- modules/git/batch_reader.go | 12 +++++++++--- modules/git/command.go | 6 ++++++ 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/modules/git/batch.go b/modules/git/batch.go index a11278fe6b1e4..10cfceb325b74 100644 --- a/modules/git/batch.go +++ b/modules/git/batch.go @@ -49,7 +49,7 @@ func newBatchCatFileWithCheck(ctx context.Context, repoPath string) (*batchCatFi } var batch batchCatFile - batch.Writer, batch.Reader, batch.cancel = catFileBatch(ctx, repoPath, "--batch") + batch.Writer, batch.Reader, batch.cancel = catFileBatch(ctx, repoPath, true) var check batchCatFile check.Writer, check.Reader, check.cancel = catFileBatchCheck(ctx, repoPath) @@ -102,7 +102,7 @@ func newBatchCommandCatFile(ctx context.Context, repoPath string) (*batchCommand } var batch batchCatFile - batch.Writer, batch.Reader, batch.cancel = catFileBatch(ctx, repoPath, "--batch-command") + batch.Writer, batch.Reader, batch.cancel = catFileBatch(ctx, repoPath, false) return &batchCommandCatFile{ batch: &batch, diff --git a/modules/git/batch_reader.go b/modules/git/batch_reader.go index 33f6dd958d0ce..08920d554b745 100644 --- a/modules/git/batch_reader.go +++ b/modules/git/batch_reader.go @@ -12,8 +12,8 @@ import ( "strconv" "strings" - "code.gitea.io/gitea/modules/git/internal" "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/util" "github.com/djherbis/buffer" "github.com/djherbis/nio/v3" @@ -89,7 +89,8 @@ func catFileBatchCheck(ctx context.Context, repoPath string) (WriteCloserError, // catFileBatch opens git cat-file --batch in the provided repo and returns a stdin pipe, a stdout reader and cancel function // batchArg is the argument to pass to cat-file --batch, e.g. "--batch" or "--batch-command" -func catFileBatch(ctx context.Context, repoPath, batchArg string) (WriteCloserError, *bufio.Reader, func()) { +// If batchOrBatchCommnd is false, it will use "--batch-command" instead of "--batch". +func catFileBatch(ctx context.Context, repoPath string, batchOrBatchCommnd bool) (WriteCloserError, *bufio.Reader, func()) { // We often want to feed the commits in order into cat-file --batch, followed by their trees and sub trees as necessary. // so let's create a batch stdin and stdout batchStdinReader, batchStdinWriter := io.Pipe() @@ -109,10 +110,15 @@ func catFileBatch(ctx context.Context, repoPath, batchArg string) (WriteCloserEr cancel() }() + const ( + batchCmdArg = "--batch" + batchCommandArg = "--batch-command" + ) + go func() { stderr := strings.Builder{} err := NewCommand("cat-file"). - AddArguments(internal.CmdArg(batchArg)). + AddArguments(toTrustCmdArg(util.Iif(batchOrBatchCommnd, batchCmdArg, batchCommandArg))). Run(ctx, &RunOpts{ Dir: repoPath, Stdin: batchStdinReader, diff --git a/modules/git/command.go b/modules/git/command.go index 22f1d02339148..157bee9d716e9 100644 --- a/modules/git/command.go +++ b/modules/git/command.go @@ -207,6 +207,12 @@ func (c *Command) AddConfig(key, value string) *Command { return c } +// toTrustCmdArg converts a string (trusted as argument) to CmdArg +// In most cases, it shouldn't be used. Use AddXxx function instead +func toTrustCmdArg(arg string) internal.CmdArg { + return internal.CmdArg(arg) +} + // ToTrustedCmdArgs converts a list of strings (trusted as argument) to TrustedCmdArgs // In most cases, it shouldn't be used. Use NewCommand().AddXxx() function instead func ToTrustedCmdArgs(args []string) TrustedCmdArgs {