Skip to content

Refactor CatFile batch implementation and introduce batch-command for git 2.36 #34651

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
125 changes: 109 additions & 16 deletions modules/git/batch.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,40 +8,133 @@ import (
"context"
)

type Batch struct {
type batchCatFile struct {
cancel context.CancelFunc
Reader *bufio.Reader
Writer WriteCloserError
}

// NewBatch creates a new batch for the given repository, the Close must be invoked before release the batch
func NewBatch(ctx context.Context, repoPath string) (*Batch, error) {
func (b *batchCatFile) Close() {
if b.cancel != nil {
b.cancel()
b.Reader = nil
b.Writer = nil
b.cancel = nil
}
}

type Batch interface {
Write([]byte) (int, error)
WriteCheck([]byte) (int, error)
Reader() *bufio.Reader
CheckReader() *bufio.Reader
Close()
}

// batchCatFileWithCheck implements the Batch interface using the "cat-file --batch" command and "cat-file --batch-check" command
// ref: https://git-scm.com/docs/git-cat-file#Documentation/git-cat-file.txt---batch
// To align with --batch-command, we creates the two commands both at the same time if git version is lower than 2.36
type batchCatFileWithCheck struct {
batch *batchCatFile
batchCheck *batchCatFile
}

var _ Batch = &batchCatFileWithCheck{}

// newBatchCatFileWithCheck creates a new batch and a new batch check for the given repository, the Close must be invoked before release the batch
func newBatchCatFileWithCheck(ctx context.Context, repoPath string) (*batchCatFileWithCheck, error) {
// Now because of some insanity with git cat-file not immediately failing if not run in a valid git directory we need to run git rev-parse first!
if err := ensureValidGitRepository(ctx, repoPath); err != nil {
return nil, err
}

var batch Batch
batch.Writer, batch.Reader, batch.cancel = catFileBatch(ctx, repoPath)
return &batch, nil
var batch batchCatFile
batch.Writer, batch.Reader, batch.cancel = catFileBatch(ctx, repoPath, true)

var check batchCatFile
check.Writer, check.Reader, check.cancel = catFileBatchCheck(ctx, repoPath)

return &batchCatFileWithCheck{
batch: &batch,
batchCheck: &check,
}, nil
}

func (b *batchCatFileWithCheck) Write(bs []byte) (int, error) {
return b.batch.Writer.Write(bs)
}

func (b *batchCatFileWithCheck) WriteCheck(bs []byte) (int, error) {
return b.batchCheck.Writer.Write(bs)
}

func (b *batchCatFileWithCheck) Reader() *bufio.Reader {
return b.batch.Reader
}

func NewBatchCheck(ctx context.Context, repoPath string) (*Batch, error) {
func (b *batchCatFileWithCheck) CheckReader() *bufio.Reader {
return b.batchCheck.Reader
}

func (b *batchCatFileWithCheck) Close() {
if b.batch != nil {
b.batch.Close()
b.batch = nil
}
if b.batchCheck != nil {
b.batchCheck.Close()
b.batchCheck = nil
}
}

// batchCommandCatFile implements the Batch interface using the "cat-file --batch-command" command
// ref: https://git-scm.com/docs/git-cat-file#Documentation/git-cat-file.txt---batch-command
type batchCommandCatFile struct {
batch *batchCatFile
}

var _ Batch = &batchCommandCatFile{}

func newBatchCommandCatFile(ctx context.Context, repoPath string) (*batchCommandCatFile, error) {
// Now because of some insanity with git cat-file not immediately failing if not run in a valid git directory we need to run git rev-parse first!
if err := ensureValidGitRepository(ctx, repoPath); err != nil {
return nil, err
}

var check Batch
check.Writer, check.Reader, check.cancel = catFileBatchCheck(ctx, repoPath)
return &check, nil
var batch batchCatFile
batch.Writer, batch.Reader, batch.cancel = catFileBatch(ctx, repoPath, false)

return &batchCommandCatFile{
batch: &batch,
}, nil
}

func (b *Batch) Close() {
if b.cancel != nil {
b.cancel()
b.Reader = nil
b.Writer = nil
b.cancel = nil
func (b *batchCommandCatFile) Write(bs []byte) (int, error) {
return b.batch.Writer.Write(append([]byte("contents "), bs...))
}

func (b *batchCommandCatFile) WriteCheck(bs []byte) (int, error) {
return b.batch.Writer.Write(append([]byte("info "), bs...))
}

func (b *batchCommandCatFile) Reader() *bufio.Reader {
return b.batch.Reader
}

func (b *batchCommandCatFile) CheckReader() *bufio.Reader {
return b.batch.Reader
}

func (b *batchCommandCatFile) Close() {
if b.batch != nil {
b.batch.Close()
b.batch = nil
}
}

func NewBatch(ctx context.Context, repoPath string) (Batch, error) {
if DefaultFeatures().SupportCatFileBatchCommand {
return newBatchCommandCatFile(ctx, repoPath)
}
return newBatchCatFileWithCheck(ctx, repoPath)
}
13 changes: 11 additions & 2 deletions modules/git/batch_reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"strings"

"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/util"

"github.com/djherbis/buffer"
"github.com/djherbis/nio/v3"
Expand Down Expand Up @@ -87,7 +88,9 @@ func catFileBatchCheck(ctx context.Context, repoPath string) (WriteCloserError,
}

// catFileBatch opens git cat-file --batch in the provided repo and returns a stdin pipe, a stdout reader and cancel function
func catFileBatch(ctx context.Context, repoPath string) (WriteCloserError, *bufio.Reader, func()) {
// batchArg is the argument to pass to cat-file --batch, e.g. "--batch" or "--batch-command"
// If batchOrBatchCommnd is false, it will use "--batch-command" instead of "--batch".
func catFileBatch(ctx context.Context, repoPath string, batchOrBatchCommnd bool) (WriteCloserError, *bufio.Reader, func()) {
// We often want to feed the commits in order into cat-file --batch, followed by their trees and sub trees as necessary.
// so let's create a batch stdin and stdout
batchStdinReader, batchStdinWriter := io.Pipe()
Expand All @@ -107,9 +110,15 @@ func catFileBatch(ctx context.Context, repoPath string) (WriteCloserError, *bufi
cancel()
}()

const (
batchCmdArg = "--batch"
batchCommandArg = "--batch-command"
)

go func() {
stderr := strings.Builder{}
err := NewCommand("cat-file", "--batch").
err := NewCommand("cat-file").
AddArguments(toTrustCmdArg(util.Iif(batchOrBatchCommnd, batchCmdArg, batchCommandArg))).
Run(ctx, &RunOpts{
Dir: repoPath,
Stdin: batchStdinReader,
Expand Down
11 changes: 6 additions & 5 deletions modules/git/blob_nogogit.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,16 +26,17 @@ type Blob struct {
// DataAsync gets a ReadCloser for the contents of a blob without reading it all.
// Calling the Close function on the result will discard all unread output.
func (b *Blob) DataAsync() (io.ReadCloser, error) {
wr, rd, cancel, err := b.repo.CatFileBatch(b.repo.Ctx)
batch, cancel, err := b.repo.CatFileBatch(b.repo.Ctx)
if err != nil {
return nil, err
}

_, err = wr.Write([]byte(b.ID.String() + "\n"))
_, err = batch.Write([]byte(b.ID.String() + "\n"))
if err != nil {
cancel()
return nil, err
}
rd := batch.Reader()
_, _, size, err := ReadBatchLine(rd)
if err != nil {
cancel()
Expand Down Expand Up @@ -67,18 +68,18 @@ func (b *Blob) Size() int64 {
return b.size
}

wr, rd, cancel, err := b.repo.CatFileBatchCheck(b.repo.Ctx)
batch, cancel, err := b.repo.CatFileBatch(b.repo.Ctx)
if err != nil {
log.Debug("error whilst reading size for %s in %s. Error: %v", b.ID.String(), b.repo.Path, err)
return 0
}
defer cancel()
_, err = wr.Write([]byte(b.ID.String() + "\n"))
_, err = batch.WriteCheck([]byte(b.ID.String() + "\n"))
if err != nil {
log.Debug("error whilst reading size for %s in %s. Error: %v", b.ID.String(), b.repo.Path, err)
return 0
}
_, _, b.size, err = ReadBatchLine(rd)
_, _, b.size, err = ReadBatchLine(batch.CheckReader())
if err != nil {
log.Debug("error whilst reading size for %s in %s. Error: %v", b.ID.String(), b.repo.Path, err)
return 0
Expand Down
6 changes: 6 additions & 0 deletions modules/git/command.go
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,12 @@ func (c *Command) AddConfig(key, value string) *Command {
return c
}

// toTrustCmdArg converts a string (trusted as argument) to CmdArg
// In most cases, it shouldn't be used. Use AddXxx function instead
func toTrustCmdArg(arg string) internal.CmdArg {
return internal.CmdArg(arg)
}

// ToTrustedCmdArgs converts a list of strings (trusted as argument) to TrustedCmdArgs
// In most cases, it shouldn't be used. Use NewCommand().AddXxx() function instead
func ToTrustedCmdArgs(args []string) TrustedCmdArgs {
Expand Down
12 changes: 7 additions & 5 deletions modules/git/git.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,12 @@ const RequiredVersion = "2.0.0" // the minimum Git version required
type Features struct {
gitVersion *version.Version

UsingGogit bool
SupportProcReceive bool // >= 2.29
SupportHashSha256 bool // >= 2.42, SHA-256 repositories no longer an ‘experimental curiosity’
SupportedObjectFormats []ObjectFormat // sha1, sha256
SupportCheckAttrOnBare bool // >= 2.40
UsingGogit bool
SupportProcReceive bool // >= 2.29
SupportHashSha256 bool // >= 2.42, SHA-256 repositories no longer an ‘experimental curiosity’
SupportedObjectFormats []ObjectFormat // sha1, sha256
SupportCheckAttrOnBare bool // >= 2.40
SupportCatFileBatchCommand bool // >= 2.36, support `git cat-file --batch-command`
}

var (
Expand Down Expand Up @@ -79,6 +80,7 @@ func loadGitVersionFeatures() (*Features, error) {
features.SupportedObjectFormats = append(features.SupportedObjectFormats, Sha256ObjectFormat)
}
features.SupportCheckAttrOnBare = features.CheckVersionAtLeast("2.40")
features.SupportCatFileBatchCommand = features.CheckVersionAtLeast("2.36")
return features, nil
}

Expand Down
5 changes: 3 additions & 2 deletions modules/git/languagestats/language_stats_nogogit.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,20 +22,21 @@ import (
func GetLanguageStats(repo *git.Repository, commitID string) (map[string]int64, error) {
// We will feed the commit IDs in order into cat-file --batch, followed by blobs as necessary.
// so let's create a batch stdin and stdout
batchStdinWriter, batchReader, cancel, err := repo.CatFileBatch(repo.Ctx)
batch, cancel, err := repo.CatFileBatch(repo.Ctx)
if err != nil {
return nil, err
}
defer cancel()

writeID := func(id string) error {
_, err := batchStdinWriter.Write([]byte(id + "\n"))
_, err := batch.Write([]byte(id + "\n"))
return err
}

if err := writeID(commitID); err != nil {
return nil, err
}
batchReader := batch.Reader()
shaBytes, typ, size, err := git.ReadBatchLine(batchReader)
if typ != "commit" {
log.Debug("Unable to get commit for: %s. Err: %v", commitID, err)
Expand Down
19 changes: 6 additions & 13 deletions modules/git/pipeline/lfs_nogogit.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err

// Next feed the commits in order into cat-file --batch, followed by their trees and sub trees as necessary.
// so let's create a batch stdin and stdout
batchStdinWriter, batchReader, cancel, err := repo.CatFileBatch(repo.Ctx)
batch, cancel, err := repo.CatFileBatch(repo.Ctx)
if err != nil {
return nil, err
}
Expand All @@ -60,17 +60,14 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err
fnameBuf := make([]byte, 4096)
modeBuf := make([]byte, 40)
workingShaBuf := make([]byte, objectID.Type().FullLength()/2)
batchReader := batch.Reader()

for scan.Scan() {
// Get the next commit ID
commitID := scan.Bytes()

// push the commit to the cat-file --batch process
_, err := batchStdinWriter.Write(commitID)
if err != nil {
return nil, err
}
_, err = batchStdinWriter.Write([]byte{'\n'})
_, err := batch.Write(append(commitID, '\n'))
if err != nil {
return nil, err
}
Expand All @@ -92,7 +89,7 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err
if err != nil {
return nil, err
}
_, err = batchStdinWriter.Write([]byte(id + "\n"))
_, err = batch.Write([]byte(id + "\n"))
if err != nil {
return nil, err
}
Expand All @@ -107,7 +104,7 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err
return nil, err
}

if _, err := batchStdinWriter.Write([]byte(curCommit.Tree.ID.String() + "\n")); err != nil {
if _, err := batch.Write([]byte(curCommit.Tree.ID.String() + "\n")); err != nil {
return nil, err
}
curPath = ""
Expand Down Expand Up @@ -139,11 +136,7 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err
return nil, err
}
if len(trees) > 0 {
_, err := batchStdinWriter.Write(trees[len(trees)-1])
if err != nil {
return nil, err
}
_, err = batchStdinWriter.Write([]byte("\n"))
_, err := batch.Write(append(trees[len(trees)-1], '\n'))
if err != nil {
return nil, err
}
Expand Down
Loading