From 75885b652222ec5eb8fb9bf89137d771b08ba429 Mon Sep 17 00:00:00 2001 From: Alex Tercete Date: Mon, 13 May 2024 10:27:33 +0100 Subject: [PATCH 1/4] feat(archive): create tar to stdout from revision --- cmd/bazel-git/BUILD.bazel | 2 + cmd/bazel-git/archive.go | 218 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 220 insertions(+) create mode 100644 cmd/bazel-git/archive.go diff --git a/cmd/bazel-git/BUILD.bazel b/cmd/bazel-git/BUILD.bazel index 62161e5..2369c8a 100644 --- a/cmd/bazel-git/BUILD.bazel +++ b/cmd/bazel-git/BUILD.bazel @@ -4,6 +4,7 @@ load(":pure.bzl", "COUPLETS") go_library( name = "bazel-git_lib", srcs = [ + "archive.go", "cat_file.go", "checkout.go", "config.go", @@ -24,6 +25,7 @@ go_library( "@com_github_go_git_go_git_v5//config:go_default_library", "@com_github_go_git_go_git_v5//plumbing:go_default_library", "@com_github_go_git_go_git_v5//plumbing/cache:go_default_library", + "@com_github_go_git_go_git_v5//plumbing/filemode:go_default_library", "@com_github_go_git_go_git_v5//plumbing/object:go_default_library", "@com_github_go_git_go_git_v5//plumbing/protocol/packp/sideband:go_default_library", "@com_github_go_git_go_git_v5//storage/filesystem:go_default_library", diff --git a/cmd/bazel-git/archive.go b/cmd/bazel-git/archive.go new file mode 100644 index 0000000..941b715 --- /dev/null +++ b/cmd/bazel-git/archive.go @@ -0,0 +1,218 @@ +package main + +import ( + "archive/tar" + "io" + "os" + "strings" + + "github.com/go-git/go-git/v5" + "github.com/go-git/go-git/v5/plumbing" + "github.com/go-git/go-git/v5/plumbing/filemode" + "github.com/go-git/go-git/v5/plumbing/object" + "github.com/jessevdk/go-flags" +) + +const TarUmask = 002 + +type ArchiveCommand struct { + Format string `long:"format" choice:"tar" default:"tar" required:"yes" description:"Format of the resulting archive."` + Args struct { + Revision plumbing.Revision `positional-arg-name:"" description:"The tree or commit to produce an archive for." required:"yes"` + } `positional-args:"yes"` +} + +var archiveCommand ArchiveCommand + +func (cmd *ArchiveCommand) Execute(rest []string) error { + if len(rest) != 0 { + return &flags.Error{Type: flags.ErrDuplicatedFlag, Message: "invalid number of positional arguments"} + } + + repo, err := git.PlainOpen(options.GitDir) + if err != nil { + return err + } + + commit, err := resolveCommit(repo, cmd.Args.Revision) + if err != nil { + return err + } + + output := os.Stdout + archive := tar.NewWriter(output) + defer archive.Close() + + if err = writeTar(repo, commit, archive); err != nil { + return err + } + return nil +} + +func resolveCommit(repo *git.Repository, revision plumbing.Revision) (*object.Commit, error) { + hash, err := repo.ResolveRevision(revision) + if err != nil { + return nil, err + } + return repo.CommitObject(*hash) +} + +func writeTar(repo *git.Repository, commit *object.Commit, archive *tar.Writer) error { + walker, err := treeWalker(commit) + if err != nil { + return err + } + defer walker.Close() + + for { + name, entry, err := walker.Next() + if err == io.EOF { + break + } + if err != nil { + return err + } + + if err = writeMetadata(repo, commit, name, entry, archive); err != nil { + return err + } + + if entry.Mode.IsRegular() || entry.Mode == filemode.Executable { + if err := writeContents(repo, entry, archive); err != nil { + return err + } + } + } + + return nil +} + +func treeWalker(commit *object.Commit) (*object.TreeWalker, error) { + tree, err := commit.Tree() + if err != nil { + return nil, err + } + + recursive := true + var seen map[plumbing.Hash]bool + return object.NewTreeWalker(tree, recursive, seen), nil +} + +func writeMetadata(repo *git.Repository, commit *object.Commit, name string, entry object.TreeEntry, archive *tar.Writer) error { + name = adjustName(entry, name) + + size, err := objectSize(repo, entry) + if err != nil { + return err + } + + mode, err := fileMode(entry) + if err != nil { + return err + } + + link, err := symlinkTarget(repo, entry) + if err != nil { + return err + } + + typeFlag := fileTypeFlag(entry) + + header := &tar.Header{ + Name: name, + Linkname: link, + Mode: mode, + Size: size, + Typeflag: typeFlag, + ModTime: commit.Committer.When, + } + + err = archive.WriteHeader(header) + if err != nil { + return err + } + + return nil +} + +func adjustName(entry object.TreeEntry, name string) string { + if entry.Mode == filemode.Dir { + return name + string(os.PathSeparator) + } + return name +} + +func objectSize(repo *git.Repository, entry object.TreeEntry) (int64, error) { + // We only care about the size of regular files (including executables) + // See: https://github.com/git/git/blob/0f3415f1f8478b05e64db11eb8aaa2915e48fef6/archive-tar.c#L223 + if entry.Mode.IsRegular() || entry.Mode == filemode.Executable { + return repo.Storer.EncodedObjectSize(entry.Hash) + } + return 0, nil +} + +func fileMode(entry object.TreeEntry) (int64, error) { + osFileMode, err := entry.Mode.ToOSFileMode() + if err != nil { + return 0, err + } + mode := int64(osFileMode) + + // We need to apply the TAR umask unless it's a symlink + // See: https://github.com/git/git/blob/0f3415f1f8478b05e64db11eb8aaa2915e48fef6/archive-tar.c#L267-L275 + if entry.Mode == filemode.Symlink { + return mode, nil + } + return mode & ^TarUmask, nil +} + +func fileTypeFlag(entry object.TreeEntry) byte { + switch entry.Mode { + case filemode.Symlink: + return tar.TypeSymlink + case filemode.Dir: + return tar.TypeDir + default: + return tar.TypeReg + } +} + +func symlinkTarget(repo *git.Repository, entry object.TreeEntry) (string, error) { + if entry.Mode != filemode.Symlink { + return "", nil + } + + // Git represents a symlink target as the blob contents, with a trailing newline + contents := new(strings.Builder) + if err := writeContents(repo, entry, contents); err != nil { + return "", err + } + return strings.TrimSpace(contents.String()), nil +} + +func writeContents(repo *git.Repository, entry object.TreeEntry, writer io.Writer) error { + blob, err := repo.BlobObject(entry.Hash) + if err != nil { + return err + } + + reader, err := blob.Reader() + if err != nil { + return err + } + + if _, err = io.Copy(writer, reader); err != nil { + return err + } + + return nil +} + +func init() { + parser.AddCommand("archive", + "Create an archive of files from a named tree.", + `Creates an archive of the specified format containing the tree structure for the named tree, and writes it out to the standard output. If is specified it is prepended to the filenames in the archive. + +**git archive** behaves differently when given a tree ID as opposed to a commit ID or tag ID. When a tree ID is provided, the current time is used as the modification time of each file in the archive. On the other hand, when a commit ID or tag ID is provided, the commit time as recorded in the referenced commit object is used instead. Additionally the commit ID is stored in a global extended pax header if the tar format is used; it can be extracted using **git get-tar-commit-id**. In ZIP files it is stored as a file comment.`, + &archiveCommand) +} -- GitLab From 25474a9d7f38001ed66876c2fadcf9e301bdac25 Mon Sep 17 00:00:00 2001 From: Alex Tercete Date: Mon, 20 May 2024 10:32:40 +0100 Subject: [PATCH 2/4] feat(archive): support prefix option As of Git 2.34.1, Git doesn't add the prefix to symlink targets (which seems to be a bug). I've reproduced this behaviour to keep feature parity. --- cmd/bazel-git/archive.go | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/cmd/bazel-git/archive.go b/cmd/bazel-git/archive.go index 941b715..ca04e2f 100644 --- a/cmd/bazel-git/archive.go +++ b/cmd/bazel-git/archive.go @@ -17,6 +17,7 @@ const TarUmask = 002 type ArchiveCommand struct { Format string `long:"format" choice:"tar" default:"tar" required:"yes" description:"Format of the resulting archive."` + Prefix string `long:"prefix" description:"Prepend / to paths in the archive."` Args struct { Revision plumbing.Revision `positional-arg-name:"" description:"The tree or commit to produce an archive for." required:"yes"` } `positional-args:"yes"` @@ -43,7 +44,7 @@ func (cmd *ArchiveCommand) Execute(rest []string) error { archive := tar.NewWriter(output) defer archive.Close() - if err = writeTar(repo, commit, archive); err != nil { + if err = writeTar(repo, commit, cmd.Prefix, archive); err != nil { return err } return nil @@ -57,7 +58,7 @@ func resolveCommit(repo *git.Repository, revision plumbing.Revision) (*object.Co return repo.CommitObject(*hash) } -func writeTar(repo *git.Repository, commit *object.Commit, archive *tar.Writer) error { +func writeTar(repo *git.Repository, commit *object.Commit, prefix string, archive *tar.Writer) error { walker, err := treeWalker(commit) if err != nil { return err @@ -73,7 +74,7 @@ func writeTar(repo *git.Repository, commit *object.Commit, archive *tar.Writer) return err } - if err = writeMetadata(repo, commit, name, entry, archive); err != nil { + if err = writeMetadata(repo, commit, name, entry, prefix, archive); err != nil { return err } @@ -98,8 +99,8 @@ func treeWalker(commit *object.Commit) (*object.TreeWalker, error) { return object.NewTreeWalker(tree, recursive, seen), nil } -func writeMetadata(repo *git.Repository, commit *object.Commit, name string, entry object.TreeEntry, archive *tar.Writer) error { - name = adjustName(entry, name) +func writeMetadata(repo *git.Repository, commit *object.Commit, name string, entry object.TreeEntry, prefix string, archive *tar.Writer) error { + name = adjustName(entry, name, prefix) size, err := objectSize(repo, entry) if err != nil { @@ -135,7 +136,8 @@ func writeMetadata(repo *git.Repository, commit *object.Commit, name string, ent return nil } -func adjustName(entry object.TreeEntry, name string) string { +func adjustName(entry object.TreeEntry, name string, prefix string) string { + name = prefix + name if entry.Mode == filemode.Dir { return name + string(os.PathSeparator) } -- GitLab From d5def3a4e48af98a0e05ecf9d17ef0d860b9e85b Mon Sep 17 00:00:00 2001 From: Alex Tercete Date: Mon, 20 May 2024 11:05:14 +0100 Subject: [PATCH 3/4] feat(archive): support output option --- cmd/bazel-git/archive.go | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/cmd/bazel-git/archive.go b/cmd/bazel-git/archive.go index ca04e2f..ed0f2ef 100644 --- a/cmd/bazel-git/archive.go +++ b/cmd/bazel-git/archive.go @@ -18,6 +18,7 @@ const TarUmask = 002 type ArchiveCommand struct { Format string `long:"format" choice:"tar" default:"tar" required:"yes" description:"Format of the resulting archive."` Prefix string `long:"prefix" description:"Prepend / to paths in the archive."` + Output string `short:"o" long:"output" default:"-"` Args struct { Revision plumbing.Revision `positional-arg-name:"" description:"The tree or commit to produce an archive for." required:"yes"` } `positional-args:"yes"` @@ -40,7 +41,18 @@ func (cmd *ArchiveCommand) Execute(rest []string) error { return err } - output := os.Stdout + var output io.Writer + if cmd.Output == "-" { + output = os.Stdout + } else { + file, err := os.OpenFile(cmd.Output, os.O_WRONLY|os.O_CREATE, 0644) + if err != nil { + return err + } + defer file.Close() + output = file + } + archive := tar.NewWriter(output) defer archive.Close() -- GitLab From 8bfd19f16e859abbf552d17b97f29304c20a461a Mon Sep 17 00:00:00 2001 From: Alex Tercete Date: Wed, 22 May 2024 12:07:28 +0100 Subject: [PATCH 4/4] feat(archive): support `get-tar-commit-id` --- cmd/bazel-git/archive.go | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/cmd/bazel-git/archive.go b/cmd/bazel-git/archive.go index ed0f2ef..b707275 100644 --- a/cmd/bazel-git/archive.go +++ b/cmd/bazel-git/archive.go @@ -77,6 +77,10 @@ func writeTar(repo *git.Repository, commit *object.Commit, prefix string, archiv } defer walker.Close() + if err = writeCommitHash(commit, archive); err != nil { + return err + } + for { name, entry, err := walker.Next() if err == io.EOF { @@ -111,6 +115,21 @@ func treeWalker(commit *object.Commit) (*object.TreeWalker, error) { return object.NewTreeWalker(tree, recursive, seen), nil } +func writeCommitHash(commit *object.Commit, archive *tar.Writer) error { + // This is needed to support `git get-tar-commit-id` + // See: https://github.com/git/git/blob/0f3415f1f8478b05e64db11eb8aaa2915e48fef6/archive-tar.c#L329-L357 + paxRecords := make(map[string]string) + paxRecords["comment"] = commit.Hash.String() + err := archive.WriteHeader(&tar.Header{ + Typeflag: tar.TypeXGlobalHeader, + PAXRecords: paxRecords, + }) + if err != nil { + return err + } + return nil +} + func writeMetadata(repo *git.Repository, commit *object.Commit, name string, entry object.TreeEntry, prefix string, archive *tar.Writer) error { name = adjustName(entry, name, prefix) -- GitLab