diff --git a/cmd/bazel-git/BUILD.bazel b/cmd/bazel-git/BUILD.bazel index 62161e53c0ed3f1845a110a08a143c04d35b0fea..2369c8ab5104431f12d29bbeff5e72475d267b9d 100644 --- a/cmd/bazel-git/BUILD.bazel +++ b/cmd/bazel-git/BUILD.bazel @@ -4,6 +4,7 @@ load(":pure.bzl", "COUPLETS") go_library( name = "bazel-git_lib", srcs = [ + "archive.go", "cat_file.go", "checkout.go", "config.go", @@ -24,6 +25,7 @@ go_library( "@com_github_go_git_go_git_v5//config:go_default_library", "@com_github_go_git_go_git_v5//plumbing:go_default_library", "@com_github_go_git_go_git_v5//plumbing/cache:go_default_library", + "@com_github_go_git_go_git_v5//plumbing/filemode:go_default_library", "@com_github_go_git_go_git_v5//plumbing/object:go_default_library", "@com_github_go_git_go_git_v5//plumbing/protocol/packp/sideband:go_default_library", "@com_github_go_git_go_git_v5//storage/filesystem:go_default_library", diff --git a/cmd/bazel-git/archive.go b/cmd/bazel-git/archive.go new file mode 100644 index 0000000000000000000000000000000000000000..b70727562960b9dbf01f43796e54172d6e6f99de --- /dev/null +++ b/cmd/bazel-git/archive.go @@ -0,0 +1,251 @@ +package main + +import ( + "archive/tar" + "io" + "os" + "strings" + + "github.com/go-git/go-git/v5" + "github.com/go-git/go-git/v5/plumbing" + "github.com/go-git/go-git/v5/plumbing/filemode" + "github.com/go-git/go-git/v5/plumbing/object" + "github.com/jessevdk/go-flags" +) + +const TarUmask = 002 + +type ArchiveCommand struct { + Format string `long:"format" choice:"tar" default:"tar" required:"yes" description:"Format of the resulting archive."` + Prefix string `long:"prefix" description:"Prepend / to paths in the archive."` + Output string `short:"o" long:"output" default:"-"` + Args struct { + Revision plumbing.Revision `positional-arg-name:"" description:"The tree or commit to produce an archive for." required:"yes"` + } `positional-args:"yes"` +} + +var archiveCommand ArchiveCommand + +func (cmd *ArchiveCommand) Execute(rest []string) error { + if len(rest) != 0 { + return &flags.Error{Type: flags.ErrDuplicatedFlag, Message: "invalid number of positional arguments"} + } + + repo, err := git.PlainOpen(options.GitDir) + if err != nil { + return err + } + + commit, err := resolveCommit(repo, cmd.Args.Revision) + if err != nil { + return err + } + + var output io.Writer + if cmd.Output == "-" { + output = os.Stdout + } else { + file, err := os.OpenFile(cmd.Output, os.O_WRONLY|os.O_CREATE, 0644) + if err != nil { + return err + } + defer file.Close() + output = file + } + + archive := tar.NewWriter(output) + defer archive.Close() + + if err = writeTar(repo, commit, cmd.Prefix, archive); err != nil { + return err + } + return nil +} + +func resolveCommit(repo *git.Repository, revision plumbing.Revision) (*object.Commit, error) { + hash, err := repo.ResolveRevision(revision) + if err != nil { + return nil, err + } + return repo.CommitObject(*hash) +} + +func writeTar(repo *git.Repository, commit *object.Commit, prefix string, archive *tar.Writer) error { + walker, err := treeWalker(commit) + if err != nil { + return err + } + defer walker.Close() + + if err = writeCommitHash(commit, archive); err != nil { + return err + } + + for { + name, entry, err := walker.Next() + if err == io.EOF { + break + } + if err != nil { + return err + } + + if err = writeMetadata(repo, commit, name, entry, prefix, archive); err != nil { + return err + } + + if entry.Mode.IsRegular() || entry.Mode == filemode.Executable { + if err := writeContents(repo, entry, archive); err != nil { + return err + } + } + } + + return nil +} + +func treeWalker(commit *object.Commit) (*object.TreeWalker, error) { + tree, err := commit.Tree() + if err != nil { + return nil, err + } + + recursive := true + var seen map[plumbing.Hash]bool + return object.NewTreeWalker(tree, recursive, seen), nil +} + +func writeCommitHash(commit *object.Commit, archive *tar.Writer) error { + // This is needed to support `git get-tar-commit-id` + // See: https://github.com/git/git/blob/0f3415f1f8478b05e64db11eb8aaa2915e48fef6/archive-tar.c#L329-L357 + paxRecords := make(map[string]string) + paxRecords["comment"] = commit.Hash.String() + err := archive.WriteHeader(&tar.Header{ + Typeflag: tar.TypeXGlobalHeader, + PAXRecords: paxRecords, + }) + if err != nil { + return err + } + return nil +} + +func writeMetadata(repo *git.Repository, commit *object.Commit, name string, entry object.TreeEntry, prefix string, archive *tar.Writer) error { + name = adjustName(entry, name, prefix) + + size, err := objectSize(repo, entry) + if err != nil { + return err + } + + mode, err := fileMode(entry) + if err != nil { + return err + } + + link, err := symlinkTarget(repo, entry) + if err != nil { + return err + } + + typeFlag := fileTypeFlag(entry) + + header := &tar.Header{ + Name: name, + Linkname: link, + Mode: mode, + Size: size, + Typeflag: typeFlag, + ModTime: commit.Committer.When, + } + + err = archive.WriteHeader(header) + if err != nil { + return err + } + + return nil +} + +func adjustName(entry object.TreeEntry, name string, prefix string) string { + name = prefix + name + if entry.Mode == filemode.Dir { + return name + string(os.PathSeparator) + } + return name +} + +func objectSize(repo *git.Repository, entry object.TreeEntry) (int64, error) { + // We only care about the size of regular files (including executables) + // See: https://github.com/git/git/blob/0f3415f1f8478b05e64db11eb8aaa2915e48fef6/archive-tar.c#L223 + if entry.Mode.IsRegular() || entry.Mode == filemode.Executable { + return repo.Storer.EncodedObjectSize(entry.Hash) + } + return 0, nil +} + +func fileMode(entry object.TreeEntry) (int64, error) { + osFileMode, err := entry.Mode.ToOSFileMode() + if err != nil { + return 0, err + } + mode := int64(osFileMode) + + // We need to apply the TAR umask unless it's a symlink + // See: https://github.com/git/git/blob/0f3415f1f8478b05e64db11eb8aaa2915e48fef6/archive-tar.c#L267-L275 + if entry.Mode == filemode.Symlink { + return mode, nil + } + return mode & ^TarUmask, nil +} + +func fileTypeFlag(entry object.TreeEntry) byte { + switch entry.Mode { + case filemode.Symlink: + return tar.TypeSymlink + case filemode.Dir: + return tar.TypeDir + default: + return tar.TypeReg + } +} + +func symlinkTarget(repo *git.Repository, entry object.TreeEntry) (string, error) { + if entry.Mode != filemode.Symlink { + return "", nil + } + + // Git represents a symlink target as the blob contents, with a trailing newline + contents := new(strings.Builder) + if err := writeContents(repo, entry, contents); err != nil { + return "", err + } + return strings.TrimSpace(contents.String()), nil +} + +func writeContents(repo *git.Repository, entry object.TreeEntry, writer io.Writer) error { + blob, err := repo.BlobObject(entry.Hash) + if err != nil { + return err + } + + reader, err := blob.Reader() + if err != nil { + return err + } + + if _, err = io.Copy(writer, reader); err != nil { + return err + } + + return nil +} + +func init() { + parser.AddCommand("archive", + "Create an archive of files from a named tree.", + `Creates an archive of the specified format containing the tree structure for the named tree, and writes it out to the standard output. If is specified it is prepended to the filenames in the archive. + +**git archive** behaves differently when given a tree ID as opposed to a commit ID or tag ID. When a tree ID is provided, the current time is used as the modification time of each file in the archive. On the other hand, when a commit ID or tag ID is provided, the commit time as recorded in the referenced commit object is used instead. Additionally the commit ID is stored in a global extended pax header if the tar format is used; it can be extracted using **git get-tar-commit-id**. In ZIP files it is stored as a file comment.`, + &archiveCommand) +}