forgejo/modules/git/repo_attribute.go

314 lines
8.2 KiB
Go
Raw Normal View History

// Copyright 2019 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package git
import (
"bytes"
"context"
"fmt"
"os"
"strings"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/optional"
)
var LinguistAttributes = []string{"linguist-vendored", "linguist-generated", "linguist-language", "gitlab-language", "linguist-documentation", "linguist-detectable"}
// GitAttribute exposes an attribute from the .gitattribute file
type GitAttribute string //nolint:revive
// IsSpecified returns true if the gitattribute is set and not empty
func (ca GitAttribute) IsSpecified() bool {
return ca != "" && ca != "unspecified"
}
// String returns the value of the attribute or "" if unspecified
func (ca GitAttribute) String() string {
if !ca.IsSpecified() {
return ""
}
return string(ca)
}
// Prefix returns the value of the attribute before any question mark '?'
//
// sometimes used within gitlab-language: https://docs.gitlab.com/ee/user/project/highlighting.html#override-syntax-highlighting-for-a-file-type
func (ca GitAttribute) Prefix() string {
s := ca.String()
if i := strings.IndexByte(s, '?'); i >= 0 {
return s[:i]
}
return s
}
// Bool returns true if "set"/"true", false if "unset"/"false", none otherwise
func (ca GitAttribute) Bool() optional.Option[bool] {
switch ca {
case "set", "true":
return optional.Some(true)
case "unset", "false":
return optional.Some(false)
}
return optional.None[bool]()
}
// GitAttributeFirst returns the first specified attribute
//
// If treeish is empty, the gitattribute will be read from the current repo (which MUST be a working directory and NOT bare).
func (repo *Repository) GitAttributeFirst(treeish, filename string, attributes ...string) (GitAttribute, error) {
values, err := repo.GitAttributes(treeish, filename, attributes...)
if err != nil {
return "", err
}
for _, a := range attributes {
if values[a].IsSpecified() {
return values[a], nil
}
}
return "", nil
}
func (repo *Repository) gitCheckAttrCommand(treeish string, attributes ...string) (*Command, *RunOpts, context.CancelFunc, error) {
if len(attributes) == 0 {
return nil, nil, nil, fmt.Errorf("no provided attributes to check-attr")
}
env := os.Environ()
var deleteTemporaryFile context.CancelFunc
// git < 2.40 cannot run check-attr on bare repo, but needs INDEX + WORK_TREE
hasIndex := treeish == ""
if !hasIndex && !SupportCheckAttrOnBare {
indexFilename, worktree, cancel, err := repo.ReadTreeToTemporaryIndex(treeish)
if err != nil {
return nil, nil, nil, err
}
deleteTemporaryFile = cancel
env = append(env, "GIT_INDEX_FILE="+indexFilename, "GIT_WORK_TREE="+worktree)
hasIndex = true
// clear treeish to read from provided index/work_tree
treeish = ""
}
ctx, cancel := context.WithCancel(repo.Ctx)
if deleteTemporaryFile != nil {
ctxCancel := cancel
cancel = func() {
ctxCancel()
deleteTemporaryFile()
}
}
cmd := NewCommand(ctx, "check-attr", "-z")
Refactor git command package to improve security and maintainability (#22678) This PR follows #21535 (and replace #22592) ## Review without space diff https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1 ## Purpose of this PR 1. Make git module command completely safe (risky user inputs won't be passed as argument option anymore) 2. Avoid low-level mistakes like https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918 3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg` type 4. Simplify code when using git command ## The main idea of this PR * Move the `git.CmdArg` to the `internal` package, then no other package except `git` could use it. Then developers could never do `AddArguments(git.CmdArg(userInput))` any more. * Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already trusted arguments. It's only used in a few cases, for example: use git arguments from config file, help unit test with some arguments. * Introduce `AddOptionValues` and `AddOptionFormat`, they make code more clear and simple: * Before: `AddArguments("-m").AddDynamicArguments(message)` * After: `AddOptionValues("-m", message)` * - * Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'", sig.Name, sig.Email)))` * After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)` ## FAQ ### Why these changes were not done in #21535 ? #21535 is mainly a search&replace, it did its best to not change too much logic. Making the framework better needs a lot of changes, so this separate PR is needed as the second step. ### The naming of `AddOptionXxx` According to git's manual, the `--xxx` part is called `option`. ### How can it guarantee that `internal.CmdArg` won't be not misused? Go's specification guarantees that. Trying to access other package's internal package causes compilation error. And, `golangci-lint` also denies the git/internal package. Only the `git/command.go` can use it carefully. ### There is still a `ToTrustedCmdArgs`, will it still allow developers to make mistakes and pass untrusted arguments? Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code will be very complex (see the changes for examples). Then developers and reviewers can know that something might be unreasonable. ### Why there was a `CmdArgCheck` and why it's removed? At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck` was introduced as a hacky patch. Now, almost all code could be written as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for `CmdArgCheck` anymore. ### Why many codes for `signArg == ""` is deleted? Because in the old code, `signArg` could never be empty string, it's either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just dead code. --------- Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-04 02:30:43 +00:00
if hasIndex {
cmd.AddArguments("--cached")
Refactor git command package to improve security and maintainability (#22678) This PR follows #21535 (and replace #22592) ## Review without space diff https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1 ## Purpose of this PR 1. Make git module command completely safe (risky user inputs won't be passed as argument option anymore) 2. Avoid low-level mistakes like https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918 3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg` type 4. Simplify code when using git command ## The main idea of this PR * Move the `git.CmdArg` to the `internal` package, then no other package except `git` could use it. Then developers could never do `AddArguments(git.CmdArg(userInput))` any more. * Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already trusted arguments. It's only used in a few cases, for example: use git arguments from config file, help unit test with some arguments. * Introduce `AddOptionValues` and `AddOptionFormat`, they make code more clear and simple: * Before: `AddArguments("-m").AddDynamicArguments(message)` * After: `AddOptionValues("-m", message)` * - * Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'", sig.Name, sig.Email)))` * After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)` ## FAQ ### Why these changes were not done in #21535 ? #21535 is mainly a search&replace, it did its best to not change too much logic. Making the framework better needs a lot of changes, so this separate PR is needed as the second step. ### The naming of `AddOptionXxx` According to git's manual, the `--xxx` part is called `option`. ### How can it guarantee that `internal.CmdArg` won't be not misused? Go's specification guarantees that. Trying to access other package's internal package causes compilation error. And, `golangci-lint` also denies the git/internal package. Only the `git/command.go` can use it carefully. ### There is still a `ToTrustedCmdArgs`, will it still allow developers to make mistakes and pass untrusted arguments? Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code will be very complex (see the changes for examples). Then developers and reviewers can know that something might be unreasonable. ### Why there was a `CmdArgCheck` and why it's removed? At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck` was introduced as a hacky patch. Now, almost all code could be written as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for `CmdArgCheck` anymore. ### Why many codes for `signArg == ""` is deleted? Because in the old code, `signArg` could never be empty string, it's either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just dead code. --------- Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-04 02:30:43 +00:00
}
if len(treeish) > 0 {
cmd.AddArguments("--source")
cmd.AddDynamicArguments(treeish)
Refactor git command package to improve security and maintainability (#22678) This PR follows #21535 (and replace #22592) ## Review without space diff https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1 ## Purpose of this PR 1. Make git module command completely safe (risky user inputs won't be passed as argument option anymore) 2. Avoid low-level mistakes like https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918 3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg` type 4. Simplify code when using git command ## The main idea of this PR * Move the `git.CmdArg` to the `internal` package, then no other package except `git` could use it. Then developers could never do `AddArguments(git.CmdArg(userInput))` any more. * Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already trusted arguments. It's only used in a few cases, for example: use git arguments from config file, help unit test with some arguments. * Introduce `AddOptionValues` and `AddOptionFormat`, they make code more clear and simple: * Before: `AddArguments("-m").AddDynamicArguments(message)` * After: `AddOptionValues("-m", message)` * - * Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'", sig.Name, sig.Email)))` * After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)` ## FAQ ### Why these changes were not done in #21535 ? #21535 is mainly a search&replace, it did its best to not change too much logic. Making the framework better needs a lot of changes, so this separate PR is needed as the second step. ### The naming of `AddOptionXxx` According to git's manual, the `--xxx` part is called `option`. ### How can it guarantee that `internal.CmdArg` won't be not misused? Go's specification guarantees that. Trying to access other package's internal package causes compilation error. And, `golangci-lint` also denies the git/internal package. Only the `git/command.go` can use it carefully. ### There is still a `ToTrustedCmdArgs`, will it still allow developers to make mistakes and pass untrusted arguments? Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code will be very complex (see the changes for examples). Then developers and reviewers can know that something might be unreasonable. ### Why there was a `CmdArgCheck` and why it's removed? At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck` was introduced as a hacky patch. Now, almost all code could be written as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for `CmdArgCheck` anymore. ### Why many codes for `signArg == ""` is deleted? Because in the old code, `signArg` could never be empty string, it's either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just dead code. --------- Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-04 02:30:43 +00:00
}
cmd.AddDynamicArguments(attributes...)
Refactor git command package to improve security and maintainability (#22678) This PR follows #21535 (and replace #22592) ## Review without space diff https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1 ## Purpose of this PR 1. Make git module command completely safe (risky user inputs won't be passed as argument option anymore) 2. Avoid low-level mistakes like https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918 3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg` type 4. Simplify code when using git command ## The main idea of this PR * Move the `git.CmdArg` to the `internal` package, then no other package except `git` could use it. Then developers could never do `AddArguments(git.CmdArg(userInput))` any more. * Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already trusted arguments. It's only used in a few cases, for example: use git arguments from config file, help unit test with some arguments. * Introduce `AddOptionValues` and `AddOptionFormat`, they make code more clear and simple: * Before: `AddArguments("-m").AddDynamicArguments(message)` * After: `AddOptionValues("-m", message)` * - * Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'", sig.Name, sig.Email)))` * After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)` ## FAQ ### Why these changes were not done in #21535 ? #21535 is mainly a search&replace, it did its best to not change too much logic. Making the framework better needs a lot of changes, so this separate PR is needed as the second step. ### The naming of `AddOptionXxx` According to git's manual, the `--xxx` part is called `option`. ### How can it guarantee that `internal.CmdArg` won't be not misused? Go's specification guarantees that. Trying to access other package's internal package causes compilation error. And, `golangci-lint` also denies the git/internal package. Only the `git/command.go` can use it carefully. ### There is still a `ToTrustedCmdArgs`, will it still allow developers to make mistakes and pass untrusted arguments? Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code will be very complex (see the changes for examples). Then developers and reviewers can know that something might be unreasonable. ### Why there was a `CmdArgCheck` and why it's removed? At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck` was introduced as a hacky patch. Now, almost all code could be written as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for `CmdArgCheck` anymore. ### Why many codes for `signArg == ""` is deleted? Because in the old code, `signArg` could never be empty string, it's either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just dead code. --------- Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-04 02:30:43 +00:00
// Version 2.43.1 has a bug where the behavior of `GIT_FLUSH` is flipped.
// Ref: https://lore.kernel.org/git/CABn0oJvg3M_kBW-u=j3QhKnO=6QOzk-YFTgonYw_UvFS1NTX4g@mail.gmail.com
if InvertedGitFlushEnv {
env = append(env, "GIT_FLUSH=0")
} else {
env = append(env, "GIT_FLUSH=1")
}
Refactor git command package to improve security and maintainability (#22678) This PR follows #21535 (and replace #22592) ## Review without space diff https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1 ## Purpose of this PR 1. Make git module command completely safe (risky user inputs won't be passed as argument option anymore) 2. Avoid low-level mistakes like https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918 3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg` type 4. Simplify code when using git command ## The main idea of this PR * Move the `git.CmdArg` to the `internal` package, then no other package except `git` could use it. Then developers could never do `AddArguments(git.CmdArg(userInput))` any more. * Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already trusted arguments. It's only used in a few cases, for example: use git arguments from config file, help unit test with some arguments. * Introduce `AddOptionValues` and `AddOptionFormat`, they make code more clear and simple: * Before: `AddArguments("-m").AddDynamicArguments(message)` * After: `AddOptionValues("-m", message)` * - * Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'", sig.Name, sig.Email)))` * After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)` ## FAQ ### Why these changes were not done in #21535 ? #21535 is mainly a search&replace, it did its best to not change too much logic. Making the framework better needs a lot of changes, so this separate PR is needed as the second step. ### The naming of `AddOptionXxx` According to git's manual, the `--xxx` part is called `option`. ### How can it guarantee that `internal.CmdArg` won't be not misused? Go's specification guarantees that. Trying to access other package's internal package causes compilation error. And, `golangci-lint` also denies the git/internal package. Only the `git/command.go` can use it carefully. ### There is still a `ToTrustedCmdArgs`, will it still allow developers to make mistakes and pass untrusted arguments? Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code will be very complex (see the changes for examples). Then developers and reviewers can know that something might be unreasonable. ### Why there was a `CmdArgCheck` and why it's removed? At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck` was introduced as a hacky patch. Now, almost all code could be written as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for `CmdArgCheck` anymore. ### Why many codes for `signArg == ""` is deleted? Because in the old code, `signArg` could never be empty string, it's either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just dead code. --------- Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-04 02:30:43 +00:00
return cmd, &RunOpts{
Env: env,
Dir: repo.Path,
}, cancel, nil
}
// GitAttributes returns gitattribute.
//
// If treeish is empty, the gitattribute will be read from the current repo (which MUST be a working directory and NOT bare).
func (repo *Repository) GitAttributes(treeish, filename string, attributes ...string) (map[string]GitAttribute, error) {
cmd, runOpts, cancel, err := repo.gitCheckAttrCommand(treeish, attributes...)
if err != nil {
return nil, err
}
defer cancel()
stdOut := new(bytes.Buffer)
runOpts.Stdout = stdOut
stdErr := new(bytes.Buffer)
runOpts.Stderr = stdErr
cmd.AddDashesAndList(filename)
if err := cmd.Run(runOpts); err != nil {
return nil, fmt.Errorf("failed to run check-attr: %w\n%s\n%s", err, stdOut.String(), stdErr.String())
}
// FIXME: This is incorrect on versions < 1.8.5
fields := bytes.Split(stdOut.Bytes(), []byte{'\000'})
if len(fields)%3 != 1 {
return nil, fmt.Errorf("wrong number of fields in return from check-attr")
}
values := make(map[string]GitAttribute, len(attributes))
for ; len(fields) >= 3; fields = fields[3:] {
// filename := string(fields[0])
attribute := string(fields[1])
value := string(fields[2])
values[attribute] = GitAttribute(value)
}
return values, nil
}
type attributeTriple struct {
Filename string
Attribute string
Value string
}
type nulSeparatedAttributeWriter struct {
tmp []byte
attributes chan attributeTriple
closed chan struct{}
working attributeTriple
pos int
}
func (wr *nulSeparatedAttributeWriter) Write(p []byte) (n int, err error) {
l, read := len(p), 0
nulIdx := bytes.IndexByte(p, '\x00')
for nulIdx >= 0 {
wr.tmp = append(wr.tmp, p[:nulIdx]...)
switch wr.pos {
case 0:
wr.working = attributeTriple{
Filename: string(wr.tmp),
}
case 1:
wr.working.Attribute = string(wr.tmp)
case 2:
wr.working.Value = string(wr.tmp)
}
wr.tmp = wr.tmp[:0]
wr.pos++
if wr.pos > 2 {
wr.attributes <- wr.working
wr.pos = 0
}
read += nulIdx + 1
if l > read {
p = p[nulIdx+1:]
nulIdx = bytes.IndexByte(p, '\x00')
} else {
return l, nil
}
}
wr.tmp = append(wr.tmp, p...)
return len(p), nil
}
func (wr *nulSeparatedAttributeWriter) Close() error {
select {
case <-wr.closed:
return nil
default:
}
close(wr.attributes)
close(wr.closed)
return nil
}
// GitAttributeChecker creates an AttributeChecker for the given repository and provided commit ID.
//
// If treeish is empty, the gitattribute will be read from the current repo (which MUST be a working directory and NOT bare).
func (repo *Repository) GitAttributeChecker(treeish string, attributes ...string) (AttributeChecker, error) {
cmd, runOpts, cancel, err := repo.gitCheckAttrCommand(treeish, attributes...)
if err != nil {
return AttributeChecker{}, err
}
ac := AttributeChecker{
attributeNumber: len(attributes),
ctx: cmd.parentContext,
cancel: cancel, // will be cancelled on Close
}
stdinReader, stdinWriter, err := os.Pipe()
if err != nil {
ac.cancel()
return AttributeChecker{}, err
}
ac.stdinWriter = stdinWriter // will be closed on Close
lw := new(nulSeparatedAttributeWriter)
lw.attributes = make(chan attributeTriple, len(attributes))
lw.closed = make(chan struct{})
ac.attributesCh = lw.attributes
cmd.AddArguments("--stdin")
go func() {
defer stdinReader.Close()
defer lw.Close()
stdErr := new(bytes.Buffer)
runOpts.Stdin = stdinReader
runOpts.Stdout = lw
runOpts.Stderr = stdErr
err := cmd.Run(runOpts)
if err != nil && // If there is an error we need to return but:
cmd.parentContext.Err() != err && // 1. Ignore the context error if the context is cancelled or exceeds the deadline (RunWithContext could return c.ctx.Err() which is Canceled or DeadlineExceeded)
err.Error() != "signal: killed" { // 2. We should not pass up errors due to the program being killed
log.Error("failed to run attr-check. Error: %w\nStderr: %s", err, stdErr.String())
}
}()
return ac, nil
}
type AttributeChecker struct {
ctx context.Context
cancel context.CancelFunc
stdinWriter *os.File
attributeNumber int
attributesCh <-chan attributeTriple
}
func (ac AttributeChecker) CheckPath(path string) (map[string]GitAttribute, error) {
if err := ac.ctx.Err(); err != nil {
return nil, err
}
if _, err := ac.stdinWriter.Write([]byte(path + "\x00")); err != nil {
return nil, err
}
rs := make(map[string]GitAttribute)
for i := 0; i < ac.attributeNumber; i++ {
select {
case attr, ok := <-ac.attributesCh:
if !ok {
return nil, ac.ctx.Err()
}
rs[attr.Attribute] = GitAttribute(attr.Value)
case <-ac.ctx.Done():
return nil, ac.ctx.Err()
}
}
return rs, nil
}
func (ac AttributeChecker) Close() error {
ac.cancel()
return ac.stdinWriter.Close()
}