[gitea] Refactor parseSignatureFromCommitLine (#29054)

Replace #28849. Thanks to @yp05327 for the looking into the problem.
Fix #28840

The old behavior of newSignatureFromCommitline is not right. The new
parseSignatureFromCommitLine:

1. never fails
2. only accept one format (if there is any other, it could be easily added)

And add some tests.

(cherry picked from commit a24e1da7e9e38fc5f5c84c083d122c0cc3da4b74)
This commit is contained in:
wxiaoguang 2024-02-09 11:02:53 +08:00 committed by Earl Warren
parent 045bd097c6
commit 83bb3cf86a
No known key found for this signature in database
GPG key ID: 0579CB2928A78A00
8 changed files with 104 additions and 149 deletions

View file

@ -271,7 +271,7 @@ func GetLatestCommitTime(ctx context.Context, repoPath string) (time.Time, error
return time.Time{}, err
}
commitTime := strings.TrimSpace(stdout)
return time.Parse(GitTimeLayout, commitTime)
return time.Parse("Mon Jan _2 15:04:05 2006 -0700", commitTime)
}
// DivergeObject represents commit count diverging commits

View file

@ -183,11 +183,7 @@ func parseTagRef(objectFormat ObjectFormat, ref map[string]string) (tag *Tag, er
}
}
tag.Tagger, err = newSignatureFromCommitline([]byte(ref["creator"]))
if err != nil {
return nil, fmt.Errorf("parse tagger: %w", err)
}
tag.Tagger = parseSignatureFromCommitLine(ref["creator"])
tag.Message = ref["contents"]
// strip PGP signature if present in contents field
pgpStart := strings.Index(tag.Message, beginpgp)

View file

@ -227,7 +227,7 @@ func TestRepository_parseTagRef(t *testing.T) {
ID: MustIDFromString("ab23e4b7f4cd0caafe0174c0e7ef6d651ba72889"),
Object: MustIDFromString("ab23e4b7f4cd0caafe0174c0e7ef6d651ba72889"),
Type: "commit",
Tagger: parseAuthorLine(t, "Foo Bar <foo@bar.com> 1565789218 +0300"),
Tagger: parseSignatureFromCommitLine("Foo Bar <foo@bar.com> 1565789218 +0300"),
Message: "Add changelog of v1.9.1 (#7859)\n\n* add changelog of v1.9.1\n* Update CHANGELOG.md\n",
Signature: nil,
},
@ -256,7 +256,7 @@ func TestRepository_parseTagRef(t *testing.T) {
ID: MustIDFromString("8c68a1f06fc59c655b7e3905b159d761e91c53c9"),
Object: MustIDFromString("3325fd8a973321fd59455492976c042dde3fd1ca"),
Type: "tag",
Tagger: parseAuthorLine(t, "Foo Bar <foo@bar.com> 1565789218 +0300"),
Tagger: parseSignatureFromCommitLine("Foo Bar <foo@bar.com> 1565789218 +0300"),
Message: "Add changelog of v1.9.1 (#7859)\n\n* add changelog of v1.9.1\n* Update CHANGELOG.md\n",
Signature: nil,
},
@ -314,7 +314,7 @@ qbHDASXl
ID: MustIDFromString("8c68a1f06fc59c655b7e3905b159d761e91c53c9"),
Object: MustIDFromString("3325fd8a973321fd59455492976c042dde3fd1ca"),
Type: "tag",
Tagger: parseAuthorLine(t, "Foo Bar <foo@bar.com> 1565789218 +0300"),
Tagger: parseSignatureFromCommitLine("Foo Bar <foo@bar.com> 1565789218 +0300"),
Message: "Add changelog of v1.9.1 (#7859)\n\n* add changelog of v1.9.1\n* Update CHANGELOG.md",
Signature: &CommitGPGSignature{
Signature: `-----BEGIN PGP SIGNATURE-----
@ -363,14 +363,3 @@ Add changelog of v1.9.1 (#7859)
})
}
}
func parseAuthorLine(t *testing.T, committer string) *Signature {
t.Helper()
sig, err := newSignatureFromCommitline([]byte(committer))
if err != nil {
t.Fatalf("parse author line '%s': %v", committer, err)
}
return sig
}

View file

@ -4,7 +4,46 @@
package git
const (
// GitTimeLayout is the (default) time layout used by git.
GitTimeLayout = "Mon Jan _2 15:04:05 2006 -0700"
import (
"strconv"
"strings"
"time"
"code.gitea.io/gitea/modules/log"
)
// Helper to get a signature from the commit line, which looks like:
//
// full name <user@example.com> 1378823654 +0200
//
// Haven't found the official reference for the standard format yet.
// This function never fails, if the "line" can't be parsed, it returns a default Signature with "zero" time.
func parseSignatureFromCommitLine(line string) *Signature {
sig := &Signature{}
s1, sx, ok1 := strings.Cut(line, " <")
s2, s3, ok2 := strings.Cut(sx, "> ")
if !ok1 || !ok2 {
sig.Name = line
return sig
}
sig.Name, sig.Email = s1, s2
if strings.Count(s3, " ") == 1 {
ts, tz, _ := strings.Cut(s3, " ")
seconds, _ := strconv.ParseInt(ts, 10, 64)
if tzTime, err := time.Parse("-0700", tz); err == nil {
sig.When = time.Unix(seconds, 0).In(tzTime.Location())
}
} else {
// the old gitea code tried to parse the date in a few different formats, but it's not clear why.
// according to public document, only the standard format "timestamp timezone" could be found, so drop other formats.
log.Error("suspicious commit line format: %q", line)
for _, fmt := range []string{ /*"Mon Jan _2 15:04:05 2006 -0700"*/ } {
if t, err := time.Parse(fmt, s3); err == nil {
sig.When = t
break
}
}
}
return sig
}

View file

@ -7,52 +7,8 @@
package git
import (
"bytes"
"strconv"
"strings"
"time"
"github.com/go-git/go-git/v5/plumbing/object"
)
// Signature represents the Author or Committer information.
type Signature = object.Signature
// Helper to get a signature from the commit line, which looks like these:
//
// author Patrick Gundlach <gundlach@speedata.de> 1378823654 +0200
// author Patrick Gundlach <gundlach@speedata.de> Thu, 07 Apr 2005 22:13:13 +0200
//
// but without the "author " at the beginning (this method should)
// be used for author and committer.
//
// FIXME: include timezone for timestamp!
func newSignatureFromCommitline(line []byte) (_ *Signature, err error) {
sig := new(Signature)
emailStart := bytes.IndexByte(line, '<')
if emailStart > 0 { // Empty name has already occurred, even if it shouldn't
sig.Name = strings.TrimSpace(string(line[:emailStart-1]))
}
emailEnd := bytes.IndexByte(line, '>')
sig.Email = string(line[emailStart+1 : emailEnd])
// Check date format.
if len(line) > emailEnd+2 {
firstChar := line[emailEnd+2]
if firstChar >= 48 && firstChar <= 57 {
timestop := bytes.IndexByte(line[emailEnd+2:], ' ')
timestring := string(line[emailEnd+2 : emailEnd+2+timestop])
seconds, _ := strconv.ParseInt(timestring, 10, 64)
sig.When = time.Unix(seconds, 0)
} else {
sig.When, err = time.Parse(GitTimeLayout, string(line[emailEnd+2:]))
if err != nil {
return nil, err
}
}
} else {
// Fall back to unix 0 time
sig.When = time.Unix(0, 0)
}
return sig, nil
}

View file

@ -7,21 +7,17 @@
package git
import (
"bytes"
"fmt"
"strconv"
"strings"
"time"
"code.gitea.io/gitea/modules/util"
)
// Signature represents the Author or Committer information.
// Signature represents the Author, Committer or Tagger information.
type Signature struct {
// Name represents a person name. It is an arbitrary string.
Name string
// Email is an email, but it cannot be assumed to be well-formed.
Email string
// When is the timestamp of the signature.
When time.Time
Name string // the committer name, it can be anything
Email string // the committer email, it can be anything
When time.Time // the timestamp of the signature
}
func (s *Signature) String() string {
@ -30,71 +26,5 @@ func (s *Signature) String() string {
// Decode decodes a byte array representing a signature to signature
func (s *Signature) Decode(b []byte) {
sig, _ := newSignatureFromCommitline(b)
s.Email = sig.Email
s.Name = sig.Name
s.When = sig.When
}
// Helper to get a signature from the commit line, which looks like these:
//
// author Patrick Gundlach <gundlach@speedata.de> 1378823654 +0200
// author Patrick Gundlach <gundlach@speedata.de> Thu, 07 Apr 2005 22:13:13 +0200
//
// but without the "author " at the beginning (this method should)
// be used for author and committer.
// FIXME: there are a lot of "return sig, err" (but the err is also nil), that's the old behavior, to avoid breaking
func newSignatureFromCommitline(line []byte) (sig *Signature, err error) {
sig = new(Signature)
emailStart := bytes.LastIndexByte(line, '<')
emailEnd := bytes.LastIndexByte(line, '>')
if emailStart == -1 || emailEnd == -1 || emailEnd < emailStart {
return sig, err
}
if emailStart > 0 { // Empty name has already occurred, even if it shouldn't
sig.Name = strings.TrimSpace(string(line[:emailStart-1]))
}
sig.Email = string(line[emailStart+1 : emailEnd])
hasTime := emailEnd+2 < len(line)
if !hasTime {
return sig, err
}
// Check date format.
firstChar := line[emailEnd+2]
if firstChar >= 48 && firstChar <= 57 {
idx := bytes.IndexByte(line[emailEnd+2:], ' ')
if idx < 0 {
return sig, err
}
timestring := string(line[emailEnd+2 : emailEnd+2+idx])
seconds, _ := strconv.ParseInt(timestring, 10, 64)
sig.When = time.Unix(seconds, 0)
idx += emailEnd + 3
if idx >= len(line) || idx+5 > len(line) {
return sig, err
}
timezone := string(line[idx : idx+5])
tzhours, err1 := strconv.ParseInt(timezone[0:3], 10, 64)
tzmins, err2 := strconv.ParseInt(timezone[3:], 10, 64)
if err1 != nil || err2 != nil {
return sig, err
}
if tzhours < 0 {
tzmins *= -1
}
tz := time.FixedZone("", int(tzhours*60*60+tzmins*60))
sig.When = sig.When.In(tz)
} else {
sig.When, err = time.Parse(GitTimeLayout, string(line[emailEnd+2:]))
if err != nil {
return sig, err
}
}
return sig, err
*s = *parseSignatureFromCommitLine(util.UnsafeBytesToString(b))
}

View file

@ -0,0 +1,47 @@
// Copyright 2019 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package git
import (
"testing"
"time"
"github.com/stretchr/testify/assert"
)
func TestParseSignatureFromCommitLine(t *testing.T) {
tests := []struct {
line string
want *Signature
}{
{
line: "a b <c@d.com> 12345 +0100",
want: &Signature{
Name: "a b",
Email: "c@d.com",
When: time.Unix(12345, 0).In(time.FixedZone("", 3600)),
},
},
{
line: "bad line",
want: &Signature{Name: "bad line"},
},
{
line: "bad < line",
want: &Signature{Name: "bad < line"},
},
{
line: "bad > line",
want: &Signature{Name: "bad > line"},
},
{
line: "bad-line <name@example.com>",
want: &Signature{Name: "bad-line <name@example.com>"},
},
}
for _, test := range tests {
got := parseSignatureFromCommitLine(test.line)
assert.EqualValues(t, test.want, got)
}
}

View file

@ -7,6 +7,8 @@ import (
"bytes"
"sort"
"strings"
"code.gitea.io/gitea/modules/util"
)
const (
@ -59,11 +61,7 @@ l:
// A commit can have one or more parents
tag.Type = string(line[spacepos+1:])
case "tagger":
sig, err := newSignatureFromCommitline(line[spacepos+1:])
if err != nil {
return nil, err
}
tag.Tagger = sig
tag.Tagger = parseSignatureFromCommitLine(util.UnsafeBytesToString(line[spacepos+1:]))
}
nextline += eol + 1
case eol == 0: