Fix yet another bug with diff file names (#12771)

Following further testing, it has become apparent that the `diff --git` header
line cannot be used to determine filenames for diffs with any sort of
predictability. The answer, therefore, is to use the other lines that are
provided with a diff (`---`/`+++`, `rename from`/`rename to`, and
`copy from`/`copy to`).

Fix #12768

Signed-off-by: Andrew Thornton <art27@cantab.net>
This commit is contained in:
zeripath 2020-09-09 14:08:40 +01:00 committed by GitHub
parent 1fbc50f974
commit 96969ddec8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 247 additions and 51 deletions

View file

@ -694,7 +694,7 @@ func ActionContent2Commits(act Actioner) *repository.PushCommits {
// DiffTypeToStr returns diff type name // DiffTypeToStr returns diff type name
func DiffTypeToStr(diffType int) string { func DiffTypeToStr(diffType int) string {
diffTypes := map[int]string{ diffTypes := map[int]string{
1: "add", 2: "modify", 3: "del", 4: "rename", 1: "add", 2: "modify", 3: "del", 4: "rename", 5: "copy",
} }
return diffTypes[diffType] return diffTypes[diffType]
} }

View file

@ -53,6 +53,7 @@ const (
DiffFileChange DiffFileChange
DiffFileDel DiffFileDel
DiffFileRename DiffFileRename
DiffFileCopy
) )
// DiffLineExpandDirection represents the DiffLineSection expand direction // DiffLineExpandDirection represents the DiffLineSection expand direction
@ -481,6 +482,45 @@ func ParsePatch(maxLines, maxLineCharacters, maxFiles int, reader io.Reader) (*D
} }
line := linebuf.String() line := linebuf.String()
if strings.HasPrefix(line, "--- ") {
if line[4] == '"' {
fmt.Sscanf(line[4:], "%q", &curFile.OldName)
} else {
curFile.OldName = line[4:]
if strings.Contains(curFile.OldName, " ") {
// Git adds a terminal \t if there is a space in the name
curFile.OldName = curFile.OldName[:len(curFile.OldName)-1]
}
}
if curFile.OldName[0:2] == "a/" {
curFile.OldName = curFile.OldName[2:]
}
continue
} else if strings.HasPrefix(line, "+++ ") {
if line[4] == '"' {
fmt.Sscanf(line[4:], "%q", &curFile.Name)
} else {
curFile.Name = line[4:]
if strings.Contains(curFile.Name, " ") {
// Git adds a terminal \t if there is a space in the name
curFile.Name = curFile.Name[:len(curFile.Name)-1]
}
}
if curFile.Name[0:2] == "b/" {
curFile.Name = curFile.Name[2:]
}
curFile.IsRenamed = (curFile.Name != curFile.OldName) && !(curFile.IsCreated || curFile.IsDeleted)
if curFile.IsDeleted {
curFile.Name = curFile.OldName
curFile.OldName = ""
} else if curFile.IsCreated {
curFile.OldName = ""
}
continue
} else if len(line) == 0 {
continue
}
if strings.HasPrefix(line, "+++") || strings.HasPrefix(line, "---") || len(line) == 0 { if strings.HasPrefix(line, "+++") || strings.HasPrefix(line, "---") || len(line) == 0 {
continue continue
} }
@ -569,36 +609,10 @@ func ParsePatch(maxLines, maxLineCharacters, maxFiles int, reader io.Reader) (*D
break break
} }
// Note: In case file name is surrounded by double quotes (it happens only in git-shell).
// e.g. diff --git "a/xxx" "b/xxx"
var a string
var b string
rd := strings.NewReader(line[len(cmdDiffHead):])
char, _ := rd.ReadByte()
_ = rd.UnreadByte()
if char == '"' {
fmt.Fscanf(rd, "%q ", &a)
} else {
fmt.Fscanf(rd, "%s ", &a)
}
char, _ = rd.ReadByte()
_ = rd.UnreadByte()
if char == '"' {
fmt.Fscanf(rd, "%q", &b)
} else {
fmt.Fscanf(rd, "%s", &b)
}
a = a[2:]
b = b[2:]
curFile = &DiffFile{ curFile = &DiffFile{
Name: b,
OldName: a,
Index: len(diff.Files) + 1, Index: len(diff.Files) + 1,
Type: DiffFileChange, Type: DiffFileChange,
Sections: make([]*DiffSection, 0, 10), Sections: make([]*DiffSection, 0, 10),
IsRenamed: a != b,
} }
diff.Files = append(diff.Files, curFile) diff.Files = append(diff.Files, curFile)
curFileLinesCount = 0 curFileLinesCount = 0
@ -607,6 +621,7 @@ func ParsePatch(maxLines, maxLineCharacters, maxFiles int, reader io.Reader) (*D
curFileLFSPrefix = false curFileLFSPrefix = false
// Check file diff type and is submodule. // Check file diff type and is submodule.
loop:
for { for {
line, err := input.ReadString('\n') line, err := input.ReadString('\n')
if err != nil { if err != nil {
@ -617,6 +632,7 @@ func ParsePatch(maxLines, maxLineCharacters, maxFiles int, reader io.Reader) (*D
} }
} }
if curFile.Type != DiffFileRename {
switch { switch {
case strings.HasPrefix(line, "new file"): case strings.HasPrefix(line, "new file"):
curFile.Type = DiffFileAdd curFile.Type = DiffFileAdd
@ -629,12 +645,55 @@ func ParsePatch(maxLines, maxLineCharacters, maxFiles int, reader io.Reader) (*D
case strings.HasPrefix(line, "similarity index 100%"): case strings.HasPrefix(line, "similarity index 100%"):
curFile.Type = DiffFileRename curFile.Type = DiffFileRename
} }
if curFile.Type > 0 { if curFile.Type > 0 && curFile.Type != DiffFileRename {
if strings.HasSuffix(line, " 160000\n") { if strings.HasSuffix(line, " 160000\n") {
curFile.IsSubmodule = true curFile.IsSubmodule = true
} }
break break
} }
} else {
switch {
case strings.HasPrefix(line, "rename from "):
if line[12] == '"' {
fmt.Sscanf(line[12:], "%q", &curFile.OldName)
} else {
curFile.OldName = line[12:]
curFile.OldName = curFile.OldName[:len(curFile.OldName)-1]
}
case strings.HasPrefix(line, "rename to "):
if line[10] == '"' {
fmt.Sscanf(line[10:], "%q", &curFile.Name)
} else {
curFile.Name = line[10:]
curFile.Name = curFile.Name[:len(curFile.Name)-1]
}
curFile.IsRenamed = true
break loop
case strings.HasPrefix(line, "copy from "):
if line[10] == '"' {
fmt.Sscanf(line[10:], "%q", &curFile.OldName)
} else {
curFile.OldName = line[10:]
curFile.OldName = curFile.OldName[:len(curFile.OldName)-1]
}
case strings.HasPrefix(line, "copy to "):
if line[8] == '"' {
fmt.Sscanf(line[8:], "%q", &curFile.Name)
} else {
curFile.Name = line[8:]
curFile.Name = curFile.Name[:len(curFile.Name)-1]
}
curFile.IsRenamed = true
curFile.Type = DiffFileCopy
break loop
default:
if strings.HasSuffix(line, " 160000\n") {
curFile.IsSubmodule = true
} else {
break loop
}
}
}
} }
} }
} }

View file

@ -6,6 +6,7 @@
package gitdiff package gitdiff
import ( import (
"encoding/json"
"fmt" "fmt"
"html/template" "html/template"
"strings" "strings"
@ -14,11 +15,9 @@ import (
"code.gitea.io/gitea/models" "code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/setting"
"gopkg.in/ini.v1"
dmp "github.com/sergi/go-diff/diffmatchpatch" dmp "github.com/sergi/go-diff/diffmatchpatch"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"gopkg.in/ini.v1"
) )
func assertEqual(t *testing.T, s1 string, s2 template.HTML) { func assertEqual(t *testing.T, s1 string, s2 template.HTML) {
@ -77,7 +76,145 @@ func TestDiffToHTML(t *testing.T) {
}, DiffLineAdd)) }, DiffLineAdd))
} }
func TestParsePatch(t *testing.T) { func TestParsePatch_singlefile(t *testing.T) {
type testcase struct {
name string
gitdiff string
wantErr bool
addition int
deletion int
oldFilename string
filename string
}
tests := []testcase{
{
name: "readme.md2readme.md",
gitdiff: `diff --git "a/README.md" "b/README.md"
--- a/README.md
+++ b/README.md
@@ -1,3 +1,6 @@
# gitea-github-migrator
+
+ Build Status
- Latest Release
Docker Pulls
+ cut off
+ cut off
`,
addition: 4,
deletion: 1,
filename: "README.md",
},
{
name: "A \\ B",
gitdiff: `diff --git "a/A \\ B" "b/A \\ B"
--- "a/A \\ B"
+++ "b/A \\ B"
@@ -1,3 +1,6 @@
# gitea-github-migrator
+
+ Build Status
- Latest Release
Docker Pulls
+ cut off
+ cut off`,
addition: 4,
deletion: 1,
filename: "A \\ B",
},
{
name: "really weird filename",
gitdiff: `diff --git a/a b/file b/a a/file b/a b/file b/a a/file
index d2186f1..f5c8ed2 100644
--- a/a b/file b/a a/file
+++ b/a b/file b/a a/file
@@ -1,3 +1,2 @@
Create a weird file.
-and what does diff do here?
\ No newline at end of file`,
addition: 0,
deletion: 1,
filename: "a b/file b/a a/file",
oldFilename: "a b/file b/a a/file",
},
{
name: "delete file with blanks",
gitdiff: `diff --git a/file with blanks b/file with blanks
deleted file mode 100644
index 898651a..0000000
--- a/file with blanks
+++ /dev/null
@@ -1,5 +0,0 @@
-a blank file
-
-has a couple o line
-
-the 5th line is the last
`,
addition: 0,
deletion: 5,
filename: "file with blanks",
},
{
name: "rename a—as",
gitdiff: `diff --git "a/\360\243\220\265b\342\200\240vs" "b/a\342\200\224as"
similarity index 100%
rename from "\360\243\220\265b\342\200\240vs"
rename to "a\342\200\224as"
`,
addition: 0,
deletion: 0,
oldFilename: "𣐵b†vs",
filename: "a—as",
},
{
name: "rename with spaces",
gitdiff: `diff --git a/a b/file b/a a/file b/a b/a a/file b/b file
similarity index 100%
rename from a b/file b/a a/file
rename to a b/a a/file b/b file
`,
oldFilename: "a b/file b/a a/file",
filename: "a b/a a/file b/b file",
},
}
for _, testcase := range tests {
t.Run(testcase.name, func(t *testing.T) {
got, err := ParsePatch(setting.Git.MaxGitDiffLines, setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, strings.NewReader(testcase.gitdiff))
if (err != nil) != testcase.wantErr {
t.Errorf("ParsePatch() error = %v, wantErr %v", err, testcase.wantErr)
return
}
gotMarshaled, _ := json.MarshalIndent(got, " ", " ")
if got.NumFiles != 1 {
t.Errorf("ParsePath() did not receive 1 file:\n%s", string(gotMarshaled))
return
}
if got.TotalAddition != testcase.addition {
t.Errorf("ParsePath() does not have correct totalAddition %d, wanted %d", got.TotalAddition, testcase.addition)
}
if got.TotalDeletion != testcase.deletion {
t.Errorf("ParsePath() did not have correct totalDeletion %d, wanted %d", got.TotalDeletion, testcase.deletion)
}
file := got.Files[0]
if file.Addition != testcase.addition {
t.Errorf("ParsePath() does not have correct file addition %d, wanted %d", file.Addition, testcase.addition)
}
if file.Deletion != testcase.deletion {
t.Errorf("ParsePath() did not have correct file deletion %d, wanted %d", file.Deletion, testcase.deletion)
}
if file.OldName != testcase.oldFilename {
t.Errorf("ParsePath() did not have correct OldName %s, wanted %s", file.OldName, testcase.oldFilename)
}
if file.Name != testcase.filename {
t.Errorf("ParsePath() did not have correct Name %s, wanted %s", file.Name, testcase.filename)
}
})
}
var diff = `diff --git "a/README.md" "b/README.md" var diff = `diff --git "a/README.md" "b/README.md"
--- a/README.md --- a/README.md
+++ b/README.md +++ b/README.md