Merge pull request '[FEAT] Enable ambiguous character detection in configured contexts' (#2427) from gusted/forgejo-ambigious-characters into forgejo

Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/2427
Reviewed-by: Earl Warren <earl-warren@noreply.codeberg.org>
Reviewed-by: oliverpool <oliverpool@noreply.codeberg.org>
This commit is contained in:
Earl Warren 2024-02-24 12:25:31 +00:00
commit 3471adb14a
9 changed files with 151 additions and 15 deletions

View file

@ -10,6 +10,7 @@ package charset
import (
"html/template"
"io"
"slices"
"strings"
"code.gitea.io/gitea/modules/log"
@ -20,16 +21,29 @@ import (
// RuneNBSP is the codepoint for NBSP
const RuneNBSP = 0xa0
type escapeContext string
// Keep this consistent with the documentation of [ui].SKIP_ESCAPE_CONTEXTS
// Defines the different contexts that could be used to escape in.
const (
// Wiki pages.
WikiContext escapeContext = "wiki"
// Rendered content (except markup), source code and blames.
FileviewContext escapeContext = "file-view"
// Commits or pull requet's diff.
DiffContext escapeContext = "diff"
)
// EscapeControlHTML escapes the unicode control sequences in a provided html document
func EscapeControlHTML(html template.HTML, locale translation.Locale, allowed ...rune) (escaped *EscapeStatus, output template.HTML) {
func EscapeControlHTML(html template.HTML, locale translation.Locale, context escapeContext, allowed ...rune) (escaped *EscapeStatus, output template.HTML) {
sb := &strings.Builder{}
escaped, _ = EscapeControlReader(strings.NewReader(string(html)), sb, locale, allowed...) // err has been handled in EscapeControlReader
escaped, _ = EscapeControlReader(strings.NewReader(string(html)), sb, locale, context, allowed...) // err has been handled in EscapeControlReader
return escaped, template.HTML(sb.String())
}
// EscapeControlReader escapes the unicode control sequences in a provided reader of HTML content and writer in a locale and returns the findings as an EscapeStatus
func EscapeControlReader(reader io.Reader, writer io.Writer, locale translation.Locale, allowed ...rune) (escaped *EscapeStatus, err error) {
if !setting.UI.AmbiguousUnicodeDetection {
func EscapeControlReader(reader io.Reader, writer io.Writer, locale translation.Locale, context escapeContext, allowed ...rune) (escaped *EscapeStatus, err error) {
if !setting.UI.AmbiguousUnicodeDetection || slices.Contains(setting.UI.SkipEscapeContexts, string(context)) {
_, err = io.Copy(writer, reader)
return &EscapeStatus{}, err
}

View file

@ -4,6 +4,7 @@
package charset
import (
"html/template"
"strings"
"testing"
@ -14,6 +15,8 @@ import (
"github.com/stretchr/testify/assert"
)
var testContext = escapeContext("test")
type escapeControlTest struct {
name string
text string
@ -159,7 +162,7 @@ func TestEscapeControlReader(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
output := &strings.Builder{}
status, err := EscapeControlReader(strings.NewReader(tt.text), output, &translation.MockLocale{})
status, err := EscapeControlReader(strings.NewReader(tt.text), output, &translation.MockLocale{}, testContext)
assert.NoError(t, err)
assert.Equal(t, tt.status, *status)
assert.Equal(t, tt.result, output.String())
@ -169,9 +172,22 @@ func TestEscapeControlReader(t *testing.T) {
func TestSettingAmbiguousUnicodeDetection(t *testing.T) {
defer test.MockVariableValue(&setting.UI.AmbiguousUnicodeDetection, true)()
_, out := EscapeControlHTML("a test", &translation.MockLocale{})
_, out := EscapeControlHTML("a test", &translation.MockLocale{}, testContext)
assert.EqualValues(t, `a<span class="escaped-code-point" data-escaped="[U+00A0]"><span class="char"> </span></span>test`, out)
setting.UI.AmbiguousUnicodeDetection = false
_, out = EscapeControlHTML("a test", &translation.MockLocale{})
_, out = EscapeControlHTML("a test", &translation.MockLocale{}, testContext)
assert.EqualValues(t, `a test`, out)
}
func TestAmbiguousUnicodeDetectionContext(t *testing.T) {
defer test.MockVariableValue(&setting.UI.SkipEscapeContexts, []string{"test"})()
input := template.HTML("a test")
_, out := EscapeControlHTML(input, &translation.MockLocale{}, escapeContext("not-test"))
assert.EqualValues(t, `a<span class="escaped-code-point" data-escaped="[U+00A0]"><span class="char"> </span></span>test`, out)
_, out = EscapeControlHTML(input, &translation.MockLocale{}, testContext)
assert.EqualValues(t, input, out)
}

View file

@ -38,6 +38,7 @@ var UI = struct {
PreferredTimestampTense string
AmbiguousUnicodeDetection bool
SkipEscapeContexts []string
Notification struct {
MinTimeout time.Duration
@ -89,6 +90,7 @@ var UI = struct {
PreferredTimestampTense: "mixed",
AmbiguousUnicodeDetection: true,
SkipEscapeContexts: []string{},
Notification: struct {
MinTimeout time.Duration

View file

@ -288,7 +288,7 @@ func renderBlame(ctx *context.Context, blameParts []*git.BlamePart, commitNames
lexerName = lexerNameForLine
}
br.EscapeStatus, br.Code = charset.EscapeControlHTML(line, ctx.Locale)
br.EscapeStatus, br.Code = charset.EscapeControlHTML(line, ctx.Locale, charset.FileviewContext)
rows = append(rows, br)
escapeStatus = escapeStatus.Or(br.EscapeStatus)
}

View file

@ -307,7 +307,7 @@ func LFSFileGet(ctx *context.Context) {
// Building code view blocks with line number on server side.
escapedContent := &bytes.Buffer{}
ctx.Data["EscapeStatus"], _ = charset.EscapeControlReader(rd, escapedContent, ctx.Locale)
ctx.Data["EscapeStatus"], _ = charset.EscapeControlReader(rd, escapedContent, ctx.Locale, charset.FileviewContext)
var output bytes.Buffer
lines := strings.Split(escapedContent.String(), "\n")

View file

@ -338,7 +338,7 @@ func renderReadmeFile(ctx *context.Context, subfolder string, readmeFile *git.Tr
log.Error("Read readme content failed: %v", err)
}
contentEscaped := template.HTMLEscapeString(util.UnsafeBytesToString(content))
ctx.Data["EscapeStatus"], ctx.Data["FileContent"] = charset.EscapeControlHTML(template.HTML(contentEscaped), ctx.Locale)
ctx.Data["EscapeStatus"], ctx.Data["FileContent"] = charset.EscapeControlHTML(template.HTML(contentEscaped), ctx.Locale, charset.FileviewContext)
}
if !fInfo.isLFSFile && ctx.Repo.CanEnableEditor(ctx, ctx.Doer) {
@ -572,7 +572,7 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry) {
status := &charset.EscapeStatus{}
statuses := make([]*charset.EscapeStatus, len(fileContent))
for i, line := range fileContent {
statuses[i], fileContent[i] = charset.EscapeControlHTML(line, ctx.Locale)
statuses[i], fileContent[i] = charset.EscapeControlHTML(line, ctx.Locale, charset.FileviewContext)
status = status.Or(statuses[i])
}
ctx.Data["EscapeStatus"] = status
@ -678,7 +678,7 @@ func markupRender(ctx *context.Context, renderCtx *markup.RenderContext, input i
go func() {
sb := &strings.Builder{}
// We allow NBSP here this is rendered
escaped, _ = charset.EscapeControlReader(markupRd, sb, ctx.Locale, charset.RuneNBSP)
escaped, _ = charset.EscapeControlReader(markupRd, sb, ctx.Locale, charset.FileviewContext, charset.RuneNBSP)
output = template.HTML(sb.String())
close(done)
}()

View file

@ -254,7 +254,7 @@ func renderViewPage(ctx *context.Context) (*git.Repository, *git.TreeEntry) {
done := make(chan struct{})
go func() {
// We allow NBSP here this is rendered
escaped, _ = charset.EscapeControlReader(markupRd, buf, ctx.Locale, charset.RuneNBSP)
escaped, _ = charset.EscapeControlReader(markupRd, buf, ctx.Locale, charset.WikiContext, charset.RuneNBSP)
output = buf.String()
buf.Reset()
close(done)

View file

@ -284,14 +284,14 @@ type DiffInline struct {
// DiffInlineWithUnicodeEscape makes a DiffInline with hidden unicode characters escaped
func DiffInlineWithUnicodeEscape(s template.HTML, locale translation.Locale) DiffInline {
status, content := charset.EscapeControlHTML(s, locale)
status, content := charset.EscapeControlHTML(s, locale, charset.DiffContext)
return DiffInline{EscapeStatus: status, Content: content}
}
// DiffInlineWithHighlightCode makes a DiffInline with code highlight and hidden unicode characters escaped
func DiffInlineWithHighlightCode(fileName, language, code string, locale translation.Locale) DiffInline {
highlighted, _ := highlight.Code(fileName, language, code)
status, content := charset.EscapeControlHTML(highlighted, locale)
status, content := charset.EscapeControlHTML(highlighted, locale, charset.DiffContext)
return DiffInline{EscapeStatus: status, Content: content}
}

View file

@ -5,8 +5,16 @@ package integration
import (
"net/http"
"net/url"
"strings"
"testing"
unit_model "code.gitea.io/gitea/models/unit"
"code.gitea.io/gitea/models/unittest"
user_model "code.gitea.io/gitea/models/user"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/test"
files_service "code.gitea.io/gitea/services/repository/files"
"code.gitea.io/gitea/tests"
"github.com/stretchr/testify/assert"
@ -25,3 +33,99 @@ func TestRenderFileSVGIsInImgTag(t *testing.T) {
assert.True(t, exists, "The SVG image should be in an <img> tag so that scripts in the SVG are not run")
assert.Equal(t, "/user2/repo2/raw/branch/master/line.svg", src)
}
func TestAmbiguousCharacterDetection(t *testing.T) {
onGiteaRun(t, func(t *testing.T, u *url.URL) {
user2 := unittest.AssertExistsAndLoadBean(t, &user_model.User{ID: 2})
session := loginUser(t, user2.Name)
// Prepare the environments. File view, commit view (diff), wiki page.
repo, commitID, f := CreateDeclarativeRepo(t, user2, "",
[]unit_model.Type{unit_model.TypeCode, unit_model.TypeWiki}, nil,
[]*files_service.ChangeRepoFile{
{
Operation: "create",
TreePath: "test.sh",
ContentReader: strings.NewReader("Hello there!\nline western"),
},
},
)
defer f()
req := NewRequestWithValues(t, "POST", repo.Link()+"/wiki?action=new", map[string]string{
"_csrf": GetCSRF(t, session, repo.Link()+"/wiki?action=new"),
"title": "Normal",
"content": "Hello Hello",
})
session.MakeRequest(t, req, http.StatusSeeOther)
assertCase := func(t *testing.T, fileContext, commitContext, wikiContext bool) {
t.Helper()
t.Run("File context", func(t *testing.T) {
defer tests.PrintCurrentTest(t)()
req := NewRequest(t, "GET", repo.Link()+"/src/branch/main/test.sh")
resp := session.MakeRequest(t, req, http.StatusOK)
htmlDoc := NewHTMLParser(t, resp.Body)
htmlDoc.AssertElement(t, ".unicode-escape-prompt", fileContext)
})
t.Run("Commit context", func(t *testing.T) {
defer tests.PrintCurrentTest(t)()
req := NewRequest(t, "GET", repo.Link()+"/commit/"+commitID)
resp := session.MakeRequest(t, req, http.StatusOK)
htmlDoc := NewHTMLParser(t, resp.Body)
htmlDoc.AssertElement(t, ".lines-escape .toggle-escape-button", commitContext)
})
t.Run("Wiki context", func(t *testing.T) {
defer tests.PrintCurrentTest(t)()
req := NewRequest(t, "GET", repo.Link()+"/wiki/Normal")
resp := session.MakeRequest(t, req, http.StatusOK)
htmlDoc := NewHTMLParser(t, resp.Body)
htmlDoc.AssertElement(t, ".unicode-escape-prompt", wikiContext)
})
}
t.Run("Enabled all context", func(t *testing.T) {
defer test.MockVariableValue(&setting.UI.SkipEscapeContexts, []string{})()
assertCase(t, true, true, true)
})
t.Run("Enabled file context", func(t *testing.T) {
defer test.MockVariableValue(&setting.UI.SkipEscapeContexts, []string{"diff", "wiki"})()
assertCase(t, true, false, false)
})
t.Run("Enabled commit context", func(t *testing.T) {
defer test.MockVariableValue(&setting.UI.SkipEscapeContexts, []string{"file-view", "wiki"})()
assertCase(t, false, true, false)
})
t.Run("Enabled wiki context", func(t *testing.T) {
defer test.MockVariableValue(&setting.UI.SkipEscapeContexts, []string{"file-view", "diff"})()
assertCase(t, false, false, true)
})
t.Run("No context", func(t *testing.T) {
defer test.MockVariableValue(&setting.UI.SkipEscapeContexts, []string{"file-view", "wiki", "diff"})()
assertCase(t, false, false, false)
})
t.Run("Disabled detection", func(t *testing.T) {
defer test.MockVariableValue(&setting.UI.SkipEscapeContexts, []string{})()
defer test.MockVariableValue(&setting.UI.AmbiguousUnicodeDetection, false)()
assertCase(t, false, false, false)
})
})
}