[FEAT] Enable ambiguous character detection in configured contexts

- The ambiguous character detection is an important security feature to
combat against sourcebase attacks (https://trojansource.codes/).
- However there are a few problems with the feature as it stands
today (i) it's apparantly an big performance hitter, it's twice as slow
as syntax highlighting (ii) it contains false positives, because it's
reporting valid problems but not valid within the context of a
programming language (ambiguous charachters in code comments being a
prime example) that can lead to security issues (iii) charachters from
certain languages always being marked as ambiguous. It's a lot of effort
to fix the aforementioned issues.
- Therefore, make it configurable in which context the ambiguous
character detection should be run, this avoids running detection in all
contexts such as file views, but still enable it in commits and pull
requests diffs where it matters the most. Ideally this also becomes an
per-repository setting, but the code architecture doesn't allow for a
clean implementation of that.
- Adds unit test.
- Adds integration tests to ensure that the contexts and instance-wide
is respected (and that ambigious charachter detection actually work in
different places).
- Ref: https://codeberg.org/forgejo/forgejo/pulls/2395#issuecomment-1575547
- Ref: https://codeberg.org/forgejo/forgejo/issues/564
This commit is contained in:
Gusted 2024-02-21 22:18:44 +01:00
parent 0081e59243
commit 5b3a82d621
No known key found for this signature in database
GPG key ID: FD821B732837125F
9 changed files with 151 additions and 15 deletions

View file

@ -10,6 +10,7 @@ package charset
import ( import (
"html/template" "html/template"
"io" "io"
"slices"
"strings" "strings"
"code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/log"
@ -20,16 +21,29 @@ import (
// RuneNBSP is the codepoint for NBSP // RuneNBSP is the codepoint for NBSP
const RuneNBSP = 0xa0 const RuneNBSP = 0xa0
type escapeContext string
// Keep this consistent with the documentation of [ui].SKIP_ESCAPE_CONTEXTS
// Defines the different contexts that could be used to escape in.
const (
// Wiki pages.
WikiContext escapeContext = "wiki"
// Rendered content (except markup), source code and blames.
FileviewContext escapeContext = "file-view"
// Commits or pull requet's diff.
DiffContext escapeContext = "diff"
)
// EscapeControlHTML escapes the unicode control sequences in a provided html document // EscapeControlHTML escapes the unicode control sequences in a provided html document
func EscapeControlHTML(html template.HTML, locale translation.Locale, allowed ...rune) (escaped *EscapeStatus, output template.HTML) { func EscapeControlHTML(html template.HTML, locale translation.Locale, context escapeContext, allowed ...rune) (escaped *EscapeStatus, output template.HTML) {
sb := &strings.Builder{} sb := &strings.Builder{}
escaped, _ = EscapeControlReader(strings.NewReader(string(html)), sb, locale, allowed...) // err has been handled in EscapeControlReader escaped, _ = EscapeControlReader(strings.NewReader(string(html)), sb, locale, context, allowed...) // err has been handled in EscapeControlReader
return escaped, template.HTML(sb.String()) return escaped, template.HTML(sb.String())
} }
// EscapeControlReader escapes the unicode control sequences in a provided reader of HTML content and writer in a locale and returns the findings as an EscapeStatus // EscapeControlReader escapes the unicode control sequences in a provided reader of HTML content and writer in a locale and returns the findings as an EscapeStatus
func EscapeControlReader(reader io.Reader, writer io.Writer, locale translation.Locale, allowed ...rune) (escaped *EscapeStatus, err error) { func EscapeControlReader(reader io.Reader, writer io.Writer, locale translation.Locale, context escapeContext, allowed ...rune) (escaped *EscapeStatus, err error) {
if !setting.UI.AmbiguousUnicodeDetection { if !setting.UI.AmbiguousUnicodeDetection || slices.Contains(setting.UI.SkipEscapeContexts, string(context)) {
_, err = io.Copy(writer, reader) _, err = io.Copy(writer, reader)
return &EscapeStatus{}, err return &EscapeStatus{}, err
} }

View file

@ -4,6 +4,7 @@
package charset package charset
import ( import (
"html/template"
"strings" "strings"
"testing" "testing"
@ -14,6 +15,8 @@ import (
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
) )
var testContext = escapeContext("test")
type escapeControlTest struct { type escapeControlTest struct {
name string name string
text string text string
@ -159,7 +162,7 @@ func TestEscapeControlReader(t *testing.T) {
for _, tt := range tests { for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) { t.Run(tt.name, func(t *testing.T) {
output := &strings.Builder{} output := &strings.Builder{}
status, err := EscapeControlReader(strings.NewReader(tt.text), output, &translation.MockLocale{}) status, err := EscapeControlReader(strings.NewReader(tt.text), output, &translation.MockLocale{}, testContext)
assert.NoError(t, err) assert.NoError(t, err)
assert.Equal(t, tt.status, *status) assert.Equal(t, tt.status, *status)
assert.Equal(t, tt.result, output.String()) assert.Equal(t, tt.result, output.String())
@ -169,9 +172,22 @@ func TestEscapeControlReader(t *testing.T) {
func TestSettingAmbiguousUnicodeDetection(t *testing.T) { func TestSettingAmbiguousUnicodeDetection(t *testing.T) {
defer test.MockVariableValue(&setting.UI.AmbiguousUnicodeDetection, true)() defer test.MockVariableValue(&setting.UI.AmbiguousUnicodeDetection, true)()
_, out := EscapeControlHTML("a test", &translation.MockLocale{})
_, out := EscapeControlHTML("a test", &translation.MockLocale{}, testContext)
assert.EqualValues(t, `a<span class="escaped-code-point" data-escaped="[U+00A0]"><span class="char"> </span></span>test`, out) assert.EqualValues(t, `a<span class="escaped-code-point" data-escaped="[U+00A0]"><span class="char"> </span></span>test`, out)
setting.UI.AmbiguousUnicodeDetection = false setting.UI.AmbiguousUnicodeDetection = false
_, out = EscapeControlHTML("a test", &translation.MockLocale{}) _, out = EscapeControlHTML("a test", &translation.MockLocale{}, testContext)
assert.EqualValues(t, `a test`, out) assert.EqualValues(t, `a test`, out)
} }
func TestAmbiguousUnicodeDetectionContext(t *testing.T) {
defer test.MockVariableValue(&setting.UI.SkipEscapeContexts, []string{"test"})()
input := template.HTML("a test")
_, out := EscapeControlHTML(input, &translation.MockLocale{}, escapeContext("not-test"))
assert.EqualValues(t, `a<span class="escaped-code-point" data-escaped="[U+00A0]"><span class="char"> </span></span>test`, out)
_, out = EscapeControlHTML(input, &translation.MockLocale{}, testContext)
assert.EqualValues(t, input, out)
}

View file

@ -38,6 +38,7 @@ var UI = struct {
PreferredTimestampTense string PreferredTimestampTense string
AmbiguousUnicodeDetection bool AmbiguousUnicodeDetection bool
SkipEscapeContexts []string
Notification struct { Notification struct {
MinTimeout time.Duration MinTimeout time.Duration
@ -89,6 +90,7 @@ var UI = struct {
PreferredTimestampTense: "mixed", PreferredTimestampTense: "mixed",
AmbiguousUnicodeDetection: true, AmbiguousUnicodeDetection: true,
SkipEscapeContexts: []string{},
Notification: struct { Notification: struct {
MinTimeout time.Duration MinTimeout time.Duration

View file

@ -279,7 +279,7 @@ func renderBlame(ctx *context.Context, blameParts []*git.BlamePart, commitNames
lexerName = lexerNameForLine lexerName = lexerNameForLine
} }
br.EscapeStatus, br.Code = charset.EscapeControlHTML(line, ctx.Locale) br.EscapeStatus, br.Code = charset.EscapeControlHTML(line, ctx.Locale, charset.FileviewContext)
rows = append(rows, br) rows = append(rows, br)
escapeStatus = escapeStatus.Or(br.EscapeStatus) escapeStatus = escapeStatus.Or(br.EscapeStatus)
} }

View file

@ -307,7 +307,7 @@ func LFSFileGet(ctx *context.Context) {
// Building code view blocks with line number on server side. // Building code view blocks with line number on server side.
escapedContent := &bytes.Buffer{} escapedContent := &bytes.Buffer{}
ctx.Data["EscapeStatus"], _ = charset.EscapeControlReader(rd, escapedContent, ctx.Locale) ctx.Data["EscapeStatus"], _ = charset.EscapeControlReader(rd, escapedContent, ctx.Locale, charset.FileviewContext)
var output bytes.Buffer var output bytes.Buffer
lines := strings.Split(escapedContent.String(), "\n") lines := strings.Split(escapedContent.String(), "\n")

View file

@ -338,7 +338,7 @@ func renderReadmeFile(ctx *context.Context, subfolder string, readmeFile *git.Tr
log.Error("Read readme content failed: %v", err) log.Error("Read readme content failed: %v", err)
} }
contentEscaped := template.HTMLEscapeString(util.UnsafeBytesToString(content)) contentEscaped := template.HTMLEscapeString(util.UnsafeBytesToString(content))
ctx.Data["EscapeStatus"], ctx.Data["FileContent"] = charset.EscapeControlHTML(template.HTML(contentEscaped), ctx.Locale) ctx.Data["EscapeStatus"], ctx.Data["FileContent"] = charset.EscapeControlHTML(template.HTML(contentEscaped), ctx.Locale, charset.FileviewContext)
} }
if !fInfo.isLFSFile && ctx.Repo.CanEnableEditor(ctx, ctx.Doer) { if !fInfo.isLFSFile && ctx.Repo.CanEnableEditor(ctx, ctx.Doer) {
@ -572,7 +572,7 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry) {
status := &charset.EscapeStatus{} status := &charset.EscapeStatus{}
statuses := make([]*charset.EscapeStatus, len(fileContent)) statuses := make([]*charset.EscapeStatus, len(fileContent))
for i, line := range fileContent { for i, line := range fileContent {
statuses[i], fileContent[i] = charset.EscapeControlHTML(line, ctx.Locale) statuses[i], fileContent[i] = charset.EscapeControlHTML(line, ctx.Locale, charset.FileviewContext)
status = status.Or(statuses[i]) status = status.Or(statuses[i])
} }
ctx.Data["EscapeStatus"] = status ctx.Data["EscapeStatus"] = status
@ -678,7 +678,7 @@ func markupRender(ctx *context.Context, renderCtx *markup.RenderContext, input i
go func() { go func() {
sb := &strings.Builder{} sb := &strings.Builder{}
// We allow NBSP here this is rendered // We allow NBSP here this is rendered
escaped, _ = charset.EscapeControlReader(markupRd, sb, ctx.Locale, charset.RuneNBSP) escaped, _ = charset.EscapeControlReader(markupRd, sb, ctx.Locale, charset.FileviewContext, charset.RuneNBSP)
output = template.HTML(sb.String()) output = template.HTML(sb.String())
close(done) close(done)
}() }()

View file

@ -254,7 +254,7 @@ func renderViewPage(ctx *context.Context) (*git.Repository, *git.TreeEntry) {
done := make(chan struct{}) done := make(chan struct{})
go func() { go func() {
// We allow NBSP here this is rendered // We allow NBSP here this is rendered
escaped, _ = charset.EscapeControlReader(markupRd, buf, ctx.Locale, charset.RuneNBSP) escaped, _ = charset.EscapeControlReader(markupRd, buf, ctx.Locale, charset.WikiContext, charset.RuneNBSP)
output = buf.String() output = buf.String()
buf.Reset() buf.Reset()
close(done) close(done)

View file

@ -284,14 +284,14 @@ type DiffInline struct {
// DiffInlineWithUnicodeEscape makes a DiffInline with hidden unicode characters escaped // DiffInlineWithUnicodeEscape makes a DiffInline with hidden unicode characters escaped
func DiffInlineWithUnicodeEscape(s template.HTML, locale translation.Locale) DiffInline { func DiffInlineWithUnicodeEscape(s template.HTML, locale translation.Locale) DiffInline {
status, content := charset.EscapeControlHTML(s, locale) status, content := charset.EscapeControlHTML(s, locale, charset.DiffContext)
return DiffInline{EscapeStatus: status, Content: content} return DiffInline{EscapeStatus: status, Content: content}
} }
// DiffInlineWithHighlightCode makes a DiffInline with code highlight and hidden unicode characters escaped // DiffInlineWithHighlightCode makes a DiffInline with code highlight and hidden unicode characters escaped
func DiffInlineWithHighlightCode(fileName, language, code string, locale translation.Locale) DiffInline { func DiffInlineWithHighlightCode(fileName, language, code string, locale translation.Locale) DiffInline {
highlighted, _ := highlight.Code(fileName, language, code) highlighted, _ := highlight.Code(fileName, language, code)
status, content := charset.EscapeControlHTML(highlighted, locale) status, content := charset.EscapeControlHTML(highlighted, locale, charset.DiffContext)
return DiffInline{EscapeStatus: status, Content: content} return DiffInline{EscapeStatus: status, Content: content}
} }

View file

@ -5,8 +5,16 @@ package integration
import ( import (
"net/http" "net/http"
"net/url"
"strings"
"testing" "testing"
unit_model "code.gitea.io/gitea/models/unit"
"code.gitea.io/gitea/models/unittest"
user_model "code.gitea.io/gitea/models/user"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/test"
files_service "code.gitea.io/gitea/services/repository/files"
"code.gitea.io/gitea/tests" "code.gitea.io/gitea/tests"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
@ -25,3 +33,99 @@ func TestRenderFileSVGIsInImgTag(t *testing.T) {
assert.True(t, exists, "The SVG image should be in an <img> tag so that scripts in the SVG are not run") assert.True(t, exists, "The SVG image should be in an <img> tag so that scripts in the SVG are not run")
assert.Equal(t, "/user2/repo2/raw/branch/master/line.svg", src) assert.Equal(t, "/user2/repo2/raw/branch/master/line.svg", src)
} }
func TestAmbiguousCharacterDetection(t *testing.T) {
onGiteaRun(t, func(t *testing.T, u *url.URL) {
user2 := unittest.AssertExistsAndLoadBean(t, &user_model.User{ID: 2})
session := loginUser(t, user2.Name)
// Prepare the environments. File view, commit view (diff), wiki page.
repo, commitID, f := CreateDeclarativeRepo(t, user2, "",
[]unit_model.Type{unit_model.TypeCode, unit_model.TypeWiki}, nil,
[]*files_service.ChangeRepoFile{
{
Operation: "create",
TreePath: "test.sh",
ContentReader: strings.NewReader("Hello there!\nline western"),
},
},
)
defer f()
req := NewRequestWithValues(t, "POST", repo.Link()+"/wiki?action=new", map[string]string{
"_csrf": GetCSRF(t, session, repo.Link()+"/wiki?action=new"),
"title": "Normal",
"content": "Hello Hello",
})
session.MakeRequest(t, req, http.StatusSeeOther)
assertCase := func(t *testing.T, fileContext, commitContext, wikiContext bool) {
t.Helper()
t.Run("File context", func(t *testing.T) {
defer tests.PrintCurrentTest(t)()
req := NewRequest(t, "GET", repo.Link()+"/src/branch/main/test.sh")
resp := session.MakeRequest(t, req, http.StatusOK)
htmlDoc := NewHTMLParser(t, resp.Body)
htmlDoc.AssertElement(t, ".unicode-escape-prompt", fileContext)
})
t.Run("Commit context", func(t *testing.T) {
defer tests.PrintCurrentTest(t)()
req := NewRequest(t, "GET", repo.Link()+"/commit/"+commitID)
resp := session.MakeRequest(t, req, http.StatusOK)
htmlDoc := NewHTMLParser(t, resp.Body)
htmlDoc.AssertElement(t, ".lines-escape .toggle-escape-button", commitContext)
})
t.Run("Wiki context", func(t *testing.T) {
defer tests.PrintCurrentTest(t)()
req := NewRequest(t, "GET", repo.Link()+"/wiki/Normal")
resp := session.MakeRequest(t, req, http.StatusOK)
htmlDoc := NewHTMLParser(t, resp.Body)
htmlDoc.AssertElement(t, ".unicode-escape-prompt", wikiContext)
})
}
t.Run("Enabled all context", func(t *testing.T) {
defer test.MockVariableValue(&setting.UI.SkipEscapeContexts, []string{})()
assertCase(t, true, true, true)
})
t.Run("Enabled file context", func(t *testing.T) {
defer test.MockVariableValue(&setting.UI.SkipEscapeContexts, []string{"diff", "wiki"})()
assertCase(t, true, false, false)
})
t.Run("Enabled commit context", func(t *testing.T) {
defer test.MockVariableValue(&setting.UI.SkipEscapeContexts, []string{"file-view", "wiki"})()
assertCase(t, false, true, false)
})
t.Run("Enabled wiki context", func(t *testing.T) {
defer test.MockVariableValue(&setting.UI.SkipEscapeContexts, []string{"file-view", "diff"})()
assertCase(t, false, false, true)
})
t.Run("No context", func(t *testing.T) {
defer test.MockVariableValue(&setting.UI.SkipEscapeContexts, []string{"file-view", "wiki", "diff"})()
assertCase(t, false, false, false)
})
t.Run("Disabled detection", func(t *testing.T) {
defer test.MockVariableValue(&setting.UI.SkipEscapeContexts, []string{})()
defer test.MockVariableValue(&setting.UI.AmbiguousUnicodeDetection, false)()
assertCase(t, false, false, false)
})
})
}