mirror of
https://codeberg.org/forgejo/forgejo.git
synced 2025-01-12 18:15:39 +00:00
Run processors on whole of text (#16155)
There is an inefficiency in the design of our processors which means that Emoji and other processors run in order n^2 time. This PR forces the processors to process the entirety of text node before passing back up. The fundamental inefficiency remains but it should be significantly ameliorated. Signed-off-by: Andrew Thornton <art27@cantab.net>
This commit is contained in:
parent
6ad5d0a306
commit
0db1048c3a
3 changed files with 414 additions and 316 deletions
|
@ -6,6 +6,7 @@
|
|||
package emoji
|
||||
|
||||
import (
|
||||
"io"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
|
@ -145,6 +146,8 @@ func (n *rememberSecondWriteWriter) Write(p []byte) (int, error) {
|
|||
if n.writecount == 2 {
|
||||
n.idx = n.pos
|
||||
n.end = n.pos + len(p)
|
||||
n.pos += len(p)
|
||||
return len(p), io.EOF
|
||||
}
|
||||
n.pos += len(p)
|
||||
return len(p), nil
|
||||
|
@ -155,6 +158,8 @@ func (n *rememberSecondWriteWriter) WriteString(s string) (int, error) {
|
|||
if n.writecount == 2 {
|
||||
n.idx = n.pos
|
||||
n.end = n.pos + len(s)
|
||||
n.pos += len(s)
|
||||
return len(s), io.EOF
|
||||
}
|
||||
n.pos += len(s)
|
||||
return len(s), nil
|
||||
|
|
|
@ -89,6 +89,7 @@ func isLinkStr(link string) bool {
|
|||
return validLinksPattern.MatchString(link)
|
||||
}
|
||||
|
||||
// FIXME: This function is not concurrent safe
|
||||
func getIssueFullPattern() *regexp.Regexp {
|
||||
if issueFullPattern == nil {
|
||||
issueFullPattern = regexp.MustCompile(regexp.QuoteMeta(setting.AppURL) +
|
||||
|
@ -566,26 +567,38 @@ func replaceContentList(node *html.Node, i, j int, newNodes []*html.Node) {
|
|||
}
|
||||
|
||||
func mentionProcessor(ctx *RenderContext, node *html.Node) {
|
||||
// We replace only the first mention; other mentions will be addressed later
|
||||
found, loc := references.FindFirstMentionBytes([]byte(node.Data))
|
||||
if !found {
|
||||
return
|
||||
}
|
||||
mention := node.Data[loc.Start:loc.End]
|
||||
var teams string
|
||||
teams, ok := ctx.Metas["teams"]
|
||||
// FIXME: util.URLJoin may not be necessary here:
|
||||
// - setting.AppURL is defined to have a terminal '/' so unless mention[1:]
|
||||
// is an AppSubURL link we can probably fallback to concatenation.
|
||||
// team mention should follow @orgName/teamName style
|
||||
if ok && strings.Contains(mention, "/") {
|
||||
mentionOrgAndTeam := strings.Split(mention, "/")
|
||||
if mentionOrgAndTeam[0][1:] == ctx.Metas["org"] && strings.Contains(teams, ","+strings.ToLower(mentionOrgAndTeam[1])+",") {
|
||||
replaceContent(node, loc.Start, loc.End, createLink(util.URLJoin(setting.AppURL, "org", ctx.Metas["org"], "teams", mentionOrgAndTeam[1]), mention, "mention"))
|
||||
start := 0
|
||||
next := node.NextSibling
|
||||
for node != nil && node != next && start < len(node.Data) {
|
||||
// We replace only the first mention; other mentions will be addressed later
|
||||
found, loc := references.FindFirstMentionBytes([]byte(node.Data[start:]))
|
||||
if !found {
|
||||
return
|
||||
}
|
||||
return
|
||||
loc.Start += start
|
||||
loc.End += start
|
||||
mention := node.Data[loc.Start:loc.End]
|
||||
var teams string
|
||||
teams, ok := ctx.Metas["teams"]
|
||||
// FIXME: util.URLJoin may not be necessary here:
|
||||
// - setting.AppURL is defined to have a terminal '/' so unless mention[1:]
|
||||
// is an AppSubURL link we can probably fallback to concatenation.
|
||||
// team mention should follow @orgName/teamName style
|
||||
if ok && strings.Contains(mention, "/") {
|
||||
mentionOrgAndTeam := strings.Split(mention, "/")
|
||||
if mentionOrgAndTeam[0][1:] == ctx.Metas["org"] && strings.Contains(teams, ","+strings.ToLower(mentionOrgAndTeam[1])+",") {
|
||||
replaceContent(node, loc.Start, loc.End, createLink(util.URLJoin(setting.AppURL, "org", ctx.Metas["org"], "teams", mentionOrgAndTeam[1]), mention, "mention"))
|
||||
node = node.NextSibling.NextSibling
|
||||
start = 0
|
||||
continue
|
||||
}
|
||||
start = loc.End
|
||||
continue
|
||||
}
|
||||
replaceContent(node, loc.Start, loc.End, createLink(util.URLJoin(setting.AppURL, mention[1:]), mention, "mention"))
|
||||
node = node.NextSibling.NextSibling
|
||||
start = 0
|
||||
}
|
||||
replaceContent(node, loc.Start, loc.End, createLink(util.URLJoin(setting.AppURL, mention[1:]), mention, "mention"))
|
||||
}
|
||||
|
||||
func shortLinkProcessor(ctx *RenderContext, node *html.Node) {
|
||||
|
@ -593,188 +606,196 @@ func shortLinkProcessor(ctx *RenderContext, node *html.Node) {
|
|||
}
|
||||
|
||||
func shortLinkProcessorFull(ctx *RenderContext, node *html.Node, noLink bool) {
|
||||
m := shortLinkPattern.FindStringSubmatchIndex(node.Data)
|
||||
if m == nil {
|
||||
return
|
||||
}
|
||||
next := node.NextSibling
|
||||
for node != nil && node != next {
|
||||
m := shortLinkPattern.FindStringSubmatchIndex(node.Data)
|
||||
if m == nil {
|
||||
return
|
||||
}
|
||||
|
||||
content := node.Data[m[2]:m[3]]
|
||||
tail := node.Data[m[4]:m[5]]
|
||||
props := make(map[string]string)
|
||||
content := node.Data[m[2]:m[3]]
|
||||
tail := node.Data[m[4]:m[5]]
|
||||
props := make(map[string]string)
|
||||
|
||||
// MediaWiki uses [[link|text]], while GitHub uses [[text|link]]
|
||||
// It makes page handling terrible, but we prefer GitHub syntax
|
||||
// And fall back to MediaWiki only when it is obvious from the look
|
||||
// Of text and link contents
|
||||
sl := strings.Split(content, "|")
|
||||
for _, v := range sl {
|
||||
if equalPos := strings.IndexByte(v, '='); equalPos == -1 {
|
||||
// There is no equal in this argument; this is a mandatory arg
|
||||
if props["name"] == "" {
|
||||
if isLinkStr(v) {
|
||||
// If we clearly see it is a link, we save it so
|
||||
// MediaWiki uses [[link|text]], while GitHub uses [[text|link]]
|
||||
// It makes page handling terrible, but we prefer GitHub syntax
|
||||
// And fall back to MediaWiki only when it is obvious from the look
|
||||
// Of text and link contents
|
||||
sl := strings.Split(content, "|")
|
||||
for _, v := range sl {
|
||||
if equalPos := strings.IndexByte(v, '='); equalPos == -1 {
|
||||
// There is no equal in this argument; this is a mandatory arg
|
||||
if props["name"] == "" {
|
||||
if isLinkStr(v) {
|
||||
// If we clearly see it is a link, we save it so
|
||||
|
||||
// But first we need to ensure, that if both mandatory args provided
|
||||
// look like links, we stick to GitHub syntax
|
||||
if props["link"] != "" {
|
||||
props["name"] = props["link"]
|
||||
// But first we need to ensure, that if both mandatory args provided
|
||||
// look like links, we stick to GitHub syntax
|
||||
if props["link"] != "" {
|
||||
props["name"] = props["link"]
|
||||
}
|
||||
|
||||
props["link"] = strings.TrimSpace(v)
|
||||
} else {
|
||||
props["name"] = v
|
||||
}
|
||||
|
||||
props["link"] = strings.TrimSpace(v)
|
||||
} else {
|
||||
props["name"] = v
|
||||
props["link"] = strings.TrimSpace(v)
|
||||
}
|
||||
} else {
|
||||
props["link"] = strings.TrimSpace(v)
|
||||
}
|
||||
} else {
|
||||
// There is an equal; optional argument.
|
||||
// There is an equal; optional argument.
|
||||
|
||||
sep := strings.IndexByte(v, '=')
|
||||
key, val := v[:sep], html.UnescapeString(v[sep+1:])
|
||||
sep := strings.IndexByte(v, '=')
|
||||
key, val := v[:sep], html.UnescapeString(v[sep+1:])
|
||||
|
||||
// When parsing HTML, x/net/html will change all quotes which are
|
||||
// not used for syntax into UTF-8 quotes. So checking val[0] won't
|
||||
// be enough, since that only checks a single byte.
|
||||
if len(val) > 1 {
|
||||
if (strings.HasPrefix(val, "“") && strings.HasSuffix(val, "”")) ||
|
||||
(strings.HasPrefix(val, "‘") && strings.HasSuffix(val, "’")) {
|
||||
const lenQuote = len("‘")
|
||||
val = val[lenQuote : len(val)-lenQuote]
|
||||
} else if (strings.HasPrefix(val, "\"") && strings.HasSuffix(val, "\"")) ||
|
||||
(strings.HasPrefix(val, "'") && strings.HasSuffix(val, "'")) {
|
||||
val = val[1 : len(val)-1]
|
||||
} else if strings.HasPrefix(val, "'") && strings.HasSuffix(val, "’") {
|
||||
const lenQuote = len("‘")
|
||||
val = val[1 : len(val)-lenQuote]
|
||||
// When parsing HTML, x/net/html will change all quotes which are
|
||||
// not used for syntax into UTF-8 quotes. So checking val[0] won't
|
||||
// be enough, since that only checks a single byte.
|
||||
if len(val) > 1 {
|
||||
if (strings.HasPrefix(val, "“") && strings.HasSuffix(val, "”")) ||
|
||||
(strings.HasPrefix(val, "‘") && strings.HasSuffix(val, "’")) {
|
||||
const lenQuote = len("‘")
|
||||
val = val[lenQuote : len(val)-lenQuote]
|
||||
} else if (strings.HasPrefix(val, "\"") && strings.HasSuffix(val, "\"")) ||
|
||||
(strings.HasPrefix(val, "'") && strings.HasSuffix(val, "'")) {
|
||||
val = val[1 : len(val)-1]
|
||||
} else if strings.HasPrefix(val, "'") && strings.HasSuffix(val, "’") {
|
||||
const lenQuote = len("‘")
|
||||
val = val[1 : len(val)-lenQuote]
|
||||
}
|
||||
}
|
||||
props[key] = val
|
||||
}
|
||||
props[key] = val
|
||||
}
|
||||
}
|
||||
|
||||
var name, link string
|
||||
if props["link"] != "" {
|
||||
link = props["link"]
|
||||
} else if props["name"] != "" {
|
||||
link = props["name"]
|
||||
}
|
||||
if props["title"] != "" {
|
||||
name = props["title"]
|
||||
} else if props["name"] != "" {
|
||||
name = props["name"]
|
||||
} else {
|
||||
name = link
|
||||
}
|
||||
|
||||
name += tail
|
||||
image := false
|
||||
switch ext := filepath.Ext(link); ext {
|
||||
// fast path: empty string, ignore
|
||||
case "":
|
||||
break
|
||||
case ".jpg", ".jpeg", ".png", ".tif", ".tiff", ".webp", ".gif", ".bmp", ".ico", ".svg":
|
||||
image = true
|
||||
}
|
||||
|
||||
childNode := &html.Node{}
|
||||
linkNode := &html.Node{
|
||||
FirstChild: childNode,
|
||||
LastChild: childNode,
|
||||
Type: html.ElementNode,
|
||||
Data: "a",
|
||||
DataAtom: atom.A,
|
||||
}
|
||||
childNode.Parent = linkNode
|
||||
absoluteLink := isLinkStr(link)
|
||||
if !absoluteLink {
|
||||
if image {
|
||||
link = strings.ReplaceAll(link, " ", "+")
|
||||
var name, link string
|
||||
if props["link"] != "" {
|
||||
link = props["link"]
|
||||
} else if props["name"] != "" {
|
||||
link = props["name"]
|
||||
}
|
||||
if props["title"] != "" {
|
||||
name = props["title"]
|
||||
} else if props["name"] != "" {
|
||||
name = props["name"]
|
||||
} else {
|
||||
link = strings.ReplaceAll(link, " ", "-")
|
||||
}
|
||||
if !strings.Contains(link, "/") {
|
||||
link = url.PathEscape(link)
|
||||
}
|
||||
}
|
||||
urlPrefix := ctx.URLPrefix
|
||||
if image {
|
||||
if !absoluteLink {
|
||||
if IsSameDomain(urlPrefix) {
|
||||
urlPrefix = strings.Replace(urlPrefix, "/src/", "/raw/", 1)
|
||||
}
|
||||
if ctx.IsWiki {
|
||||
link = util.URLJoin("wiki", "raw", link)
|
||||
}
|
||||
link = util.URLJoin(urlPrefix, link)
|
||||
}
|
||||
title := props["title"]
|
||||
if title == "" {
|
||||
title = props["alt"]
|
||||
}
|
||||
if title == "" {
|
||||
title = path.Base(name)
|
||||
}
|
||||
alt := props["alt"]
|
||||
if alt == "" {
|
||||
alt = name
|
||||
name = link
|
||||
}
|
||||
|
||||
// make the childNode an image - if we can, we also place the alt
|
||||
childNode.Type = html.ElementNode
|
||||
childNode.Data = "img"
|
||||
childNode.DataAtom = atom.Img
|
||||
childNode.Attr = []html.Attribute{
|
||||
{Key: "src", Val: link},
|
||||
{Key: "title", Val: title},
|
||||
{Key: "alt", Val: alt},
|
||||
name += tail
|
||||
image := false
|
||||
switch ext := filepath.Ext(link); ext {
|
||||
// fast path: empty string, ignore
|
||||
case "":
|
||||
// leave image as false
|
||||
case ".jpg", ".jpeg", ".png", ".tif", ".tiff", ".webp", ".gif", ".bmp", ".ico", ".svg":
|
||||
image = true
|
||||
}
|
||||
if alt == "" {
|
||||
childNode.Attr = childNode.Attr[:2]
|
||||
|
||||
childNode := &html.Node{}
|
||||
linkNode := &html.Node{
|
||||
FirstChild: childNode,
|
||||
LastChild: childNode,
|
||||
Type: html.ElementNode,
|
||||
Data: "a",
|
||||
DataAtom: atom.A,
|
||||
}
|
||||
} else {
|
||||
childNode.Parent = linkNode
|
||||
absoluteLink := isLinkStr(link)
|
||||
if !absoluteLink {
|
||||
if ctx.IsWiki {
|
||||
link = util.URLJoin("wiki", link)
|
||||
if image {
|
||||
link = strings.ReplaceAll(link, " ", "+")
|
||||
} else {
|
||||
link = strings.ReplaceAll(link, " ", "-")
|
||||
}
|
||||
if !strings.Contains(link, "/") {
|
||||
link = url.PathEscape(link)
|
||||
}
|
||||
link = util.URLJoin(urlPrefix, link)
|
||||
}
|
||||
childNode.Type = html.TextNode
|
||||
childNode.Data = name
|
||||
urlPrefix := ctx.URLPrefix
|
||||
if image {
|
||||
if !absoluteLink {
|
||||
if IsSameDomain(urlPrefix) {
|
||||
urlPrefix = strings.Replace(urlPrefix, "/src/", "/raw/", 1)
|
||||
}
|
||||
if ctx.IsWiki {
|
||||
link = util.URLJoin("wiki", "raw", link)
|
||||
}
|
||||
link = util.URLJoin(urlPrefix, link)
|
||||
}
|
||||
title := props["title"]
|
||||
if title == "" {
|
||||
title = props["alt"]
|
||||
}
|
||||
if title == "" {
|
||||
title = path.Base(name)
|
||||
}
|
||||
alt := props["alt"]
|
||||
if alt == "" {
|
||||
alt = name
|
||||
}
|
||||
|
||||
// make the childNode an image - if we can, we also place the alt
|
||||
childNode.Type = html.ElementNode
|
||||
childNode.Data = "img"
|
||||
childNode.DataAtom = atom.Img
|
||||
childNode.Attr = []html.Attribute{
|
||||
{Key: "src", Val: link},
|
||||
{Key: "title", Val: title},
|
||||
{Key: "alt", Val: alt},
|
||||
}
|
||||
if alt == "" {
|
||||
childNode.Attr = childNode.Attr[:2]
|
||||
}
|
||||
} else {
|
||||
if !absoluteLink {
|
||||
if ctx.IsWiki {
|
||||
link = util.URLJoin("wiki", link)
|
||||
}
|
||||
link = util.URLJoin(urlPrefix, link)
|
||||
}
|
||||
childNode.Type = html.TextNode
|
||||
childNode.Data = name
|
||||
}
|
||||
if noLink {
|
||||
linkNode = childNode
|
||||
} else {
|
||||
linkNode.Attr = []html.Attribute{{Key: "href", Val: link}}
|
||||
}
|
||||
replaceContent(node, m[0], m[1], linkNode)
|
||||
node = node.NextSibling.NextSibling
|
||||
}
|
||||
if noLink {
|
||||
linkNode = childNode
|
||||
} else {
|
||||
linkNode.Attr = []html.Attribute{{Key: "href", Val: link}}
|
||||
}
|
||||
replaceContent(node, m[0], m[1], linkNode)
|
||||
}
|
||||
|
||||
func fullIssuePatternProcessor(ctx *RenderContext, node *html.Node) {
|
||||
if ctx.Metas == nil {
|
||||
return
|
||||
}
|
||||
m := getIssueFullPattern().FindStringSubmatchIndex(node.Data)
|
||||
if m == nil {
|
||||
return
|
||||
}
|
||||
link := node.Data[m[0]:m[1]]
|
||||
id := "#" + node.Data[m[2]:m[3]]
|
||||
|
||||
// extract repo and org name from matched link like
|
||||
// http://localhost:3000/gituser/myrepo/issues/1
|
||||
linkParts := strings.Split(path.Clean(link), "/")
|
||||
matchOrg := linkParts[len(linkParts)-4]
|
||||
matchRepo := linkParts[len(linkParts)-3]
|
||||
next := node.NextSibling
|
||||
for node != nil && node != next {
|
||||
m := getIssueFullPattern().FindStringSubmatchIndex(node.Data)
|
||||
if m == nil {
|
||||
return
|
||||
}
|
||||
link := node.Data[m[0]:m[1]]
|
||||
id := "#" + node.Data[m[2]:m[3]]
|
||||
|
||||
if matchOrg == ctx.Metas["user"] && matchRepo == ctx.Metas["repo"] {
|
||||
// TODO if m[4]:m[5] is not nil, then link is to a comment,
|
||||
// and we should indicate that in the text somehow
|
||||
replaceContent(node, m[0], m[1], createLink(link, id, "ref-issue"))
|
||||
// extract repo and org name from matched link like
|
||||
// http://localhost:3000/gituser/myrepo/issues/1
|
||||
linkParts := strings.Split(path.Clean(link), "/")
|
||||
matchOrg := linkParts[len(linkParts)-4]
|
||||
matchRepo := linkParts[len(linkParts)-3]
|
||||
|
||||
} else {
|
||||
orgRepoID := matchOrg + "/" + matchRepo + id
|
||||
replaceContent(node, m[0], m[1], createLink(link, orgRepoID, "ref-issue"))
|
||||
if matchOrg == ctx.Metas["user"] && matchRepo == ctx.Metas["repo"] {
|
||||
// TODO if m[4]:m[5] is not nil, then link is to a comment,
|
||||
// and we should indicate that in the text somehow
|
||||
replaceContent(node, m[0], m[1], createLink(link, id, "ref-issue"))
|
||||
} else {
|
||||
orgRepoID := matchOrg + "/" + matchRepo + id
|
||||
replaceContent(node, m[0], m[1], createLink(link, orgRepoID, "ref-issue"))
|
||||
}
|
||||
node = node.NextSibling.NextSibling
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -782,70 +803,74 @@ func issueIndexPatternProcessor(ctx *RenderContext, node *html.Node) {
|
|||
if ctx.Metas == nil {
|
||||
return
|
||||
}
|
||||
|
||||
var (
|
||||
found bool
|
||||
ref *references.RenderizableReference
|
||||
)
|
||||
|
||||
_, exttrack := ctx.Metas["format"]
|
||||
alphanum := ctx.Metas["style"] == IssueNameStyleAlphanumeric
|
||||
next := node.NextSibling
|
||||
for node != nil && node != next {
|
||||
_, exttrack := ctx.Metas["format"]
|
||||
alphanum := ctx.Metas["style"] == IssueNameStyleAlphanumeric
|
||||
|
||||
// Repos with external issue trackers might still need to reference local PRs
|
||||
// We need to concern with the first one that shows up in the text, whichever it is
|
||||
found, ref = references.FindRenderizableReferenceNumeric(node.Data, exttrack && alphanum)
|
||||
if exttrack && alphanum {
|
||||
if found2, ref2 := references.FindRenderizableReferenceAlphanumeric(node.Data); found2 {
|
||||
if !found || ref2.RefLocation.Start < ref.RefLocation.Start {
|
||||
found = true
|
||||
ref = ref2
|
||||
// Repos with external issue trackers might still need to reference local PRs
|
||||
// We need to concern with the first one that shows up in the text, whichever it is
|
||||
found, ref = references.FindRenderizableReferenceNumeric(node.Data, exttrack && alphanum)
|
||||
if exttrack && alphanum {
|
||||
if found2, ref2 := references.FindRenderizableReferenceAlphanumeric(node.Data); found2 {
|
||||
if !found || ref2.RefLocation.Start < ref.RefLocation.Start {
|
||||
found = true
|
||||
ref = ref2
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
return
|
||||
}
|
||||
|
||||
var link *html.Node
|
||||
reftext := node.Data[ref.RefLocation.Start:ref.RefLocation.End]
|
||||
if exttrack && !ref.IsPull {
|
||||
ctx.Metas["index"] = ref.Issue
|
||||
link = createLink(com.Expand(ctx.Metas["format"], ctx.Metas), reftext, "ref-issue")
|
||||
} else {
|
||||
// Path determines the type of link that will be rendered. It's unknown at this point whether
|
||||
// the linked item is actually a PR or an issue. Luckily it's of no real consequence because
|
||||
// Gitea will redirect on click as appropriate.
|
||||
path := "issues"
|
||||
if ref.IsPull {
|
||||
path = "pulls"
|
||||
if !found {
|
||||
return
|
||||
}
|
||||
if ref.Owner == "" {
|
||||
link = createLink(util.URLJoin(setting.AppURL, ctx.Metas["user"], ctx.Metas["repo"], path, ref.Issue), reftext, "ref-issue")
|
||||
|
||||
var link *html.Node
|
||||
reftext := node.Data[ref.RefLocation.Start:ref.RefLocation.End]
|
||||
if exttrack && !ref.IsPull {
|
||||
ctx.Metas["index"] = ref.Issue
|
||||
link = createLink(com.Expand(ctx.Metas["format"], ctx.Metas), reftext, "ref-issue")
|
||||
} else {
|
||||
link = createLink(util.URLJoin(setting.AppURL, ref.Owner, ref.Name, path, ref.Issue), reftext, "ref-issue")
|
||||
// Path determines the type of link that will be rendered. It's unknown at this point whether
|
||||
// the linked item is actually a PR or an issue. Luckily it's of no real consequence because
|
||||
// Gitea will redirect on click as appropriate.
|
||||
path := "issues"
|
||||
if ref.IsPull {
|
||||
path = "pulls"
|
||||
}
|
||||
if ref.Owner == "" {
|
||||
link = createLink(util.URLJoin(setting.AppURL, ctx.Metas["user"], ctx.Metas["repo"], path, ref.Issue), reftext, "ref-issue")
|
||||
} else {
|
||||
link = createLink(util.URLJoin(setting.AppURL, ref.Owner, ref.Name, path, ref.Issue), reftext, "ref-issue")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if ref.Action == references.XRefActionNone {
|
||||
replaceContent(node, ref.RefLocation.Start, ref.RefLocation.End, link)
|
||||
return
|
||||
}
|
||||
if ref.Action == references.XRefActionNone {
|
||||
replaceContent(node, ref.RefLocation.Start, ref.RefLocation.End, link)
|
||||
node = node.NextSibling.NextSibling
|
||||
continue
|
||||
}
|
||||
|
||||
// Decorate action keywords if actionable
|
||||
var keyword *html.Node
|
||||
if references.IsXrefActionable(ref, exttrack, alphanum) {
|
||||
keyword = createKeyword(node.Data[ref.ActionLocation.Start:ref.ActionLocation.End])
|
||||
} else {
|
||||
keyword = &html.Node{
|
||||
// Decorate action keywords if actionable
|
||||
var keyword *html.Node
|
||||
if references.IsXrefActionable(ref, exttrack, alphanum) {
|
||||
keyword = createKeyword(node.Data[ref.ActionLocation.Start:ref.ActionLocation.End])
|
||||
} else {
|
||||
keyword = &html.Node{
|
||||
Type: html.TextNode,
|
||||
Data: node.Data[ref.ActionLocation.Start:ref.ActionLocation.End],
|
||||
}
|
||||
}
|
||||
spaces := &html.Node{
|
||||
Type: html.TextNode,
|
||||
Data: node.Data[ref.ActionLocation.Start:ref.ActionLocation.End],
|
||||
Data: node.Data[ref.ActionLocation.End:ref.RefLocation.Start],
|
||||
}
|
||||
replaceContentList(node, ref.ActionLocation.Start, ref.RefLocation.End, []*html.Node{keyword, spaces, link})
|
||||
node = node.NextSibling.NextSibling.NextSibling.NextSibling
|
||||
}
|
||||
spaces := &html.Node{
|
||||
Type: html.TextNode,
|
||||
Data: node.Data[ref.ActionLocation.End:ref.RefLocation.Start],
|
||||
}
|
||||
replaceContentList(node, ref.ActionLocation.Start, ref.RefLocation.End, []*html.Node{keyword, spaces, link})
|
||||
}
|
||||
|
||||
// fullSha1PatternProcessor renders SHA containing URLs
|
||||
|
@ -853,86 +878,112 @@ func fullSha1PatternProcessor(ctx *RenderContext, node *html.Node) {
|
|||
if ctx.Metas == nil {
|
||||
return
|
||||
}
|
||||
m := anySHA1Pattern.FindStringSubmatchIndex(node.Data)
|
||||
if m == nil {
|
||||
return
|
||||
}
|
||||
|
||||
urlFull := node.Data[m[0]:m[1]]
|
||||
text := base.ShortSha(node.Data[m[2]:m[3]])
|
||||
|
||||
// 3rd capture group matches a optional path
|
||||
subpath := ""
|
||||
if m[5] > 0 {
|
||||
subpath = node.Data[m[4]:m[5]]
|
||||
}
|
||||
|
||||
// 4th capture group matches a optional url hash
|
||||
hash := ""
|
||||
if m[7] > 0 {
|
||||
hash = node.Data[m[6]:m[7]][1:]
|
||||
}
|
||||
|
||||
start := m[0]
|
||||
end := m[1]
|
||||
|
||||
// If url ends in '.', it's very likely that it is not part of the
|
||||
// actual url but used to finish a sentence.
|
||||
if strings.HasSuffix(urlFull, ".") {
|
||||
end--
|
||||
urlFull = urlFull[:len(urlFull)-1]
|
||||
if hash != "" {
|
||||
hash = hash[:len(hash)-1]
|
||||
} else if subpath != "" {
|
||||
subpath = subpath[:len(subpath)-1]
|
||||
next := node.NextSibling
|
||||
for node != nil && node != next {
|
||||
m := anySHA1Pattern.FindStringSubmatchIndex(node.Data)
|
||||
if m == nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
if subpath != "" {
|
||||
text += subpath
|
||||
}
|
||||
urlFull := node.Data[m[0]:m[1]]
|
||||
text := base.ShortSha(node.Data[m[2]:m[3]])
|
||||
|
||||
if hash != "" {
|
||||
text += " (" + hash + ")"
|
||||
}
|
||||
// 3rd capture group matches a optional path
|
||||
subpath := ""
|
||||
if m[5] > 0 {
|
||||
subpath = node.Data[m[4]:m[5]]
|
||||
}
|
||||
|
||||
replaceContent(node, start, end, createCodeLink(urlFull, text, "commit"))
|
||||
// 4th capture group matches a optional url hash
|
||||
hash := ""
|
||||
if m[7] > 0 {
|
||||
hash = node.Data[m[6]:m[7]][1:]
|
||||
}
|
||||
|
||||
start := m[0]
|
||||
end := m[1]
|
||||
|
||||
// If url ends in '.', it's very likely that it is not part of the
|
||||
// actual url but used to finish a sentence.
|
||||
if strings.HasSuffix(urlFull, ".") {
|
||||
end--
|
||||
urlFull = urlFull[:len(urlFull)-1]
|
||||
if hash != "" {
|
||||
hash = hash[:len(hash)-1]
|
||||
} else if subpath != "" {
|
||||
subpath = subpath[:len(subpath)-1]
|
||||
}
|
||||
}
|
||||
|
||||
if subpath != "" {
|
||||
text += subpath
|
||||
}
|
||||
|
||||
if hash != "" {
|
||||
text += " (" + hash + ")"
|
||||
}
|
||||
|
||||
replaceContent(node, start, end, createCodeLink(urlFull, text, "commit"))
|
||||
node = node.NextSibling.NextSibling
|
||||
}
|
||||
}
|
||||
|
||||
// emojiShortCodeProcessor for rendering text like :smile: into emoji
|
||||
func emojiShortCodeProcessor(ctx *RenderContext, node *html.Node) {
|
||||
m := EmojiShortCodeRegex.FindStringSubmatchIndex(node.Data)
|
||||
if m == nil {
|
||||
return
|
||||
}
|
||||
|
||||
alias := node.Data[m[0]:m[1]]
|
||||
alias = strings.ReplaceAll(alias, ":", "")
|
||||
converted := emoji.FromAlias(alias)
|
||||
if converted == nil {
|
||||
// check if this is a custom reaction
|
||||
s := strings.Join(setting.UI.Reactions, " ") + "gitea"
|
||||
if strings.Contains(s, alias) {
|
||||
replaceContent(node, m[0], m[1], createCustomEmoji(alias, "emoji"))
|
||||
start := 0
|
||||
next := node.NextSibling
|
||||
for node != nil && node != next && start < len(node.Data) {
|
||||
m := EmojiShortCodeRegex.FindStringSubmatchIndex(node.Data[start:])
|
||||
if m == nil {
|
||||
return
|
||||
}
|
||||
return
|
||||
}
|
||||
m[0] += start
|
||||
m[1] += start
|
||||
|
||||
replaceContent(node, m[0], m[1], createEmoji(converted.Emoji, "emoji", converted.Description))
|
||||
start = m[1]
|
||||
|
||||
alias := node.Data[m[0]:m[1]]
|
||||
alias = strings.ReplaceAll(alias, ":", "")
|
||||
converted := emoji.FromAlias(alias)
|
||||
if converted == nil {
|
||||
// check if this is a custom reaction
|
||||
s := strings.Join(setting.UI.Reactions, " ") + "gitea"
|
||||
if strings.Contains(s, alias) {
|
||||
replaceContent(node, m[0], m[1], createCustomEmoji(alias, "emoji"))
|
||||
node = node.NextSibling.NextSibling
|
||||
start = 0
|
||||
continue
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
replaceContent(node, m[0], m[1], createEmoji(converted.Emoji, "emoji", converted.Description))
|
||||
node = node.NextSibling.NextSibling
|
||||
start = 0
|
||||
}
|
||||
}
|
||||
|
||||
// emoji processor to match emoji and add emoji class
|
||||
func emojiProcessor(ctx *RenderContext, node *html.Node) {
|
||||
m := emoji.FindEmojiSubmatchIndex(node.Data)
|
||||
if m == nil {
|
||||
return
|
||||
}
|
||||
start := 0
|
||||
next := node.NextSibling
|
||||
for node != nil && node != next && start < len(node.Data) {
|
||||
m := emoji.FindEmojiSubmatchIndex(node.Data[start:])
|
||||
if m == nil {
|
||||
return
|
||||
}
|
||||
m[0] += start
|
||||
m[1] += start
|
||||
|
||||
codepoint := node.Data[m[0]:m[1]]
|
||||
val := emoji.FromCode(codepoint)
|
||||
if val != nil {
|
||||
replaceContent(node, m[0], m[1], createEmoji(codepoint, "emoji", val.Description))
|
||||
codepoint := node.Data[m[0]:m[1]]
|
||||
start = m[1]
|
||||
val := emoji.FromCode(codepoint)
|
||||
if val != nil {
|
||||
replaceContent(node, m[0], m[1], createEmoji(codepoint, "emoji", val.Description))
|
||||
node = node.NextSibling.NextSibling
|
||||
start = 0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -942,49 +993,70 @@ func sha1CurrentPatternProcessor(ctx *RenderContext, node *html.Node) {
|
|||
if ctx.Metas == nil || ctx.Metas["user"] == "" || ctx.Metas["repo"] == "" || ctx.Metas["repoPath"] == "" {
|
||||
return
|
||||
}
|
||||
m := sha1CurrentPattern.FindStringSubmatchIndex(node.Data)
|
||||
if m == nil {
|
||||
return
|
||||
}
|
||||
hash := node.Data[m[2]:m[3]]
|
||||
// The regex does not lie, it matches the hash pattern.
|
||||
// However, a regex cannot know if a hash actually exists or not.
|
||||
// We could assume that a SHA1 hash should probably contain alphas AND numerics
|
||||
// but that is not always the case.
|
||||
// Although unlikely, deadbeef and 1234567 are valid short forms of SHA1 hash
|
||||
// as used by git and github for linking and thus we have to do similar.
|
||||
// Because of this, we check to make sure that a matched hash is actually
|
||||
// a commit in the repository before making it a link.
|
||||
if _, err := git.NewCommand("rev-parse", "--verify", hash).RunInDirBytes(ctx.Metas["repoPath"]); err != nil {
|
||||
if !strings.Contains(err.Error(), "fatal: Needed a single revision") {
|
||||
log.Debug("sha1CurrentPatternProcessor git rev-parse: %v", err)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
replaceContent(node, m[2], m[3],
|
||||
createCodeLink(util.URLJoin(setting.AppURL, ctx.Metas["user"], ctx.Metas["repo"], "commit", hash), base.ShortSha(hash), "commit"))
|
||||
start := 0
|
||||
next := node.NextSibling
|
||||
for node != nil && node != next && start < len(node.Data) {
|
||||
m := sha1CurrentPattern.FindStringSubmatchIndex(node.Data[start:])
|
||||
if m == nil {
|
||||
return
|
||||
}
|
||||
m[2] += start
|
||||
m[3] += start
|
||||
|
||||
hash := node.Data[m[2]:m[3]]
|
||||
// The regex does not lie, it matches the hash pattern.
|
||||
// However, a regex cannot know if a hash actually exists or not.
|
||||
// We could assume that a SHA1 hash should probably contain alphas AND numerics
|
||||
// but that is not always the case.
|
||||
// Although unlikely, deadbeef and 1234567 are valid short forms of SHA1 hash
|
||||
// as used by git and github for linking and thus we have to do similar.
|
||||
// Because of this, we check to make sure that a matched hash is actually
|
||||
// a commit in the repository before making it a link.
|
||||
if _, err := git.NewCommand("rev-parse", "--verify", hash).RunInDirBytes(ctx.Metas["repoPath"]); err != nil {
|
||||
if !strings.Contains(err.Error(), "fatal: Needed a single revision") {
|
||||
log.Debug("sha1CurrentPatternProcessor git rev-parse: %v", err)
|
||||
}
|
||||
start = m[3]
|
||||
continue
|
||||
}
|
||||
|
||||
replaceContent(node, m[2], m[3],
|
||||
createCodeLink(util.URLJoin(setting.AppURL, ctx.Metas["user"], ctx.Metas["repo"], "commit", hash), base.ShortSha(hash), "commit"))
|
||||
start = 0
|
||||
node = node.NextSibling.NextSibling
|
||||
}
|
||||
}
|
||||
|
||||
// emailAddressProcessor replaces raw email addresses with a mailto: link.
|
||||
func emailAddressProcessor(ctx *RenderContext, node *html.Node) {
|
||||
m := emailRegex.FindStringSubmatchIndex(node.Data)
|
||||
if m == nil {
|
||||
return
|
||||
next := node.NextSibling
|
||||
for node != nil && node != next {
|
||||
m := emailRegex.FindStringSubmatchIndex(node.Data)
|
||||
if m == nil {
|
||||
return
|
||||
}
|
||||
|
||||
mail := node.Data[m[2]:m[3]]
|
||||
replaceContent(node, m[2], m[3], createLink("mailto:"+mail, mail, "mailto"))
|
||||
node = node.NextSibling.NextSibling
|
||||
}
|
||||
mail := node.Data[m[2]:m[3]]
|
||||
replaceContent(node, m[2], m[3], createLink("mailto:"+mail, mail, "mailto"))
|
||||
}
|
||||
|
||||
// linkProcessor creates links for any HTTP or HTTPS URL not captured by
|
||||
// markdown.
|
||||
func linkProcessor(ctx *RenderContext, node *html.Node) {
|
||||
m := common.LinkRegex.FindStringIndex(node.Data)
|
||||
if m == nil {
|
||||
return
|
||||
next := node.NextSibling
|
||||
for node != nil && node != next {
|
||||
m := common.LinkRegex.FindStringIndex(node.Data)
|
||||
if m == nil {
|
||||
return
|
||||
}
|
||||
|
||||
uri := node.Data[m[0]:m[1]]
|
||||
replaceContent(node, m[0], m[1], createLink(uri, uri, "link"))
|
||||
node = node.NextSibling.NextSibling
|
||||
}
|
||||
uri := node.Data[m[0]:m[1]]
|
||||
replaceContent(node, m[0], m[1], createLink(uri, uri, "link"))
|
||||
}
|
||||
|
||||
func genDefaultLinkProcessor(defaultLink string) processor {
|
||||
|
@ -1008,12 +1080,17 @@ func genDefaultLinkProcessor(defaultLink string) processor {
|
|||
|
||||
// descriptionLinkProcessor creates links for DescriptionHTML
|
||||
func descriptionLinkProcessor(ctx *RenderContext, node *html.Node) {
|
||||
m := common.LinkRegex.FindStringIndex(node.Data)
|
||||
if m == nil {
|
||||
return
|
||||
next := node.NextSibling
|
||||
for node != nil && node != next {
|
||||
m := common.LinkRegex.FindStringIndex(node.Data)
|
||||
if m == nil {
|
||||
return
|
||||
}
|
||||
|
||||
uri := node.Data[m[0]:m[1]]
|
||||
replaceContent(node, m[0], m[1], createDescriptionLink(uri, uri))
|
||||
node = node.NextSibling.NextSibling
|
||||
}
|
||||
uri := node.Data[m[0]:m[1]]
|
||||
replaceContent(node, m[0], m[1], createDescriptionLink(uri, uri))
|
||||
}
|
||||
|
||||
func createDescriptionLink(href, content string) *html.Node {
|
||||
|
|
|
@ -464,3 +464,19 @@ func TestIssue16020(t *testing.T) {
|
|||
assert.NoError(t, err)
|
||||
assert.Equal(t, data, res.String())
|
||||
}
|
||||
|
||||
func BenchmarkEmojiPostprocess(b *testing.B) {
|
||||
data := "🥰 "
|
||||
for len(data) < 1<<16 {
|
||||
data += data
|
||||
}
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
var res strings.Builder
|
||||
err := PostProcess(&RenderContext{
|
||||
URLPrefix: "https://example.com",
|
||||
Metas: localMetas,
|
||||
}, strings.NewReader(data), &res)
|
||||
assert.NoError(b, err)
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue