Merge pull request '[PORT] gitea#30139: Refactor markdown render' (#3259) from algernon/forgejo:gitea/port/30139 into forgejo

Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/3259
Reviewed-by: Earl Warren <earl-warren@noreply.codeberg.org>
This commit is contained in:
Earl Warren 2024-04-16 10:08:52 +00:00
commit 4fc06cfd78
7 changed files with 358 additions and 251 deletions

View file

@ -4,17 +4,12 @@
package markdown
import (
"bytes"
"fmt"
"regexp"
"slices"
"strings"
"code.gitea.io/gitea/modules/container"
"code.gitea.io/gitea/modules/markup"
"code.gitea.io/gitea/modules/markup/common"
"code.gitea.io/gitea/modules/setting"
giteautil "code.gitea.io/gitea/modules/util"
"github.com/yuin/goldmark/ast"
east "github.com/yuin/goldmark/extension/ast"
@ -30,6 +25,12 @@ var byteMailto = []byte("mailto:")
// ASTTransformer is a default transformer of the goldmark tree.
// The type has no fields; all of its behaviour lives in its methods.
type ASTTransformer struct{}
// applyElementDir sets the "dir" attribute of n to the value of
// markup.DefaultProcessorHelper.ElementDir. When that setting is the empty
// string the node is left untouched.
func (g *ASTTransformer) applyElementDir(n ast.Node) {
	dir := markup.DefaultProcessorHelper.ElementDir
	if dir == "" {
		return
	}
	n.SetAttributeString("dir", []byte(dir))
}
// Transform transforms the given AST tree.
func (g *ASTTransformer) Transform(node *ast.Document, reader text.Reader, pc parser.Context) {
firstChild := node.FirstChild()
@ -46,12 +47,6 @@ func (g *ASTTransformer) Transform(node *ast.Document, reader text.Reader, pc pa
tocMode = rc.TOC
}
applyElementDir := func(n ast.Node) {
if markup.DefaultProcessorHelper.ElementDir != "" {
n.SetAttributeString("dir", []byte(markup.DefaultProcessorHelper.ElementDir))
}
}
_ = ast.Walk(node, func(n ast.Node, entering bool) (ast.WalkStatus, error) {
if !entering {
return ast.WalkContinue, nil
@ -59,135 +54,15 @@ func (g *ASTTransformer) Transform(node *ast.Document, reader text.Reader, pc pa
switch v := n.(type) {
case *ast.Heading:
for _, attr := range v.Attributes() {
if _, ok := attr.Value.([]byte); !ok {
v.SetAttribute(attr.Name, []byte(fmt.Sprintf("%v", attr.Value)))
}
}
txt := n.Text(reader.Source())
header := markup.Header{
Text: util.BytesToReadOnlyString(txt),
Level: v.Level,
}
if id, found := v.AttributeString("id"); found {
header.ID = util.BytesToReadOnlyString(id.([]byte))
}
tocList = append(tocList, header)
applyElementDir(v)
g.transformHeading(ctx, v, reader, &tocList)
case *ast.Paragraph:
applyElementDir(v)
g.applyElementDir(v)
case *ast.Image:
// Images need two things:
//
// 1. Their src needs to munged to be a real value
// 2. If they're not wrapped with a link they need a link wrapper
// Check if the destination is a real link
if len(v.Destination) > 0 && !markup.IsLink(v.Destination) {
v.Destination = []byte(giteautil.URLJoin(
ctx.Links.ResolveMediaLink(ctx.IsWiki),
strings.TrimLeft(string(v.Destination), "/"),
))
}
parent := n.Parent()
// Create a link around image only if parent is not already a link
if _, ok := parent.(*ast.Link); !ok && parent != nil {
next := n.NextSibling()
// Create a link wrapper
wrap := ast.NewLink()
wrap.Destination = v.Destination
wrap.Title = v.Title
wrap.SetAttributeString("target", []byte("_blank"))
// Duplicate the current image node
image := ast.NewImage(ast.NewLink())
image.Destination = v.Destination
image.Title = v.Title
for _, attr := range v.Attributes() {
image.SetAttribute(attr.Name, attr.Value)
}
for child := v.FirstChild(); child != nil; {
next := child.NextSibling()
image.AppendChild(image, child)
child = next
}
// Append our duplicate image to the wrapper link
wrap.AppendChild(wrap, image)
// Wire in the next sibling
wrap.SetNextSibling(next)
// Replace the current node with the wrapper link
parent.ReplaceChild(parent, n, wrap)
// But most importantly ensure the next sibling is still on the old image too
v.SetNextSibling(next)
}
g.transformImage(ctx, v, reader)
case *ast.Link:
// Links need their href to munged to be a real value
link := v.Destination
// Do not process the link if it's not a link, starts with an hashtag
// (indicating it's an anchor link), starts with `mailto:` or any of the
// custom markdown URLs.
processLink := len(link) > 0 && !markup.IsLink(link) &&
link[0] != '#' && !bytes.HasPrefix(link, byteMailto) &&
!slices.ContainsFunc(setting.Markdown.CustomURLSchemes, func(s string) bool {
return bytes.HasPrefix(link, []byte(s+":"))
})
if processLink {
var base string
if ctx.IsWiki {
base = ctx.Links.WikiLink()
} else if ctx.Links.HasBranchInfo() {
base = ctx.Links.SrcLink()
} else {
base = ctx.Links.Base
}
link = []byte(giteautil.URLJoin(base, string(link)))
}
if len(link) > 0 && link[0] == '#' {
link = []byte("#user-content-" + string(link)[1:])
}
v.Destination = link
g.transformLink(ctx, v, reader)
case *ast.List:
if v.HasChildren() {
children := make([]ast.Node, 0, v.ChildCount())
child := v.FirstChild()
for child != nil {
children = append(children, child)
child = child.NextSibling()
}
v.RemoveChildren(v)
for _, child := range children {
listItem := child.(*ast.ListItem)
if !child.HasChildren() || !child.FirstChild().HasChildren() {
v.AppendChild(v, child)
continue
}
taskCheckBox, ok := child.FirstChild().FirstChild().(*east.TaskCheckBox)
if !ok {
v.AppendChild(v, child)
continue
}
newChild := NewTaskCheckBoxListItem(listItem)
newChild.IsChecked = taskCheckBox.IsChecked
newChild.SetAttributeString("class", []byte("task-list-item"))
segments := newChild.FirstChild().Lines()
if segments.Len() > 0 {
segment := segments.At(0)
newChild.SourcePosition = rc.metaLength + segment.Start
}
v.AppendChild(v, newChild)
}
}
applyElementDir(v)
g.transformList(ctx, v, reader, rc)
case *ast.Text:
if v.SoftLineBreak() && !v.HardLineBreak() {
if ctx.Metas["mode"] != "document" {
@ -197,10 +72,7 @@ func (g *ASTTransformer) Transform(node *ast.Document, reader text.Reader, pc pa
}
}
case *ast.CodeSpan:
colorContent := n.Text(reader.Source())
if matchColor(strings.ToLower(string(colorContent))) {
v.AppendChild(v, NewColorPreview(colorContent))
}
g.transformCodeSpan(ctx, v, reader)
}
return ast.WalkContinue, nil
})
@ -222,50 +94,6 @@ func (g *ASTTransformer) Transform(node *ast.Document, reader text.Reader, pc pa
}
}
// prefixedIDs hands out element ids and keeps track of the ids already in
// use: Generate and GenerateWithDefault only return an id once values.Add
// has accepted it.
type prefixedIDs struct {
	// values holds the ids that have been generated or registered via Put.
	values container.Set[string]
}
// Generate generates a new element id for value. The fallback id handed to
// GenerateWithDefault is "heading" for heading nodes and "id" for every
// other node kind.
func (p *prefixedIDs) Generate(value []byte, kind ast.NodeKind) []byte {
	if kind == ast.KindHeading {
		return p.GenerateWithDefault(value, []byte("heading"))
	}
	return p.GenerateWithDefault(value, []byte("id"))
}
// GenerateWithDefault generates a new element id from value.
//
// The value is passed through common.CleanValue; if nothing is left, dft is
// used instead. The id receives the "user-content-" prefix unless it already
// starts with it. If values.Add does not accept the id, the suffixes "-1",
// "-2", ... are tried in turn until one is accepted.
func (p *prefixedIDs) GenerateWithDefault(value, dft []byte) []byte {
	id := common.CleanValue(value)
	if len(id) == 0 {
		id = dft
	}

	if prefix := []byte("user-content-"); !bytes.HasPrefix(id, prefix) {
		id = append(prefix, id...)
	}

	if p.values.Add(util.BytesToReadOnlyString(id)) {
		return id
	}

	// The plain id was not accepted: probe numbered variants.
	suffix := 1
	for {
		candidate := fmt.Sprintf("%s-%d", id, suffix)
		if p.values.Add(candidate) {
			return []byte(candidate)
		}
		suffix++
	}
}
// Put records value in the table of used ids.
// NOTE(review): the key is built with util.BytesToReadOnlyString, which
// presumably shares memory with value, so callers should not modify value
// afterwards — confirm.
func (p *prefixedIDs) Put(value []byte) {
	used := util.BytesToReadOnlyString(value)
	p.values.Add(used)
}
// newPrefixedIDs returns a prefixedIDs whose set of used ids is freshly
// allocated and empty.
func newPrefixedIDs() *prefixedIDs {
	ids := new(prefixedIDs)
	ids.values = make(container.Set[string])
	return ids
}
// NewHTMLRenderer creates a HTMLRenderer to render
// in the gitea form.
func NewHTMLRenderer(opts ...html.Option) renderer.NodeRenderer {
@ -295,38 +123,6 @@ func (r *HTMLRenderer) RegisterFuncs(reg renderer.NodeRendererFuncRegisterer) {
reg.Register(east.KindTaskCheckBox, r.renderTaskCheckBox)
}
// renderCodeSpan renders CodeSpan elements (like goldmark upstream does) but
// also renders ColorPreview children. See #21474 for reference.
//
// On entering it writes the opening <code> tag (with attributes, if any) and
// all children itself, then returns WalkSkipChildren; on leaving it writes
// the closing tag.
func (r *HTMLRenderer) renderCodeSpan(w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) {
	if !entering {
		_, _ = w.WriteString("</code>")
		return ast.WalkContinue, nil
	}

	if n.Attributes() == nil {
		_, _ = w.WriteString("<code>")
	} else {
		_, _ = w.WriteString("<code")
		html.RenderAttributes(w, n, html.CodeAttributeFilter)
		_ = w.WriteByte('>')
	}

	for child := n.FirstChild(); child != nil; child = child.NextSibling() {
		switch c := child.(type) {
		case *ast.Text:
			content := c.Segment.Value(source)
			if bytes.HasSuffix(content, []byte("\n")) {
				// A trailing newline inside the span is rendered as a single space.
				r.Writer.RawWrite(w, content[:len(content)-1])
				r.Writer.RawWrite(w, []byte(" "))
			} else {
				r.Writer.RawWrite(w, content)
			}
		case *ColorPreview:
			_, _ = fmt.Fprintf(w, `<span class="color-preview" style="background-color: %v"></span>`, string(c.Color))
		}
	}
	return ast.WalkSkipChildren, nil
}
func (r *HTMLRenderer) renderDocument(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
n := node.(*ast.Document)
@ -415,38 +211,3 @@ func (r *HTMLRenderer) renderIcon(w util.BufWriter, source []byte, node ast.Node
return ast.WalkContinue, nil
}
// renderTaskCheckBoxListItem renders a TaskCheckBoxListItem as an <li>
// element that starts with a disabled checkbox <input>.
//
// The input carries the item's SourcePosition in data-source-position and is
// marked checked when IsChecked is set; it is closed with " />" when r.XHTML
// is set and with ">" otherwise. A newline follows the input unless the
// item's first child is a *ast.TextBlock. On leaving, "</li>\n" is written.
func (r *HTMLRenderer) renderTaskCheckBoxListItem(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
	item := node.(*TaskCheckBoxListItem)
	if !entering {
		_, _ = w.WriteString("</li>\n")
		return ast.WalkContinue, nil
	}

	if item.Attributes() == nil {
		_, _ = w.WriteString("<li>")
	} else {
		_, _ = w.WriteString("<li")
		html.RenderAttributes(w, item, html.ListItemAttributeFilter)
		_ = w.WriteByte('>')
	}

	fmt.Fprintf(w, `<input type="checkbox" disabled="" data-source-position="%d"`, item.SourcePosition)
	if item.IsChecked {
		_, _ = w.WriteString(` checked=""`)
	}
	if r.XHTML {
		_, _ = w.WriteString(` />`)
	} else {
		_ = w.WriteByte('>')
	}

	if first := item.FirstChild(); first != nil {
		if _, isTextBlock := first.(*ast.TextBlock); !isTextBlock {
			_ = w.WriteByte('\n')
		}
	}
	return ast.WalkContinue, nil
}
// renderTaskCheckBox writes nothing for the node and lets the walk continue.
// NOTE(review): presumably the <input> written by renderTaskCheckBoxListItem
// stands in for the check box node — confirm.
func (r *HTMLRenderer) renderTaskCheckBox(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
	return ast.WalkContinue, nil
}

View file

@ -0,0 +1,59 @@
// Copyright 2024 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package markdown
import (
"bytes"
"fmt"
"code.gitea.io/gitea/modules/container"
"code.gitea.io/gitea/modules/markup/common"
"github.com/yuin/goldmark/ast"
"github.com/yuin/goldmark/util"
)
// prefixedIDs hands out element ids and keeps track of the ids already in
// use: Generate and GenerateWithDefault only return an id once values.Add
// has accepted it.
type prefixedIDs struct {
	// values holds the ids that have been generated or registered via Put.
	values container.Set[string]
}
// Generate generates a new element id for value. The fallback id handed to
// GenerateWithDefault is "heading" for heading nodes and "id" for every
// other node kind.
func (p *prefixedIDs) Generate(value []byte, kind ast.NodeKind) []byte {
	if kind == ast.KindHeading {
		return p.GenerateWithDefault(value, []byte("heading"))
	}
	return p.GenerateWithDefault(value, []byte("id"))
}
// GenerateWithDefault generates a new element id from value.
//
// The value is passed through common.CleanValue; if nothing is left, dft is
// used instead. The id receives the "user-content-" prefix unless it already
// starts with it. If values.Add does not accept the id, the suffixes "-1",
// "-2", ... are tried in turn until one is accepted.
func (p *prefixedIDs) GenerateWithDefault(value, dft []byte) []byte {
	id := common.CleanValue(value)
	if len(id) == 0 {
		id = dft
	}

	if prefix := []byte("user-content-"); !bytes.HasPrefix(id, prefix) {
		id = append(prefix, id...)
	}

	if p.values.Add(util.BytesToReadOnlyString(id)) {
		return id
	}

	// The plain id was not accepted: probe numbered variants.
	suffix := 1
	for {
		candidate := fmt.Sprintf("%s-%d", id, suffix)
		if p.values.Add(candidate) {
			return []byte(candidate)
		}
		suffix++
	}
}
// Put records value in the table of used ids.
// NOTE(review): the key is built with util.BytesToReadOnlyString, which
// presumably shares memory with value, so callers should not modify value
// afterwards — confirm.
func (p *prefixedIDs) Put(value []byte) {
	used := util.BytesToReadOnlyString(value)
	p.values.Add(used)
}
// newPrefixedIDs returns a prefixedIDs whose set of used ids is freshly
// allocated and empty.
func newPrefixedIDs() *prefixedIDs {
	ids := new(prefixedIDs)
	ids.values = make(container.Set[string])
	return ids
}

View file

@ -0,0 +1,56 @@
// Copyright 2024 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package markdown
import (
"bytes"
"fmt"
"strings"
"code.gitea.io/gitea/modules/markup"
"github.com/yuin/goldmark/ast"
"github.com/yuin/goldmark/renderer/html"
"github.com/yuin/goldmark/text"
"github.com/yuin/goldmark/util"
)
// renderCodeSpan renders CodeSpan elements (like goldmark upstream does) but
// also renders ColorPreview children. See #21474 for reference.
//
// On entering it writes the opening <code> tag (with attributes, if any) and
// all children itself, then returns WalkSkipChildren; on leaving it writes
// the closing tag.
func (r *HTMLRenderer) renderCodeSpan(w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) {
	if !entering {
		_, _ = w.WriteString("</code>")
		return ast.WalkContinue, nil
	}

	if n.Attributes() == nil {
		_, _ = w.WriteString("<code>")
	} else {
		_, _ = w.WriteString("<code")
		html.RenderAttributes(w, n, html.CodeAttributeFilter)
		_ = w.WriteByte('>')
	}

	for child := n.FirstChild(); child != nil; child = child.NextSibling() {
		switch c := child.(type) {
		case *ast.Text:
			content := c.Segment.Value(source)
			if bytes.HasSuffix(content, []byte("\n")) {
				// A trailing newline inside the span is rendered as a single space.
				r.Writer.RawWrite(w, content[:len(content)-1])
				r.Writer.RawWrite(w, []byte(" "))
			} else {
				r.Writer.RawWrite(w, content)
			}
		case *ColorPreview:
			_, _ = fmt.Fprintf(w, `<span class="color-preview" style="background-color: %v"></span>`, string(c.Color))
		}
	}
	return ast.WalkSkipChildren, nil
}
// transformCodeSpan appends a ColorPreview child to v when the text of the
// code span, lower-cased, is accepted by matchColor. Other code spans are
// left unchanged.
func (g *ASTTransformer) transformCodeSpan(ctx *markup.RenderContext, v *ast.CodeSpan, reader text.Reader) {
	raw := v.Text(reader.Source())
	if !matchColor(strings.ToLower(string(raw))) {
		return
	}
	// The preview is built from the text as written, not the lower-cased copy.
	v.AppendChild(v, NewColorPreview(raw))
}

View file

@ -0,0 +1,32 @@
// Copyright 2024 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package markdown
import (
"fmt"
"code.gitea.io/gitea/modules/markup"
"github.com/yuin/goldmark/ast"
"github.com/yuin/goldmark/text"
"github.com/yuin/goldmark/util"
)
// transformHeading normalises the attributes of heading v, appends an entry
// for it to tocList and applies the configured element direction.
//
// Every attribute value that is not already a []byte is replaced by its
// "%v" rendering as []byte; the []byte assertion on the "id" attribute
// further down relies on that.
func (g *ASTTransformer) transformHeading(ctx *markup.RenderContext, v *ast.Heading, reader text.Reader, tocList *[]markup.Header) {
	for _, attr := range v.Attributes() {
		if _, isBytes := attr.Value.([]byte); isBytes {
			continue
		}
		v.SetAttribute(attr.Name, []byte(fmt.Sprintf("%v", attr.Value)))
	}

	entry := markup.Header{
		Text:  util.BytesToReadOnlyString(v.Text(reader.Source())),
		Level: v.Level,
	}
	if rawID, hasID := v.AttributeString("id"); hasID {
		entry.ID = util.BytesToReadOnlyString(rawID.([]byte))
	}
	*tocList = append(*tocList, entry)

	g.applyElementDir(v)
}

View file

@ -0,0 +1,66 @@
// Copyright 2024 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package markdown
import (
"strings"
"code.gitea.io/gitea/modules/markup"
giteautil "code.gitea.io/gitea/modules/util"
"github.com/yuin/goldmark/ast"
"github.com/yuin/goldmark/text"
)
// transformImage rewrites image v in two steps:
//
//  1. A non-empty destination that markup.IsLink does not recognise is
//     joined onto the media link resolved from ctx, after its leading
//     slashes have been trimmed.
//  2. Unless v has no parent or the parent already is a link, v is replaced
//     in its parent by a link (target="_blank") that wraps a copy of the
//     image.
func (g *ASTTransformer) transformImage(ctx *markup.RenderContext, v *ast.Image, reader text.Reader) {
	if dest := v.Destination; len(dest) > 0 && !markup.IsLink(dest) {
		mediaBase := ctx.Links.ResolveMediaLink(ctx.IsWiki)
		relative := strings.TrimLeft(string(dest), "/")
		v.Destination = []byte(giteautil.URLJoin(mediaBase, relative))
	}

	parent := v.Parent()
	if parent == nil {
		return
	}
	if _, alreadyLinked := parent.(*ast.Link); alreadyLinked {
		return
	}

	following := v.NextSibling()

	// The wrapper link points at the image itself.
	link := ast.NewLink()
	link.Destination = v.Destination
	link.Title = v.Title
	link.SetAttributeString("target", []byte("_blank"))

	// Duplicate the image: same destination, title and attributes, and the
	// original's children are moved over.
	duplicate := ast.NewImage(ast.NewLink())
	duplicate.Destination = v.Destination
	duplicate.Title = v.Title
	for _, attr := range v.Attributes() {
		duplicate.SetAttribute(attr.Name, attr.Value)
	}
	child := v.FirstChild()
	for child != nil {
		// Fetch the sibling before the child is re-parented.
		sibling := child.NextSibling()
		duplicate.AppendChild(duplicate, child)
		child = sibling
	}

	link.AppendChild(link, duplicate)
	link.SetNextSibling(following)
	parent.ReplaceChild(parent, v, link)

	// The old image must keep its next sibling as well.
	// NOTE(review): presumably so a walk that is currently on v can still
	// advance — confirm.
	v.SetNextSibling(following)
}

View file

@ -0,0 +1,47 @@
// Copyright 2024 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package markdown
import (
"bytes"
"slices"
"code.gitea.io/gitea/modules/markup"
"code.gitea.io/gitea/modules/setting"
giteautil "code.gitea.io/gitea/modules/util"
"github.com/yuin/goldmark/ast"
"github.com/yuin/goldmark/text"
)
// transformLink rewrites the destination of link v.
//
// A destination is joined onto a base URL when it is non-empty, is not
// recognised by markup.IsLink, does not start with '#' (an anchor link), does
// not start with byteMailto, and does not start with "<scheme>:" for any of
// setting.Markdown.CustomURLSchemes. The base is the wiki link for wiki
// content, the source link when branch information is available, and
// ctx.Links.Base otherwise.
//
// A destination starting with '#' gets "user-content-" inserted right after
// the '#'.
func (g *ASTTransformer) transformLink(ctx *markup.RenderContext, v *ast.Link, reader text.Reader) {
	dest := v.Destination

	hasCustomScheme := func(scheme string) bool {
		return bytes.HasPrefix(dest, []byte(scheme+":"))
	}
	needsBase := len(dest) > 0 && !markup.IsLink(dest) &&
		dest[0] != '#' && !bytes.HasPrefix(dest, byteMailto) &&
		!slices.ContainsFunc(setting.Markdown.CustomURLSchemes, hasCustomScheme)

	if needsBase {
		var base string
		switch {
		case ctx.IsWiki:
			base = ctx.Links.WikiLink()
		case ctx.Links.HasBranchInfo():
			base = ctx.Links.SrcLink()
		default:
			base = ctx.Links.Base
		}
		dest = []byte(giteautil.URLJoin(base, string(dest)))
	}

	if len(dest) > 0 && dest[0] == '#' {
		dest = []byte("#user-content-" + string(dest)[1:])
	}

	v.Destination = dest
}

View file

@ -0,0 +1,86 @@
// Copyright 2024 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package markdown
import (
"fmt"
"code.gitea.io/gitea/modules/markup"
"github.com/yuin/goldmark/ast"
east "github.com/yuin/goldmark/extension/ast"
"github.com/yuin/goldmark/renderer/html"
"github.com/yuin/goldmark/text"
"github.com/yuin/goldmark/util"
)
// renderTaskCheckBoxListItem renders a TaskCheckBoxListItem as an <li>
// element that starts with a disabled checkbox <input>.
//
// The input carries the item's SourcePosition in data-source-position and is
// marked checked when IsChecked is set; it is closed with " />" when r.XHTML
// is set and with ">" otherwise. A newline follows the input unless the
// item's first child is a *ast.TextBlock. On leaving, "</li>\n" is written.
func (r *HTMLRenderer) renderTaskCheckBoxListItem(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
	item := node.(*TaskCheckBoxListItem)
	if !entering {
		_, _ = w.WriteString("</li>\n")
		return ast.WalkContinue, nil
	}

	if item.Attributes() == nil {
		_, _ = w.WriteString("<li>")
	} else {
		_, _ = w.WriteString("<li")
		html.RenderAttributes(w, item, html.ListItemAttributeFilter)
		_ = w.WriteByte('>')
	}

	fmt.Fprintf(w, `<input type="checkbox" disabled="" data-source-position="%d"`, item.SourcePosition)
	if item.IsChecked {
		_, _ = w.WriteString(` checked=""`)
	}
	if r.XHTML {
		_, _ = w.WriteString(` />`)
	} else {
		_ = w.WriteByte('>')
	}

	if first := item.FirstChild(); first != nil {
		if _, isTextBlock := first.(*ast.TextBlock); !isTextBlock {
			_ = w.WriteByte('\n')
		}
	}
	return ast.WalkContinue, nil
}
// renderTaskCheckBox writes nothing for the node and lets the walk continue.
// NOTE(review): presumably the <input> written by renderTaskCheckBoxListItem
// stands in for the check box node — confirm.
func (r *HTMLRenderer) renderTaskCheckBox(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
	return ast.WalkContinue, nil
}
// transformList rebuilds the children of list v and then applies the
// configured element direction to it.
//
// Items whose first child's first child is a *east.TaskCheckBox are replaced
// by a TaskCheckBoxListItem carrying the checked state, the class
// "task-list-item" and, when the new item's first child has at least one
// line, a source position of rc.metaLength plus the start of that line. All
// other items are re-appended unchanged, in their original order.
func (g *ASTTransformer) transformList(ctx *markup.RenderContext, v *ast.List, reader text.Reader, rc *RenderConfig) {
	if v.HasChildren() {
		// Snapshot the items first: the list is emptied and refilled below.
		items := make([]ast.Node, 0, v.ChildCount())
		for c := v.FirstChild(); c != nil; c = c.NextSibling() {
			items = append(items, c)
		}
		v.RemoveChildren(v)

		for _, item := range items {
			listItem := item.(*ast.ListItem)

			var (
				checkBox *east.TaskCheckBox
				isTask   bool
			)
			if item.HasChildren() && item.FirstChild().HasChildren() {
				checkBox, isTask = item.FirstChild().FirstChild().(*east.TaskCheckBox)
			}
			if !isTask {
				v.AppendChild(v, item)
				continue
			}

			taskItem := NewTaskCheckBoxListItem(listItem)
			taskItem.IsChecked = checkBox.IsChecked
			taskItem.SetAttributeString("class", []byte("task-list-item"))
			if lines := taskItem.FirstChild().Lines(); lines.Len() > 0 {
				taskItem.SourcePosition = rc.metaLength + lines.At(0).Start
			}
			v.AppendChild(v, taskItem)
		}
	}
	g.applyElementDir(v)
}