forgejo/modules/highlight/highlight.go
wxiaoguang a3f403f438
Add option to disable ambiguous unicode characters detection (#28454) (#28499)
Backport #28454 (the only conflict is caused by some comments)

* Close #24483
* Close #28123
* Close #23682
* Close #23149
2023-12-18 12:20:37 +08:00

224 lines
5.5 KiB
Go

// Copyright 2015 The Gogs Authors. All rights reserved.
// Copyright 2020 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package highlight
import (
"bufio"
"bytes"
"fmt"
gohtml "html"
"html/template"
"io"
"path/filepath"
"strings"
"sync"
"code.gitea.io/gitea/modules/analyze"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/util"
"github.com/alecthomas/chroma/v2"
"github.com/alecthomas/chroma/v2/formatters/html"
"github.com/alecthomas/chroma/v2/lexers"
"github.com/alecthomas/chroma/v2/styles"
lru "github.com/hashicorp/golang-lru/v2"
)
// sizeLimit is the largest input, in bytes (1024 * 1024), that gets syntax highlighting.
// Code and File return escaped, non-highlighted output for anything larger,
// for performance purposes.
const sizeLimit = 1024 * 1024
var (
// highlightMapping is the custom user mapping. Code and File look it up by
// file extension (filepath.Ext) and pass the mapped value to lexers.Get.
// It is replaced by setting.GetHighlightMapping() inside NewContext.
highlightMapping = map[string]string{}
// once guards the initialization performed by NewContext
once sync.Once
// cache stores, per file name, the lexer that Code resolved for it
cache *lru.TwoQueueCache[string, any]
// githubStyles is the chroma "github" style passed to the formatter
githubStyles = styles.Get("github")
)
// NewContext prepares the package state used by the highlighter: it loads the
// custom highlight mapping from the settings and creates the lexer cache.
// The work is wrapped in a sync.Once, so repeated calls are no-ops.
// It panics when the LRU cache cannot be created.
func NewContext() {
	once.Do(func() {
		highlightMapping = setting.GetHighlightMapping()

		// The size 512 is simply a conservative rule of thumb
		lexerCache, err := lru.New2Q[string, any](512)
		if err != nil {
			panic(fmt.Sprintf("failed to initialize LRU cache for highlighter: %s", err))
		}
		cache = lexerCache
	})
}
// Code renders the code string as HTML carrying chroma syntax highlighting
// classes and reports the display name of the lexer that was used.
//
// The lexer is resolved in this order: the given language (retried with
// everything from the first '?' stripped), the custom mapping for the file
// extension, the lexer cached for fileName, a match on fileName, and finally
// the fallback lexer. A lexer found by the last two steps is cached under fileName.
//
// An empty or single-newline input yields "\n", and input larger than
// sizeLimit is only HTML-escaped; both cases return an empty lexer name.
func Code(fileName, language, code string) (output template.HTML, lexerName string) {
	NewContext()

	switch {
	case code == "", code == "\n":
		// diff view newline will be passed as empty, change to literal '\n' so it can be copied
		// preserve literal newline in blame view
		return "\n", ""
	case len(code) > sizeLimit:
		return template.HTML(template.HTMLEscapeString(code)), ""
	}

	var lexer chroma.Lexer
	if language != "" {
		if lexer = lexers.Get(language); lexer == nil {
			// Attempt stripping off the '?'
			if name, _, found := strings.Cut(language, "?"); found && name != "" {
				lexer = lexers.Get(name)
			}
		}
	}

	if lexer == nil {
		// use mapped value to find lexer
		if mapped, found := highlightMapping[filepath.Ext(fileName)]; found {
			lexer = lexers.Get(mapped)
		}
	}

	if lexer == nil {
		if cached, found := cache.Get(fileName); found {
			lexer = cached.(chroma.Lexer)
		}
	}

	if lexer == nil {
		if lexer = lexers.Match(fileName); lexer == nil {
			lexer = lexers.Fallback
		}
		cache.Add(fileName, lexer)
	}

	return CodeFromLexer(lexer, code), formatLexerName(lexer.Config().Name)
}
// CodeFromLexer tokenises code with the given lexer and returns it as HTML
// with chroma syntax highlighting classes (no inline styles, no line numbers,
// no surrounding <pre>).
//
// If tokenising or formatting fails, the error is logged and the HTML-escaped
// input is returned instead, so callers always get output that is safe to embed.
func CodeFromLexer(lexer chroma.Lexer, code string) template.HTML {
	formatter := html.New(
		html.WithClasses(true),
		html.WithLineNumbers(false),
		html.PreventSurroundingPre(true),
	)

	iterator, err := lexer.Tokenise(nil, code)
	if err != nil {
		log.Error("Can't tokenize code: %v", err)
		return template.HTML(template.HTMLEscapeString(code))
	}

	// The output is collected in memory, so write straight into a builder:
	// no extra buffering layer, no flush to forget, and String() does not copy.
	var out strings.Builder
	// style not used for live site but need to pass something
	if err := formatter.Format(&out, githubStyles, iterator); err != nil {
		log.Error("Can't format code: %v", err)
		return template.HTML(template.HTMLEscapeString(code))
	}

	// Chroma will add newlines for certain lexers in order to highlight them properly
	// Once highlighted, strip them here, so they don't cause copy/paste trouble in HTML output
	return template.HTML(strings.TrimSuffix(out.String(), "\n"))
}
// File highlights a whole file and returns one chroma-highlighted HTML
// fragment per line of code, together with the display name of the lexer used.
//
// The lexer is resolved in this order: the given language, the custom mapping
// for the file extension, the language guessed by analyze.GetCodeLanguage,
// a match on fileName, and finally the fallback lexer.
//
// Content larger than sizeLimit is not highlighted: the result of PlainText is
// returned with an empty lexer name. A non-nil error is returned when the
// content cannot be tokenised or formatted.
func File(fileName, language string, code []byte) ([]template.HTML, string, error) {
	NewContext()

	if len(code) > sizeLimit {
		return PlainText(code), "", nil
	}

	var lexer chroma.Lexer
	// provided language overrides everything
	if language != "" {
		lexer = lexers.Get(language)
	}
	if lexer == nil {
		if mapped, found := highlightMapping[filepath.Ext(fileName)]; found {
			lexer = lexers.Get(mapped)
		}
	}
	if lexer == nil {
		lexer = lexers.Get(analyze.GetCodeLanguage(fileName, code))
	}
	if lexer == nil {
		lexer = lexers.Match(fileName)
	}
	if lexer == nil {
		lexer = lexers.Fallback
	}

	lexerName := formatLexerName(lexer.Config().Name)

	tokenStream, err := lexer.Tokenise(nil, string(code))
	if err != nil {
		return nil, "", fmt.Errorf("can't tokenize code: %w", err)
	}
	tokenLines := chroma.SplitTokensIntoLines(tokenStream.Tokens())

	formatter := html.New(
		html.WithClasses(true),
		html.WithLineNumbers(false),
		html.PreventSurroundingPre(true),
	)

	rendered := make([]template.HTML, 0, len(tokenLines))
	// a single buffer is reused for every line and reset after each one
	var lineBuf bytes.Buffer
	for _, lineTokens := range tokenLines {
		if err := formatter.Format(&lineBuf, githubStyles, chroma.Literator(lineTokens...)); err != nil {
			return nil, "", fmt.Errorf("can't format code: %w", err)
		}
		rendered = append(rendered, template.HTML(lineBuf.String()))
		lineBuf.Reset()
	}

	return rendered, lexerName, nil
}
// PlainText splits code into lines and returns each one HTML-escaped, without
// any syntax highlighting. Every returned line keeps its trailing '\n' when it
// has one; empty input produces an empty slice.
func PlainText(code []byte) []template.HTML {
	// one entry per newline, plus one for a last line without a trailing newline
	lineCount := bytes.Count(code, []byte{'\n'}) + 1
	escaped := make([]template.HTML, 0, lineCount)

	reader := bufio.NewReader(bytes.NewReader(code))
	for {
		line, err := reader.ReadString('\n')
		switch {
		case err != nil && err != io.EOF:
			log.Error("failed to read string from buffer: %v", err)
			return escaped
		case line == "" && err == io.EOF:
			// nothing left to read
			return escaped
		}
		escaped = append(escaped, template.HTML(gohtml.EscapeString(line)))
	}
}
// formatLexerName converts a lexer's configured name into the name reported
// to callers: "fallback" becomes "Plaintext", and every other name is passed
// through util.ToTitleCaseNoLower.
func formatLexerName(name string) string {
	switch name {
	case "fallback":
		return "Plaintext"
	default:
		return util.ToTitleCaseNoLower(name)
	}
}