2022-08-13 18:32:34 +00:00
// Copyright 2022 The Gitea Authors. All rights reserved.
2022-11-27 18:20:29 +00:00
// SPDX-License-Identifier: MIT
2022-01-07 01:18:52 +00:00
2022-08-13 18:32:34 +00:00
//go:generate go run invisible/generate.go -v -o ./invisible_gen.go
//go:generate go run ambiguous/generate.go -v -o ./ambiguous_gen.go ambiguous/ambiguous.json
2022-01-07 01:18:52 +00:00
package charset
import (
2022-12-17 20:22:25 +00:00
"bufio"
2022-01-07 01:18:52 +00:00
"io"
"strings"
2022-08-13 18:32:34 +00:00
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/translation"
2022-01-07 01:18:52 +00:00
)
2022-08-13 18:32:34 +00:00
// RuneNBSP is the Unicode code point of the no-break space (NBSP), U+00A0.
const RuneNBSP = 0xa0
2022-01-07 01:18:52 +00:00
2022-08-13 18:32:34 +00:00
// EscapeControlHTML escapes the unicode control sequences in a provided html document
func EscapeControlHTML ( text string , locale translation . Locale , allowed ... rune ) ( escaped * EscapeStatus , output string ) {
2022-01-07 01:18:52 +00:00
sb := & strings . Builder { }
2022-08-13 18:32:34 +00:00
outputStream := & HTMLStreamerWriter { Writer : sb }
streamer := NewEscapeStreamer ( locale , outputStream , allowed ... ) . ( * escapeStreamer )
2022-01-07 01:18:52 +00:00
2022-08-13 18:32:34 +00:00
if err := StreamHTML ( strings . NewReader ( text ) , streamer ) ; err != nil {
streamer . escaped . HasError = true
log . Error ( "Error whilst escaping: %v" , err )
}
return streamer . escaped , sb . String ( )
2022-01-07 01:18:52 +00:00
}
2022-12-17 20:22:25 +00:00
// EscapeControlReaders escapes the unicode control sequences in a provided reader of HTML content and writer in a locale and returns the findings as an EscapeStatus and the escaped []byte
2022-08-13 18:32:34 +00:00
func EscapeControlReader ( reader io . Reader , writer io . Writer , locale translation . Locale , allowed ... rune ) ( escaped * EscapeStatus , err error ) {
outputStream := & HTMLStreamerWriter { Writer : writer }
streamer := NewEscapeStreamer ( locale , outputStream , allowed ... ) . ( * escapeStreamer )
2022-01-07 01:18:52 +00:00
2022-08-13 18:32:34 +00:00
if err = StreamHTML ( reader , streamer ) ; err != nil {
streamer . escaped . HasError = true
log . Error ( "Error whilst escaping: %v" , err )
2022-01-07 01:18:52 +00:00
}
2022-08-13 18:32:34 +00:00
return streamer . escaped , err
2022-01-07 01:18:52 +00:00
}
2023-02-02 04:51:02 +00:00
// EscapeControlStringReader escapes the unicode control sequences in a provided reader of string content and writer in a locale and returns the findings as an EscapeStatus and the escaped []byte. HTML line breaks are not inserted after every newline by this method.
2022-12-17 20:22:25 +00:00
func EscapeControlStringReader ( reader io . Reader , writer io . Writer , locale translation . Locale , allowed ... rune ) ( escaped * EscapeStatus , err error ) {
bufRd := bufio . NewReader ( reader )
outputStream := & HTMLStreamerWriter { Writer : writer }
streamer := NewEscapeStreamer ( locale , outputStream , allowed ... ) . ( * escapeStreamer )
for {
line , rdErr := bufRd . ReadString ( '\n' )
if len ( line ) > 0 {
if err := streamer . Text ( line ) ; err != nil {
streamer . escaped . HasError = true
log . Error ( "Error whilst escaping: %v" , err )
return streamer . escaped , err
}
}
if rdErr != nil {
if rdErr != io . EOF {
err = rdErr
}
break
}
}
return streamer . escaped , err
}
2022-08-13 18:32:34 +00:00
// EscapeControlString escapes the unicode control sequences in a provided string and returns the findings as an EscapeStatus and the escaped string
func EscapeControlString ( text string , locale translation . Locale , allowed ... rune ) ( escaped * EscapeStatus , output string ) {
sb := & strings . Builder { }
outputStream := & HTMLStreamerWriter { Writer : sb }
streamer := NewEscapeStreamer ( locale , outputStream , allowed ... ) . ( * escapeStreamer )
2022-01-07 01:18:52 +00:00
2022-08-13 18:32:34 +00:00
if err := streamer . Text ( text ) ; err != nil {
streamer . escaped . HasError = true
log . Error ( "Error whilst escaping: %v" , err )
}
return streamer . escaped , sb . String ( )
2022-01-07 01:18:52 +00:00
}