mirror of
https://github.com/superseriousbusiness/gotosocial.git
synced 2024-12-15 03:36:37 +00:00
660 lines
13 KiB
Go
660 lines
13 KiB
Go
package text
|
|
|
|
import (
|
|
"bytes"
|
|
"io"
|
|
"regexp"
|
|
"unicode/utf8"
|
|
|
|
"github.com/yuin/goldmark/util"
|
|
)
|
|
|
|
// invalidValue marks a segment offset as "not set". blockReader.SetPosition
// treats a Start equal to this value as a request to use the stored segment
// of the given line.
const invalidValue = -1
|
|
|
|
// EOF is the byte returned by Peek when there is nothing left to read.
const EOF byte = 0xff
|
|
|
|
// A Reader interface provides abstracted method for reading text.
|
|
type Reader interface {
|
|
io.RuneReader
|
|
|
|
// Source returns a source of the reader.
|
|
Source() []byte
|
|
|
|
// ResetPosition resets positions.
|
|
ResetPosition()
|
|
|
|
// Peek returns a byte at current position without advancing the internal pointer.
|
|
Peek() byte
|
|
|
|
// PeekLine returns the current line without advancing the internal pointer.
|
|
PeekLine() ([]byte, Segment)
|
|
|
|
// PrecendingCharacter returns a character just before current internal pointer.
|
|
PrecendingCharacter() rune
|
|
|
|
// Value returns a value of the given segment.
|
|
Value(Segment) []byte
|
|
|
|
// LineOffset returns a distance from the line head to current position.
|
|
LineOffset() int
|
|
|
|
// Position returns current line number and position.
|
|
Position() (int, Segment)
|
|
|
|
// SetPosition sets current line number and position.
|
|
SetPosition(int, Segment)
|
|
|
|
// SetPadding sets padding to the reader.
|
|
SetPadding(int)
|
|
|
|
// Advance advances the internal pointer.
|
|
Advance(int)
|
|
|
|
// AdvanceAndSetPadding advances the internal pointer and add padding to the
|
|
// reader.
|
|
AdvanceAndSetPadding(int, int)
|
|
|
|
// AdvanceLine advances the internal pointer to the next line head.
|
|
AdvanceLine()
|
|
|
|
// SkipSpaces skips space characters and returns a non-blank line.
|
|
// If it reaches EOF, returns false.
|
|
SkipSpaces() (Segment, int, bool)
|
|
|
|
// SkipSpaces skips blank lines and returns a non-blank line.
|
|
// If it reaches EOF, returns false.
|
|
SkipBlankLines() (Segment, int, bool)
|
|
|
|
// Match performs regular expression matching to current line.
|
|
Match(reg *regexp.Regexp) bool
|
|
|
|
// Match performs regular expression searching to current line.
|
|
FindSubMatch(reg *regexp.Regexp) [][]byte
|
|
|
|
// FindClosure finds corresponding closure.
|
|
FindClosure(opener, closer byte, options FindClosureOptions) (*Segments, bool)
|
|
}
|
|
|
|
// FindClosureOptions configures the behaviour of Reader.FindClosure.
type FindClosureOptions struct {
	// CodeSpan makes FindClosure ignore openers and closers that appear
	// inside backtick-delimited code spans.
	CodeSpan bool

	// Nesting allows nested opener/closer pairs. When it is false, the
	// search is abandoned as soon as another opener is encountered.
	Nesting bool

	// Newline lets the search continue over multiple lines. When it is
	// false, the search stops at the end of the current line.
	Newline bool

	// Advance keeps the reader at the position reached by the search.
	// When it is false, the reader is restored to where the search began.
	Advance bool
}
|
|
|
|
type reader struct {
|
|
source []byte
|
|
sourceLength int
|
|
line int
|
|
peekedLine []byte
|
|
pos Segment
|
|
head int
|
|
lineOffset int
|
|
}
|
|
|
|
// NewReader return a new Reader that can read UTF-8 bytes .
|
|
func NewReader(source []byte) Reader {
|
|
r := &reader{
|
|
source: source,
|
|
sourceLength: len(source),
|
|
}
|
|
r.ResetPosition()
|
|
return r
|
|
}
|
|
|
|
func (r *reader) FindClosure(opener, closer byte, options FindClosureOptions) (*Segments, bool) {
|
|
return findClosureReader(r, opener, closer, options)
|
|
}
|
|
|
|
func (r *reader) ResetPosition() {
|
|
r.line = -1
|
|
r.head = 0
|
|
r.lineOffset = -1
|
|
r.AdvanceLine()
|
|
}
|
|
|
|
func (r *reader) Source() []byte {
|
|
return r.source
|
|
}
|
|
|
|
func (r *reader) Value(seg Segment) []byte {
|
|
return seg.Value(r.source)
|
|
}
|
|
|
|
func (r *reader) Peek() byte {
|
|
if r.pos.Start >= 0 && r.pos.Start < r.sourceLength {
|
|
if r.pos.Padding != 0 {
|
|
return space[0]
|
|
}
|
|
return r.source[r.pos.Start]
|
|
}
|
|
return EOF
|
|
}
|
|
|
|
func (r *reader) PeekLine() ([]byte, Segment) {
|
|
if r.pos.Start >= 0 && r.pos.Start < r.sourceLength {
|
|
if r.peekedLine == nil {
|
|
r.peekedLine = r.pos.Value(r.Source())
|
|
}
|
|
return r.peekedLine, r.pos
|
|
}
|
|
return nil, r.pos
|
|
}
|
|
|
|
// io.RuneReader interface.
|
|
func (r *reader) ReadRune() (rune, int, error) {
|
|
return readRuneReader(r)
|
|
}
|
|
|
|
func (r *reader) LineOffset() int {
|
|
if r.lineOffset < 0 {
|
|
v := 0
|
|
for i := r.head; i < r.pos.Start; i++ {
|
|
if r.source[i] == '\t' {
|
|
v += util.TabWidth(v)
|
|
} else {
|
|
v++
|
|
}
|
|
}
|
|
r.lineOffset = v - r.pos.Padding
|
|
}
|
|
return r.lineOffset
|
|
}
|
|
|
|
func (r *reader) PrecendingCharacter() rune {
|
|
if r.pos.Start <= 0 {
|
|
if r.pos.Padding != 0 {
|
|
return rune(' ')
|
|
}
|
|
return rune('\n')
|
|
}
|
|
i := r.pos.Start - 1
|
|
for ; i >= 0; i-- {
|
|
if utf8.RuneStart(r.source[i]) {
|
|
break
|
|
}
|
|
}
|
|
rn, _ := utf8.DecodeRune(r.source[i:])
|
|
return rn
|
|
}
|
|
|
|
func (r *reader) Advance(n int) {
|
|
r.lineOffset = -1
|
|
if n < len(r.peekedLine) && r.pos.Padding == 0 {
|
|
r.pos.Start += n
|
|
r.peekedLine = nil
|
|
return
|
|
}
|
|
r.peekedLine = nil
|
|
l := r.sourceLength
|
|
for ; n > 0 && r.pos.Start < l; n-- {
|
|
if r.pos.Padding != 0 {
|
|
r.pos.Padding--
|
|
continue
|
|
}
|
|
if r.source[r.pos.Start] == '\n' {
|
|
r.AdvanceLine()
|
|
continue
|
|
}
|
|
r.pos.Start++
|
|
}
|
|
}
|
|
|
|
func (r *reader) AdvanceAndSetPadding(n, padding int) {
|
|
r.Advance(n)
|
|
if padding > r.pos.Padding {
|
|
r.SetPadding(padding)
|
|
}
|
|
}
|
|
|
|
func (r *reader) AdvanceLine() {
|
|
r.lineOffset = -1
|
|
r.peekedLine = nil
|
|
r.pos.Start = r.pos.Stop
|
|
r.head = r.pos.Start
|
|
if r.pos.Start < 0 {
|
|
return
|
|
}
|
|
r.pos.Stop = r.sourceLength
|
|
for i := r.pos.Start; i < r.sourceLength; i++ {
|
|
c := r.source[i]
|
|
if c == '\n' {
|
|
r.pos.Stop = i + 1
|
|
break
|
|
}
|
|
}
|
|
r.line++
|
|
r.pos.Padding = 0
|
|
}
|
|
|
|
func (r *reader) Position() (int, Segment) {
|
|
return r.line, r.pos
|
|
}
|
|
|
|
func (r *reader) SetPosition(line int, pos Segment) {
|
|
r.lineOffset = -1
|
|
r.line = line
|
|
r.pos = pos
|
|
}
|
|
|
|
func (r *reader) SetPadding(v int) {
|
|
r.pos.Padding = v
|
|
}
|
|
|
|
func (r *reader) SkipSpaces() (Segment, int, bool) {
|
|
return skipSpacesReader(r)
|
|
}
|
|
|
|
func (r *reader) SkipBlankLines() (Segment, int, bool) {
|
|
return skipBlankLinesReader(r)
|
|
}
|
|
|
|
func (r *reader) Match(reg *regexp.Regexp) bool {
|
|
return matchReader(r, reg)
|
|
}
|
|
|
|
func (r *reader) FindSubMatch(reg *regexp.Regexp) [][]byte {
|
|
return findSubMatchReader(r, reg)
|
|
}
|
|
|
|
// A BlockReader interface is a reader that is optimized for Blocks.
|
|
type BlockReader interface {
|
|
Reader
|
|
// Reset resets current state and sets new segments to the reader.
|
|
Reset(segment *Segments)
|
|
}
|
|
|
|
type blockReader struct {
|
|
source []byte
|
|
segments *Segments
|
|
segmentsLength int
|
|
line int
|
|
pos Segment
|
|
head int
|
|
last int
|
|
lineOffset int
|
|
}
|
|
|
|
// NewBlockReader returns a new BlockReader.
|
|
func NewBlockReader(source []byte, segments *Segments) BlockReader {
|
|
r := &blockReader{
|
|
source: source,
|
|
}
|
|
if segments != nil {
|
|
r.Reset(segments)
|
|
}
|
|
return r
|
|
}
|
|
|
|
func (r *blockReader) FindClosure(opener, closer byte, options FindClosureOptions) (*Segments, bool) {
|
|
return findClosureReader(r, opener, closer, options)
|
|
}
|
|
|
|
func (r *blockReader) ResetPosition() {
|
|
r.line = -1
|
|
r.head = 0
|
|
r.last = 0
|
|
r.lineOffset = -1
|
|
r.pos.Start = -1
|
|
r.pos.Stop = -1
|
|
r.pos.Padding = 0
|
|
if r.segmentsLength > 0 {
|
|
last := r.segments.At(r.segmentsLength - 1)
|
|
r.last = last.Stop
|
|
}
|
|
r.AdvanceLine()
|
|
}
|
|
|
|
func (r *blockReader) Reset(segments *Segments) {
|
|
r.segments = segments
|
|
r.segmentsLength = segments.Len()
|
|
r.ResetPosition()
|
|
}
|
|
|
|
func (r *blockReader) Source() []byte {
|
|
return r.source
|
|
}
|
|
|
|
func (r *blockReader) Value(seg Segment) []byte {
|
|
line := r.segmentsLength - 1
|
|
ret := make([]byte, 0, seg.Stop-seg.Start+1)
|
|
for ; line >= 0; line-- {
|
|
if seg.Start >= r.segments.At(line).Start {
|
|
break
|
|
}
|
|
}
|
|
i := seg.Start
|
|
for ; line < r.segmentsLength; line++ {
|
|
s := r.segments.At(line)
|
|
if i < 0 {
|
|
i = s.Start
|
|
}
|
|
ret = s.ConcatPadding(ret)
|
|
for ; i < seg.Stop && i < s.Stop; i++ {
|
|
ret = append(ret, r.source[i])
|
|
}
|
|
i = -1
|
|
if s.Stop > seg.Stop {
|
|
break
|
|
}
|
|
}
|
|
return ret
|
|
}
|
|
|
|
// io.RuneReader interface.
|
|
func (r *blockReader) ReadRune() (rune, int, error) {
|
|
return readRuneReader(r)
|
|
}
|
|
|
|
func (r *blockReader) PrecendingCharacter() rune {
|
|
if r.pos.Padding != 0 {
|
|
return rune(' ')
|
|
}
|
|
if r.segments.Len() < 1 {
|
|
return rune('\n')
|
|
}
|
|
firstSegment := r.segments.At(0)
|
|
if r.line == 0 && r.pos.Start <= firstSegment.Start {
|
|
return rune('\n')
|
|
}
|
|
l := len(r.source)
|
|
i := r.pos.Start - 1
|
|
for ; i < l && i >= 0; i-- {
|
|
if utf8.RuneStart(r.source[i]) {
|
|
break
|
|
}
|
|
}
|
|
if i < 0 || i >= l {
|
|
return rune('\n')
|
|
}
|
|
rn, _ := utf8.DecodeRune(r.source[i:])
|
|
return rn
|
|
}
|
|
|
|
func (r *blockReader) LineOffset() int {
|
|
if r.lineOffset < 0 {
|
|
v := 0
|
|
for i := r.head; i < r.pos.Start; i++ {
|
|
if r.source[i] == '\t' {
|
|
v += util.TabWidth(v)
|
|
} else {
|
|
v++
|
|
}
|
|
}
|
|
r.lineOffset = v - r.pos.Padding
|
|
}
|
|
return r.lineOffset
|
|
}
|
|
|
|
func (r *blockReader) Peek() byte {
|
|
if r.line < r.segmentsLength && r.pos.Start >= 0 && r.pos.Start < r.last {
|
|
if r.pos.Padding != 0 {
|
|
return space[0]
|
|
}
|
|
return r.source[r.pos.Start]
|
|
}
|
|
return EOF
|
|
}
|
|
|
|
func (r *blockReader) PeekLine() ([]byte, Segment) {
|
|
if r.line < r.segmentsLength && r.pos.Start >= 0 && r.pos.Start < r.last {
|
|
return r.pos.Value(r.source), r.pos
|
|
}
|
|
return nil, r.pos
|
|
}
|
|
|
|
func (r *blockReader) Advance(n int) {
|
|
r.lineOffset = -1
|
|
|
|
if n < r.pos.Stop-r.pos.Start && r.pos.Padding == 0 {
|
|
r.pos.Start += n
|
|
return
|
|
}
|
|
|
|
for ; n > 0; n-- {
|
|
if r.pos.Padding != 0 {
|
|
r.pos.Padding--
|
|
continue
|
|
}
|
|
if r.pos.Start >= r.pos.Stop-1 && r.pos.Stop < r.last {
|
|
r.AdvanceLine()
|
|
continue
|
|
}
|
|
r.pos.Start++
|
|
}
|
|
}
|
|
|
|
func (r *blockReader) AdvanceAndSetPadding(n, padding int) {
|
|
r.Advance(n)
|
|
if padding > r.pos.Padding {
|
|
r.SetPadding(padding)
|
|
}
|
|
}
|
|
|
|
func (r *blockReader) AdvanceLine() {
|
|
r.SetPosition(r.line+1, NewSegment(invalidValue, invalidValue))
|
|
r.head = r.pos.Start
|
|
}
|
|
|
|
func (r *blockReader) Position() (int, Segment) {
|
|
return r.line, r.pos
|
|
}
|
|
|
|
func (r *blockReader) SetPosition(line int, pos Segment) {
|
|
r.lineOffset = -1
|
|
r.line = line
|
|
if pos.Start == invalidValue {
|
|
if r.line < r.segmentsLength {
|
|
s := r.segments.At(line)
|
|
r.head = s.Start
|
|
r.pos = s
|
|
}
|
|
} else {
|
|
r.pos = pos
|
|
if r.line < r.segmentsLength {
|
|
s := r.segments.At(line)
|
|
r.head = s.Start
|
|
}
|
|
}
|
|
}
|
|
|
|
func (r *blockReader) SetPadding(v int) {
|
|
r.lineOffset = -1
|
|
r.pos.Padding = v
|
|
}
|
|
|
|
func (r *blockReader) SkipSpaces() (Segment, int, bool) {
|
|
return skipSpacesReader(r)
|
|
}
|
|
|
|
func (r *blockReader) SkipBlankLines() (Segment, int, bool) {
|
|
return skipBlankLinesReader(r)
|
|
}
|
|
|
|
func (r *blockReader) Match(reg *regexp.Regexp) bool {
|
|
return matchReader(r, reg)
|
|
}
|
|
|
|
func (r *blockReader) FindSubMatch(reg *regexp.Regexp) [][]byte {
|
|
return findSubMatchReader(r, reg)
|
|
}
|
|
|
|
func skipBlankLinesReader(r Reader) (Segment, int, bool) {
|
|
lines := 0
|
|
for {
|
|
line, seg := r.PeekLine()
|
|
if line == nil {
|
|
return seg, lines, false
|
|
}
|
|
if util.IsBlank(line) {
|
|
lines++
|
|
r.AdvanceLine()
|
|
} else {
|
|
return seg, lines, true
|
|
}
|
|
}
|
|
}
|
|
|
|
func skipSpacesReader(r Reader) (Segment, int, bool) {
|
|
chars := 0
|
|
for {
|
|
line, segment := r.PeekLine()
|
|
if line == nil {
|
|
return segment, chars, false
|
|
}
|
|
for i, c := range line {
|
|
if util.IsSpace(c) {
|
|
chars++
|
|
r.Advance(1)
|
|
continue
|
|
}
|
|
return segment.WithStart(segment.Start + i + 1), chars, true
|
|
}
|
|
}
|
|
}
|
|
|
|
func matchReader(r Reader, reg *regexp.Regexp) bool {
|
|
oldline, oldseg := r.Position()
|
|
match := reg.FindReaderSubmatchIndex(r)
|
|
r.SetPosition(oldline, oldseg)
|
|
if match == nil {
|
|
return false
|
|
}
|
|
r.Advance(match[1] - match[0])
|
|
return true
|
|
}
|
|
|
|
func findSubMatchReader(r Reader, reg *regexp.Regexp) [][]byte {
|
|
oldLine, oldSeg := r.Position()
|
|
match := reg.FindReaderSubmatchIndex(r)
|
|
r.SetPosition(oldLine, oldSeg)
|
|
if match == nil {
|
|
return nil
|
|
}
|
|
var bb bytes.Buffer
|
|
bb.Grow(match[1] - match[0])
|
|
for i := 0; i < match[1]; {
|
|
r, size, _ := readRuneReader(r)
|
|
i += size
|
|
bb.WriteRune(r)
|
|
}
|
|
bs := bb.Bytes()
|
|
var result [][]byte
|
|
for i := 0; i < len(match); i += 2 {
|
|
if match[i] < 0 {
|
|
result = append(result, []byte{})
|
|
continue
|
|
}
|
|
result = append(result, bs[match[i]:match[i+1]])
|
|
}
|
|
|
|
r.SetPosition(oldLine, oldSeg)
|
|
r.Advance(match[1] - match[0])
|
|
return result
|
|
}
|
|
|
|
func readRuneReader(r Reader) (rune, int, error) {
|
|
line, _ := r.PeekLine()
|
|
if line == nil {
|
|
return 0, 0, io.EOF
|
|
}
|
|
rn, size := utf8.DecodeRune(line)
|
|
if rn == utf8.RuneError {
|
|
return 0, 0, io.EOF
|
|
}
|
|
r.Advance(size)
|
|
return rn, size, nil
|
|
}
|
|
|
|
func findClosureReader(r Reader, opener, closer byte, opts FindClosureOptions) (*Segments, bool) {
|
|
opened := 1
|
|
codeSpanOpener := 0
|
|
closed := false
|
|
orgline, orgpos := r.Position()
|
|
var ret *Segments
|
|
|
|
for {
|
|
bs, seg := r.PeekLine()
|
|
if bs == nil {
|
|
goto end
|
|
}
|
|
i := 0
|
|
for i < len(bs) {
|
|
c := bs[i]
|
|
if opts.CodeSpan && codeSpanOpener != 0 && c == '`' {
|
|
codeSpanCloser := 0
|
|
for ; i < len(bs); i++ {
|
|
if bs[i] == '`' {
|
|
codeSpanCloser++
|
|
} else {
|
|
i--
|
|
break
|
|
}
|
|
}
|
|
if codeSpanCloser == codeSpanOpener {
|
|
codeSpanOpener = 0
|
|
}
|
|
} else if codeSpanOpener == 0 && c == '\\' && i < len(bs)-1 && util.IsPunct(bs[i+1]) {
|
|
i += 2
|
|
continue
|
|
} else if opts.CodeSpan && codeSpanOpener == 0 && c == '`' {
|
|
for ; i < len(bs); i++ {
|
|
if bs[i] == '`' {
|
|
codeSpanOpener++
|
|
} else {
|
|
i--
|
|
break
|
|
}
|
|
}
|
|
} else if (opts.CodeSpan && codeSpanOpener == 0) || !opts.CodeSpan {
|
|
if c == closer {
|
|
opened--
|
|
if opened == 0 {
|
|
if ret == nil {
|
|
ret = NewSegments()
|
|
}
|
|
ret.Append(seg.WithStop(seg.Start + i))
|
|
r.Advance(i + 1)
|
|
closed = true
|
|
goto end
|
|
}
|
|
} else if c == opener {
|
|
if !opts.Nesting {
|
|
goto end
|
|
}
|
|
opened++
|
|
}
|
|
}
|
|
i++
|
|
}
|
|
if !opts.Newline {
|
|
goto end
|
|
}
|
|
r.AdvanceLine()
|
|
if ret == nil {
|
|
ret = NewSegments()
|
|
}
|
|
ret.Append(seg)
|
|
}
|
|
end:
|
|
if !opts.Advance {
|
|
r.SetPosition(orgline, orgpos)
|
|
}
|
|
if closed {
|
|
return ret, true
|
|
}
|
|
return nil, false
|
|
}
|