mirror of
https://github.com/woodpecker-ci/woodpecker.git
synced 2024-11-27 12:21:03 +00:00
472 lines
11 KiB
Go
472 lines
11 KiB
Go
|
package syntax
|
||
|
|
||
|
import (
|
||
|
"errors"
|
||
|
"fmt"
|
||
|
"strings"
|
||
|
)
|
||
|
|
||
|
type ParserOptions struct {
|
||
|
// NoLiterals disables OpChar merging into OpLiteral.
|
||
|
NoLiterals bool
|
||
|
}
|
||
|
|
||
|
func NewParser(opts *ParserOptions) *Parser {
|
||
|
return newParser(opts)
|
||
|
}
|
||
|
|
||
|
type Parser struct {
|
||
|
out Regexp
|
||
|
lexer lexer
|
||
|
exprPool []Expr
|
||
|
|
||
|
prefixParselets [256]prefixParselet
|
||
|
infixParselets [256]infixParselet
|
||
|
|
||
|
charClass []Expr
|
||
|
allocated uint
|
||
|
|
||
|
opts ParserOptions
|
||
|
}
|
||
|
|
||
|
// ParsePCRE parses PHP-style pattern with delimiters.
|
||
|
// An example of such pattern is `/foo/i`.
|
||
|
func (p *Parser) ParsePCRE(pattern string) (*RegexpPCRE, error) {
|
||
|
pcre, err := p.newPCRE(pattern)
|
||
|
if err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
if pcre.HasModifier('x') {
|
||
|
return nil, errors.New("'x' modifier is not supported")
|
||
|
}
|
||
|
re, err := p.Parse(pcre.Pattern)
|
||
|
if re != nil {
|
||
|
pcre.Expr = re.Expr
|
||
|
}
|
||
|
return pcre, err
|
||
|
}
|
||
|
|
||
|
func (p *Parser) Parse(pattern string) (result *Regexp, err error) {
|
||
|
defer func() {
|
||
|
r := recover()
|
||
|
if r == nil {
|
||
|
return
|
||
|
}
|
||
|
if err2, ok := r.(ParseError); ok {
|
||
|
err = err2
|
||
|
return
|
||
|
}
|
||
|
panic(r)
|
||
|
}()
|
||
|
|
||
|
p.lexer.Init(pattern)
|
||
|
p.allocated = 0
|
||
|
p.out.Pattern = pattern
|
||
|
if pattern == "" {
|
||
|
p.out.Expr = *p.newExpr(OpConcat, Position{})
|
||
|
} else {
|
||
|
p.out.Expr = *p.parseExpr(0)
|
||
|
}
|
||
|
|
||
|
if !p.opts.NoLiterals {
|
||
|
p.mergeChars(&p.out.Expr)
|
||
|
}
|
||
|
p.setValues(&p.out.Expr)
|
||
|
|
||
|
return &p.out, nil
|
||
|
}
|
||
|
|
||
|
type prefixParselet func(token) *Expr
|
||
|
|
||
|
type infixParselet func(*Expr, token) *Expr
|
||
|
|
||
|
func newParser(opts *ParserOptions) *Parser {
|
||
|
var p Parser
|
||
|
|
||
|
if opts != nil {
|
||
|
p.opts = *opts
|
||
|
}
|
||
|
p.exprPool = make([]Expr, 256)
|
||
|
|
||
|
for tok, op := range tok2op {
|
||
|
if op != 0 {
|
||
|
p.prefixParselets[tokenKind(tok)] = p.parsePrefixElementary
|
||
|
}
|
||
|
}
|
||
|
|
||
|
p.prefixParselets[tokEscapeHexFull] = func(tok token) *Expr {
|
||
|
return p.newExprForm(OpEscapeHex, FormEscapeHexFull, tok.pos)
|
||
|
}
|
||
|
p.prefixParselets[tokEscapeUniFull] = func(tok token) *Expr {
|
||
|
return p.newExprForm(OpEscapeUni, FormEscapeUniFull, tok.pos)
|
||
|
}
|
||
|
|
||
|
p.prefixParselets[tokLparen] = func(tok token) *Expr { return p.parseGroup(OpCapture, tok) }
|
||
|
p.prefixParselets[tokLparenAtomic] = func(tok token) *Expr { return p.parseGroup(OpAtomicGroup, tok) }
|
||
|
p.prefixParselets[tokLparenPositiveLookahead] = func(tok token) *Expr { return p.parseGroup(OpPositiveLookahead, tok) }
|
||
|
p.prefixParselets[tokLparenNegativeLookahead] = func(tok token) *Expr { return p.parseGroup(OpNegativeLookahead, tok) }
|
||
|
p.prefixParselets[tokLparenPositiveLookbehind] = func(tok token) *Expr { return p.parseGroup(OpPositiveLookbehind, tok) }
|
||
|
p.prefixParselets[tokLparenNegativeLookbehind] = func(tok token) *Expr { return p.parseGroup(OpNegativeLookbehind, tok) }
|
||
|
|
||
|
p.prefixParselets[tokLparenName] = func(tok token) *Expr {
|
||
|
return p.parseNamedCapture(FormDefault, tok)
|
||
|
}
|
||
|
p.prefixParselets[tokLparenNameAngle] = func(tok token) *Expr {
|
||
|
return p.parseNamedCapture(FormNamedCaptureAngle, tok)
|
||
|
}
|
||
|
p.prefixParselets[tokLparenNameQuote] = func(tok token) *Expr {
|
||
|
return p.parseNamedCapture(FormNamedCaptureQuote, tok)
|
||
|
}
|
||
|
|
||
|
p.prefixParselets[tokLparenFlags] = p.parseGroupWithFlags
|
||
|
|
||
|
p.prefixParselets[tokPipe] = func(tok token) *Expr {
|
||
|
// We need prefix pipe parselet to handle `(|x)` syntax.
|
||
|
right := p.parseExpr(1)
|
||
|
return p.newExpr(OpAlt, tok.pos, p.newEmpty(tok.pos), right)
|
||
|
}
|
||
|
p.prefixParselets[tokLbracket] = func(tok token) *Expr {
|
||
|
return p.parseCharClass(OpCharClass, tok)
|
||
|
}
|
||
|
p.prefixParselets[tokLbracketCaret] = func(tok token) *Expr {
|
||
|
return p.parseCharClass(OpNegCharClass, tok)
|
||
|
}
|
||
|
|
||
|
p.infixParselets[tokRepeat] = func(left *Expr, tok token) *Expr {
|
||
|
repeatLit := p.newExpr(OpString, tok.pos)
|
||
|
return p.newExpr(OpRepeat, combinePos(left.Pos, tok.pos), left, repeatLit)
|
||
|
}
|
||
|
p.infixParselets[tokStar] = func(left *Expr, tok token) *Expr {
|
||
|
return p.newExpr(OpStar, combinePos(left.Pos, tok.pos), left)
|
||
|
}
|
||
|
p.infixParselets[tokConcat] = func(left *Expr, tok token) *Expr {
|
||
|
right := p.parseExpr(2)
|
||
|
if left.Op == OpConcat {
|
||
|
left.Args = append(left.Args, *right)
|
||
|
left.Pos.End = right.End()
|
||
|
return left
|
||
|
}
|
||
|
return p.newExpr(OpConcat, combinePos(left.Pos, right.Pos), left, right)
|
||
|
}
|
||
|
p.infixParselets[tokPipe] = p.parseAlt
|
||
|
p.infixParselets[tokMinus] = p.parseMinus
|
||
|
p.infixParselets[tokPlus] = p.parsePlus
|
||
|
p.infixParselets[tokQuestion] = p.parseQuestion
|
||
|
|
||
|
return &p
|
||
|
}
|
||
|
|
||
|
func (p *Parser) setValues(e *Expr) {
|
||
|
for i := range e.Args {
|
||
|
p.setValues(&e.Args[i])
|
||
|
}
|
||
|
e.Value = p.exprValue(e)
|
||
|
}
|
||
|
|
||
|
func (p *Parser) exprValue(e *Expr) string {
|
||
|
return p.out.Pattern[e.Begin():e.End()]
|
||
|
}
|
||
|
|
||
|
func (p *Parser) mergeChars(e *Expr) {
|
||
|
for i := range e.Args {
|
||
|
p.mergeChars(&e.Args[i])
|
||
|
}
|
||
|
if e.Op != OpConcat || len(e.Args) < 2 {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
args := e.Args[:0]
|
||
|
i := 0
|
||
|
for i < len(e.Args) {
|
||
|
first := i
|
||
|
chars := 0
|
||
|
for j := i; j < len(e.Args) && e.Args[j].Op == OpChar; j++ {
|
||
|
chars++
|
||
|
}
|
||
|
if chars > 1 {
|
||
|
c1 := e.Args[first]
|
||
|
c2 := e.Args[first+chars-1]
|
||
|
lit := p.newExpr(OpLiteral, combinePos(c1.Pos, c2.Pos))
|
||
|
for j := 0; j < chars; j++ {
|
||
|
lit.Args = append(lit.Args, e.Args[first+j])
|
||
|
}
|
||
|
args = append(args, *lit)
|
||
|
i += chars
|
||
|
} else {
|
||
|
args = append(args, e.Args[i])
|
||
|
i++
|
||
|
}
|
||
|
}
|
||
|
if len(args) == 1 {
|
||
|
*e = args[0] // Turn OpConcat into OpLiteral
|
||
|
} else {
|
||
|
e.Args = args
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func (p *Parser) newEmpty(pos Position) *Expr {
|
||
|
return p.newExpr(OpConcat, pos)
|
||
|
}
|
||
|
|
||
|
func (p *Parser) newExprForm(op Operation, form Form, pos Position, args ...*Expr) *Expr {
|
||
|
e := p.newExpr(op, pos, args...)
|
||
|
e.Form = form
|
||
|
return e
|
||
|
}
|
||
|
|
||
|
func (p *Parser) newExpr(op Operation, pos Position, args ...*Expr) *Expr {
|
||
|
e := p.allocExpr()
|
||
|
*e = Expr{
|
||
|
Op: op,
|
||
|
Pos: pos,
|
||
|
Args: e.Args[:0],
|
||
|
}
|
||
|
for _, arg := range args {
|
||
|
e.Args = append(e.Args, *arg)
|
||
|
}
|
||
|
return e
|
||
|
}
|
||
|
|
||
|
func (p *Parser) allocExpr() *Expr {
|
||
|
i := p.allocated
|
||
|
if i < uint(len(p.exprPool)) {
|
||
|
p.allocated++
|
||
|
return &p.exprPool[i]
|
||
|
}
|
||
|
return &Expr{}
|
||
|
}
|
||
|
|
||
|
func (p *Parser) expect(kind tokenKind) Position {
|
||
|
tok := p.lexer.NextToken()
|
||
|
if tok.kind != kind {
|
||
|
throwErrorf(int(tok.pos.Begin), int(tok.pos.End), "expected '%s', found '%s'", kind, tok.kind)
|
||
|
}
|
||
|
return tok.pos
|
||
|
}
|
||
|
|
||
|
func (p *Parser) parseExpr(precedence int) *Expr {
|
||
|
tok := p.lexer.NextToken()
|
||
|
prefix := p.prefixParselets[tok.kind]
|
||
|
if prefix == nil {
|
||
|
throwfPos(tok.pos, "unexpected token: %v", tok)
|
||
|
}
|
||
|
left := prefix(tok)
|
||
|
|
||
|
for precedence < p.precedenceOf(p.lexer.Peek()) {
|
||
|
tok := p.lexer.NextToken()
|
||
|
infix := p.infixParselets[tok.kind]
|
||
|
left = infix(left, tok)
|
||
|
}
|
||
|
|
||
|
return left
|
||
|
}
|
||
|
|
||
|
func (p *Parser) parsePrefixElementary(tok token) *Expr {
|
||
|
return p.newExpr(tok2op[tok.kind], tok.pos)
|
||
|
}
|
||
|
|
||
|
func (p *Parser) parseCharClass(op Operation, tok token) *Expr {
|
||
|
var endPos Position
|
||
|
p.charClass = p.charClass[:0]
|
||
|
for {
|
||
|
p.charClass = append(p.charClass, *p.parseExpr(0))
|
||
|
next := p.lexer.Peek()
|
||
|
if next.kind == tokRbracket {
|
||
|
endPos = next.pos
|
||
|
p.lexer.NextToken()
|
||
|
break
|
||
|
}
|
||
|
if next.kind == tokNone {
|
||
|
throwfPos(tok.pos, "unterminated '['")
|
||
|
}
|
||
|
}
|
||
|
|
||
|
result := p.newExpr(op, combinePos(tok.pos, endPos))
|
||
|
result.Args = append(result.Args, p.charClass...)
|
||
|
return result
|
||
|
}
|
||
|
|
||
|
func (p *Parser) parseMinus(left *Expr, tok token) *Expr {
|
||
|
if p.isValidCharRangeOperand(left) {
|
||
|
if p.lexer.Peek().kind != tokRbracket {
|
||
|
right := p.parseExpr(2)
|
||
|
return p.newExpr(OpCharRange, combinePos(left.Pos, right.Pos), left, right)
|
||
|
}
|
||
|
}
|
||
|
p.charClass = append(p.charClass, *left)
|
||
|
return p.newExpr(OpChar, tok.pos)
|
||
|
}
|
||
|
|
||
|
func (p *Parser) isValidCharRangeOperand(e *Expr) bool {
|
||
|
switch e.Op {
|
||
|
case OpEscapeHex, OpEscapeOctal, OpEscapeMeta, OpChar:
|
||
|
return true
|
||
|
case OpEscapeChar:
|
||
|
switch p.exprValue(e) {
|
||
|
case `\\`, `\|`, `\*`, `\+`, `\?`, `\.`, `\[`, `\^`, `\$`, `\(`, `\)`:
|
||
|
return true
|
||
|
}
|
||
|
}
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
func (p *Parser) parsePlus(left *Expr, tok token) *Expr {
|
||
|
op := OpPlus
|
||
|
switch left.Op {
|
||
|
case OpPlus, OpStar, OpQuestion, OpRepeat:
|
||
|
op = OpPossessive
|
||
|
}
|
||
|
return p.newExpr(op, combinePos(left.Pos, tok.pos), left)
|
||
|
}
|
||
|
|
||
|
func (p *Parser) parseQuestion(left *Expr, tok token) *Expr {
|
||
|
op := OpQuestion
|
||
|
switch left.Op {
|
||
|
case OpPlus, OpStar, OpQuestion, OpRepeat:
|
||
|
op = OpNonGreedy
|
||
|
}
|
||
|
return p.newExpr(op, combinePos(left.Pos, tok.pos), left)
|
||
|
}
|
||
|
|
||
|
func (p *Parser) parseAlt(left *Expr, tok token) *Expr {
|
||
|
var right *Expr
|
||
|
switch p.lexer.Peek().kind {
|
||
|
case tokRparen, tokNone:
|
||
|
// This is needed to handle `(x|)` syntax.
|
||
|
right = p.newEmpty(tok.pos)
|
||
|
default:
|
||
|
right = p.parseExpr(1)
|
||
|
}
|
||
|
if left.Op == OpAlt {
|
||
|
left.Args = append(left.Args, *right)
|
||
|
left.Pos.End = right.End()
|
||
|
return left
|
||
|
}
|
||
|
return p.newExpr(OpAlt, combinePos(left.Pos, right.Pos), left, right)
|
||
|
}
|
||
|
|
||
|
func (p *Parser) parseGroupItem(tok token) *Expr {
|
||
|
if p.lexer.Peek().kind == tokRparen {
|
||
|
// This is needed to handle `() syntax.`
|
||
|
return p.newEmpty(tok.pos)
|
||
|
}
|
||
|
return p.parseExpr(0)
|
||
|
}
|
||
|
|
||
|
func (p *Parser) parseGroup(op Operation, tok token) *Expr {
|
||
|
x := p.parseGroupItem(tok)
|
||
|
result := p.newExpr(op, tok.pos, x)
|
||
|
result.Pos.End = p.expect(tokRparen).End
|
||
|
return result
|
||
|
}
|
||
|
|
||
|
func (p *Parser) parseNamedCapture(form Form, tok token) *Expr {
|
||
|
prefixLen := len("(?<")
|
||
|
if form == FormDefault {
|
||
|
prefixLen = len("(?P<")
|
||
|
}
|
||
|
name := p.newExpr(OpString, Position{
|
||
|
Begin: tok.pos.Begin + uint16(prefixLen),
|
||
|
End: tok.pos.End - uint16(len(">")),
|
||
|
})
|
||
|
x := p.parseGroupItem(tok)
|
||
|
result := p.newExprForm(OpNamedCapture, form, tok.pos, x, name)
|
||
|
result.Pos.End = p.expect(tokRparen).End
|
||
|
return result
|
||
|
}
|
||
|
|
||
|
func (p *Parser) parseGroupWithFlags(tok token) *Expr {
|
||
|
var result *Expr
|
||
|
val := p.out.Pattern[tok.pos.Begin+1 : tok.pos.End]
|
||
|
switch {
|
||
|
case !strings.HasSuffix(val, ":"):
|
||
|
flags := p.newExpr(OpString, Position{
|
||
|
Begin: tok.pos.Begin + uint16(len("(?")),
|
||
|
End: tok.pos.End,
|
||
|
})
|
||
|
result = p.newExpr(OpFlagOnlyGroup, tok.pos, flags)
|
||
|
case val == "?:":
|
||
|
x := p.parseGroupItem(tok)
|
||
|
result = p.newExpr(OpGroup, tok.pos, x)
|
||
|
default:
|
||
|
flags := p.newExpr(OpString, Position{
|
||
|
Begin: tok.pos.Begin + uint16(len("(?")),
|
||
|
End: tok.pos.End - uint16(len(":")),
|
||
|
})
|
||
|
x := p.parseGroupItem(tok)
|
||
|
result = p.newExpr(OpGroupWithFlags, tok.pos, x, flags)
|
||
|
}
|
||
|
result.Pos.End = p.expect(tokRparen).End
|
||
|
return result
|
||
|
}
|
||
|
|
||
|
func (p *Parser) precedenceOf(tok token) int {
|
||
|
switch tok.kind {
|
||
|
case tokPipe:
|
||
|
return 1
|
||
|
case tokConcat, tokMinus:
|
||
|
return 2
|
||
|
case tokPlus, tokStar, tokQuestion, tokRepeat:
|
||
|
return 3
|
||
|
default:
|
||
|
return 0
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func (p *Parser) newPCRE(source string) (*RegexpPCRE, error) {
|
||
|
if source == "" {
|
||
|
return nil, errors.New("empty pattern: can't find delimiters")
|
||
|
}
|
||
|
|
||
|
delim := source[0]
|
||
|
endDelim := delim
|
||
|
switch delim {
|
||
|
case '(':
|
||
|
endDelim = ')'
|
||
|
case '{':
|
||
|
endDelim = '}'
|
||
|
case '[':
|
||
|
endDelim = ']'
|
||
|
case '<':
|
||
|
endDelim = '>'
|
||
|
case '\\':
|
||
|
return nil, errors.New("'\\' is not a valid delimiter")
|
||
|
default:
|
||
|
if isSpace(delim) {
|
||
|
return nil, errors.New("whitespace is not a valid delimiter")
|
||
|
}
|
||
|
if isAlphanumeric(delim) {
|
||
|
return nil, fmt.Errorf("'%c' is not a valid delimiter", delim)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
j := strings.LastIndexByte(source, endDelim)
|
||
|
if j == -1 {
|
||
|
return nil, fmt.Errorf("can't find '%c' ending delimiter", endDelim)
|
||
|
}
|
||
|
|
||
|
pcre := &RegexpPCRE{
|
||
|
Pattern: source[1:j],
|
||
|
Source: source,
|
||
|
Delim: [2]byte{delim, endDelim},
|
||
|
Modifiers: source[j+1:],
|
||
|
}
|
||
|
return pcre, nil
|
||
|
}
|
||
|
|
||
|
var tok2op = [256]Operation{
|
||
|
tokDollar: OpDollar,
|
||
|
tokCaret: OpCaret,
|
||
|
tokDot: OpDot,
|
||
|
tokChar: OpChar,
|
||
|
tokMinus: OpChar,
|
||
|
tokEscapeChar: OpEscapeChar,
|
||
|
tokEscapeMeta: OpEscapeMeta,
|
||
|
tokEscapeHex: OpEscapeHex,
|
||
|
tokEscapeOctal: OpEscapeOctal,
|
||
|
tokEscapeUni: OpEscapeUni,
|
||
|
tokPosixClass: OpPosixClass,
|
||
|
tokQ: OpQuote,
|
||
|
tokComment: OpComment,
|
||
|
}
|