gotosocial/vendor/modernc.org/gc/v3/scanner.go

1446 lines
27 KiB
Go

// Copyright 2022 The Gc Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package gc // import "modernc.org/gc/v3"
import (
"bytes"
"fmt"
"go/token"
"path/filepath"
"strings"
"unicode"
"unicode/utf8"
"modernc.org/mathutil"
mtoken "modernc.org/token"
)
// Package-level state used by the scanner.
var (
// Compile-time assertions that *Token and *nonode satisfy Node.
_ Node = (*Token)(nil)
_ Node = (*nonode)(nil)
// keywords maps the spelling of each Go keyword to its token kind.
// identifierOrKeyword looks scanned words up here; a miss yields the zero
// token.Token, which it then replaces by IDENT.
keywords = map[string]token.Token{
"break": BREAK,
"case": CASE,
"chan": CHAN,
"const": CONST,
"continue": CONTINUE,
"default": DEFAULT,
"defer": DEFER,
"else": ELSE,
"fallthrough": FALLTHROUGH,
"for": FOR,
"func": FUNC,
"go": GO,
"goto": GOTO,
"if": IF,
"import": IMPORT,
"interface": INTERFACE,
"map": MAP,
"package": PACKAGE,
"range": RANGE,
"return": RETURN,
"select": SELECT,
"struct": STRUCT,
"switch": SWITCH,
"type": TYPE,
"var": VAR,
}
// lineCommentTag is the text lineInfo requires right after the two-byte
// comment opener before it treats a comment as a line directive.
lineCommentTag = []byte("line ")
// znode is a shared *nonode value.
znode = &nonode{}
)
type nonode struct{}
func (*nonode) Position() (r token.Position) { return r }
func (*nonode) Source(full bool) string { return "" }
// Token represents a lexeme, its position and its semantic value.
//
// A Token is a small handle: it refers to entry 'index' of source.toks and
// carries a copy of that entry's kind in 'ch'. The zero value has a nil
// source and is reported as invalid by IsValid.
type Token struct { // 16 bytes on 64 bit arch
// source owns the buffer and token table this Token refers to.
source *source
// ch is the token kind, a token.Token stored as int32 (see Ch).
ch int32
// index is the position of this token in source.toks.
index int32
}
// Ch reports the kind of token t represents.
func (t Token) Ch() token.Token {
	kind := token.Token(t.ch)
	return kind
}
// Source implements Node. It returns t's separator followed by t's source
// form. When full is false the result is abbreviated: a non-empty separator
// is replaced by a single space, and a source form containing a newline is
// replaced by a single space as well.
func (t Token) Source(full bool) string {
	separator, text := t.Sep(), t.Src()
	if full {
		return separator + text
	}

	if separator != "" {
		separator = " "
	}
	if strings.ContainsRune(text, '\n') {
		text = " "
	}
	return separator + text
}
// Position implements Node. It returns the position of the first byte of t's
// source form, or the zero token.Position when t has no source.
func (t Token) Position() (r token.Position) {
	src := t.source
	if src == nil {
		return r
	}

	// The token start is capped at the buffer length before it is converted
	// to a position.
	start := mathutil.MinInt32(int32(len(src.buf)), src.toks[t.index].src)
	pos := mtoken.Pos(src.base + start)
	return token.Position(src.file.PositionFor(pos, true))
}
// Prev returns the token preceding t or a zero value if no such token exists.
func (t Token) Prev() (r Token) {
	before := t.index - 1
	if before < 0 {
		return r
	}

	src := t.source
	return Token{source: src, ch: src.toks[before].ch, index: before}
}
// Next returns the token following t or a zero value if no such token exists.
//
// The zero Token, which has no source, has no successor either; Next returns
// a zero value for it instead of dereferencing the nil source.
func (t Token) Next() (r Token) {
	src := t.source
	if src == nil {
		return r
	}

	if after := t.index + 1; after < int32(len(src.toks)) {
		return Token{source: src, ch: src.toks[after].ch, index: after}
	}

	return r
}
// Sep returns any separators, combined, preceding t.
//
// A separator installed with SetSep takes precedence over the text found in
// the source buffer. The zero Token, which has no source, has an empty
// separator.
func (t Token) Sep() string {
	s := t.source
	if s == nil {
		// Zero Tokens are handed out by Prev and Next; do not panic on them.
		return ""
	}

	if p, ok := s.sepPatches[t.index]; ok {
		return p
	}

	// The separator occupies the bytes between the recorded separator start
	// and the start of the token text.
	entry := s.toks[t.index]
	return string(s.buf[entry.sep:entry.src])
}
// SetSep sets t's separator. The override is stored in the owning source,
// keyed by t's index, and is what Sep returns afterwards.
func (t Token) SetSep(s string) {
	owner := t.source
	patches := owner.sepPatches
	if patches == nil {
		// Allocate the patch table on first use.
		patches = map[int32]string{}
		owner.sepPatches = patches
	}
	patches[t.index] = s
}
// Src returns t's source form.
//
// A source form installed with SetSrc takes precedence over the text found
// in the source buffer. The EOF token and the zero Token, which has no
// source, have an empty source form.
func (t Token) Src() string {
	s := t.source
	if s == nil {
		// Zero Tokens are handed out by Prev and Next; do not panic on them.
		return ""
	}

	if p, ok := s.srcPatches[t.index]; ok {
		return p
	}

	if t.ch == int32(EOF) {
		return ""
	}

	// The token text ends where the next token's separator begins; for the
	// most recently scanned token it ends at the current scan offset.
	end := s.off
	if t.index < int32(len(s.toks))-1 {
		end = s.toks[t.index+1].sep
	}
	return string(s.buf[s.toks[t.index].src:end])
}
// SetSrc sets t's source form. The override is stored in the owning source,
// keyed by t's index, and is what Src returns afterwards.
func (t Token) SetSrc(s string) {
	owner := t.source
	patches := owner.srcPatches
	if patches == nil {
		// Allocate the patch table on first use.
		patches = map[int32]string{}
		owner.srcPatches = patches
	}
	patches[t.index] = s
}
// IsValid reports whether t is a valid token, meaning it is attached to a
// source. The zero value reports false.
func (t Token) IsValid() bool {
	if t.source == nil {
		return false
	}
	return true
}
// tok is the compact per-token record kept in source.toks.
type tok struct { // 12 bytes
	// ch is the token kind, a token.Token stored as int32.
	ch int32
	// sep is the offset in the source buffer where the separator preceding
	// the token starts.
	sep int32
	// src is the offset in the source buffer where the token text starts.
	src int32
}

// token returns t's kind as a token.Token.
func (t *tok) token() token.Token {
	kind := token.Token(t.ch)
	return kind
}
// position returns the position of the first byte of t's text within s. The
// token start is capped at the length of s.buf before the lookup.
func (t *tok) position(s *source) (r token.Position) {
	start := mathutil.MinInt32(int32(len(s.buf)), t.src)
	pos := mtoken.Pos(s.base + start)
	return token.Position(s.file.PositionFor(pos, true))
}
// source represents a single Go source file, editor text buffer etc.
type source struct {
// buf is the text being scanned.
buf []byte
// file is the position table created by newSource; the scanner registers
// line starts and line directives with it.
file *mtoken.File
// name is the name passed to newSource.
name string
// sepPatches holds separator overrides set by Token.SetSep, keyed by token
// index. It is nil until the first override.
sepPatches map[int32]string
// srcPatches holds source-form overrides set by Token.SetSrc, keyed by
// token index. It is nil until the first override.
srcPatches map[int32]string
// toks lists the tokens in the order scan appended them.
toks []tok
// base is file.Base() converted to int32.
base int32
// off is the offset in buf of the scanner's lookahead byte.
off int32
}
// newSource returns a source named name holding the text buf, together with
// a fresh position table sized for buf.
//
// 'buf' becomes owned by the result and must not be modified afterwards.
func newSource(name string, buf []byte) *source {
	src := &source{buf: buf, name: name}
	src.file = mtoken.NewFile(name, len(buf))
	src.base = int32(src.file.Base())
	return src
}
// ErrWithPosition is an error annotated with the source position it refers
// to.
type ErrWithPosition struct {
	pos token.Position
	err error
}

// String formats e as "position: error", or as just the error when the
// position is not valid.
func (e ErrWithPosition) String() string {
	if e.pos.IsValid() {
		return fmt.Sprintf("%v: %v", e.pos, e.err)
	}

	return fmt.Sprintf("%v", e.err)
}

// errList is a list of positioned errors that can itself be used as an
// error.
type errList []ErrWithPosition

// Err returns e as an error, or nil when the list is empty. Returning a
// literal nil keeps callers from seeing a non-nil interface wrapping an
// empty list.
func (e errList) Err() (r error) {
	if len(e) == 0 {
		return nil
	}

	return e
}

// Error implements error. It renders one entry per line, skipping an entry
// that repeats the previously rendered one, i.e. one that has a known line,
// the same offset and the same message.
//
// The list itself is left untouched. Earlier code compacted the unique
// entries into the front of e but then rendered every element of e, which
// both failed to drop the duplicates and overwrote the caller's slice.
func (e errList) Error() string {
	lines := make([]string, 0, len(e))
	prev := ErrWithPosition{pos: token.Position{Offset: -1}}
	for _, v := range e {
		// prev.err is nil only before the first entry was rendered.
		duplicate := prev.err != nil &&
			v.pos.Line != 0 &&
			v.pos.Offset == prev.pos.Offset &&
			v.err.Error() == prev.err.Error()
		if duplicate {
			continue
		}

		lines = append(lines, v.String())
		prev = v
	}
	return strings.Join(lines, "\n")
}
// err appends a new error at pos to the list. With no args, msg is recorded
// verbatim; otherwise msg is used as a format string for args. When
// trcErrors is set the message is additionally passed to trc.
func (e *errList) err(pos token.Position, msg string, args ...interface{}) {
	if trcErrors {
		trc("FAIL "+msg, args...)
	}

	var wrapped error
	if len(args) != 0 {
		wrapped = fmt.Errorf(msg, args...)
	} else {
		// Go through "%s" so that any '%' in msg is not interpreted.
		wrapped = fmt.Errorf("%s", msg)
	}
	*e = append(*e, ErrWithPosition{pos, wrapped})
}
// scanner splits the text of its embedded source into tokens. Every token
// produced by scan is appended to the source's toks.
type scanner struct {
*source
// dir is the directory part of the source name; lineInfo joins relative
// file names found in line directives to it.
dir string
// errs collects the errors reported through err.
errs errList
// tok is the token currently being assembled.
tok tok
// last is the kind of the previously scanned token; injectSemi consults
// and resets it.
last int32
// errBudget is the number of errors that may still be recorded; once it is
// used up the next reported error closes the scanner.
errBudget int
c byte // Lookahead byte.
// eof is set when the lookahead moved past the end of buf and also by
// close.
eof bool
// isClosed is set by close; scan returns false once it is set.
isClosed bool
}
// newScanner returns a scanner positioned at the first byte of buf, with an
// error budget of 10. The directory part of name is remembered for
// resolving line directives. An empty buf yields a scanner that is already
// at end of input.
func newScanner(name string, buf []byte) *scanner {
	dir, _ := filepath.Split(name)
	sc := &scanner{source: newSource(name, buf), errBudget: 10, dir: dir}
	if len(buf) == 0 {
		sc.eof = true
		return sc
	}

	sc.c = buf[0]
	if sc.c == '\n' {
		// Register the line break, exactly as next does for later bytes.
		sc.file.AddLine(int(sc.base + sc.off))
	}
	return sc
}
// isDigit reports whether c is an ASCII decimal digit.
func isDigit(c byte) bool {
	return '0' <= c && c <= '9'
}

// isHexDigit reports whether c is an ASCII hexadecimal digit of either case.
func isHexDigit(c byte) bool {
	switch {
	case '0' <= c && c <= '9', 'a' <= c && c <= 'f', 'A' <= c && c <= 'F':
		return true
	}
	return false
}

// isIDNext reports whether c is an ASCII byte that may continue an
// identifier: a letter, an underscore or a decimal digit.
func isIDNext(c byte) bool {
	if isIDFirst(c) {
		return true
	}
	return isDigit(c)
}

// isOctalDigit reports whether c is an ASCII octal digit.
func isOctalDigit(c byte) bool {
	return '0' <= c && c <= '7'
}

// isIDFirst reports whether c is an ASCII byte that may start an
// identifier: a letter or an underscore.
func isIDFirst(c byte) bool {
	switch {
	case c == '_':
		return true
	case 'a' <= c && c <= 'z':
		return true
	case 'A' <= c && c <= 'Z':
		return true
	}
	return false
}
// position returns the position of the lookahead byte.
func (s *scanner) position() token.Position {
	at := mtoken.Pos(s.base + s.off)
	return token.Position(s.source.file.PositionFor(at, true))
}
// pos returns the position corresponding to buffer offset off.
func (s *scanner) pos(off int32) token.Position {
	at := mtoken.Pos(s.base + off)
	return token.Position(s.file.PositionFor(at, true))
}
// token returns a Token handle for the most recently appended entry of
// s.toks, carrying the kind of the scanner's current token.
func (s *scanner) token() Token {
	newest := int32(len(s.toks) - 1)
	return Token{source: s.source, ch: s.tok.ch, index: newest}
}
// err records an error at buffer offset off, formatted from msg and args.
// Offsets beyond the end of the buffer are reported at the end of the
// buffer. Each recorded error uses up one unit of the error budget; when
// the budget is already exhausted the error is dropped and the scanner is
// closed instead.
func (s *scanner) err(off int32, msg string, args ...interface{}) {
	if s.errBudget <= 0 {
		s.close()
		return
	}

	s.errBudget--
	if limit := int32(len(s.buf)); off > limit {
		off = limit
	}
	s.errs.err(s.pos(off), msg, args...)
}
// close stops the scanner: the current token becomes ILLEGAL, the scanner is
// flagged as being at end of input and scan returns false from now on.
// Calling close on a closed scanner has no effect.
//
// The lookahead byte is cleared together with setting eof, the same way next
// does when it runs off the end of the buffer. Once eof is set next no
// longer advances, so a stale lookahead would keep loops of the form
// `for s.c == '_' { s.next() }` spinning forever when the error budget runs
// out in the middle of a literal.
func (s *scanner) close() {
	if s.isClosed {
		return
	}

	s.tok.ch = int32(ILLEGAL)
	s.c = 0
	s.eof = true
	s.isClosed = true
}
// next advances the lookahead by one byte. Running off the end of the buffer
// sets eof and a zero lookahead; once eof is set next does nothing. A
// newline that becomes the lookahead is registered with the position table.
func (s *scanner) next() {
	if s.eof {
		return
	}

	s.off++
	if int(s.off) != len(s.buf) {
		s.c = s.buf[s.off]
		if s.c == '\n' {
			s.file.AddLine(int(s.base + s.off))
		}
		return
	}

	s.c = 0
	s.eof = true
}
// nextN advances the lookahead by n bytes; it is used to step over a
// multi-byte UTF-8 sequence. Unlike next it does not consult eof first.
//
// When the step reaches the end of the buffer the offset is moved to the
// end of the buffer, eof is set and the lookahead becomes zero, mirroring
// what next does. Earlier code left the offset in front of the final rune in
// that case, so a token ending in a multi-byte rune at the very end of the
// input lost that rune from its text. A step that would overshoot the
// buffer is treated as reaching its end instead of indexing out of range.
func (s *scanner) nextN(n int) {
	end := int(s.off) + n
	if end >= len(s.buf) {
		s.off = int32(len(s.buf))
		s.c = 0
		s.eof = true
		return
	}

	s.off = int32(end)
	s.c = s.buf[s.off]
	if s.c == '\n' {
		// Register the line break, exactly as next does.
		s.file.AddLine(int(s.base + s.off))
	}
}
// scan produces the next token and appends it to s.toks. It returns false
// when the scanner is closed or when scan0 reports the end of input; in the
// latter case the final token is still appended.
func (s *scanner) scan() (r bool) {
	if s.isClosed {
		return false
	}

	s.last = s.tok.ch
	s.tok.sep = s.off
	s.tok.ch = -1 // Negative means "no token recognized yet".
	for {
		r = s.scan0()
		if r && s.tok.ch < 0 {
			// Only separator text was consumed, keep going.
			continue
		}

		s.toks = append(s.toks, s.tok)
		return r
	}
}
// scan0 performs one scanning step at the lookahead byte. It first records
// the current offset as the start of the token text and then either
// recognizes a token, setting s.tok.ch to its non-negative kind, or consumes
// a piece of separator text (white space, a comment, a leading byte order
// mark), leaving s.tok.ch as it was. scan keeps calling scan0 until a token
// kind has been set.
//
// The result is false only when the end of input was reached and the EOF
// token was produced; in every other case it is true.
func (s *scanner) scan0() (r bool) {
	s.tok.src = mathutil.MinInt32(s.off, int32(len(s.buf)))
	switch s.c {
	case ' ', '\t', '\r', '\n':
		// White space, formed from spaces (U+0020), horizontal tabs (U+0009), carriage
		// returns (U+000D), and newlines (U+000A), is ignored except as it separates
		// tokens that would otherwise combine into a single token.
		if s.c == '\n' && s.injectSemi() {
			return true
		}

		s.next()
		return true
	case '/':
		off := s.off
		s.next()
		switch s.c {
		case '=':
			s.next()
			s.tok.ch = int32(QUO_ASSIGN)
		case '/':
			// Line comments start with the character sequence // and stop at the end of
			// the line.
			s.next()
			s.lineComment(off)
			return true
		case '*':
			// General comments start with the character sequence /* and stop with the
			// first subsequent character sequence */.
			s.next()
			s.generalComment(off)
			return true
		default:
			s.tok.ch = int32(QUO)
		}
	case '(':
		s.tok.ch = int32(LPAREN)
		s.next()
	case ')':
		s.tok.ch = int32(RPAREN)
		s.next()
	case '[':
		s.tok.ch = int32(LBRACK)
		s.next()
	case ']':
		s.tok.ch = int32(RBRACK)
		s.next()
	case '{':
		s.tok.ch = int32(LBRACE)
		s.next()
	case '}':
		s.tok.ch = int32(RBRACE)
		s.next()
	case ',':
		s.tok.ch = int32(COMMA)
		s.next()
	case ';':
		s.tok.ch = int32(SEMICOLON)
		s.next()
	case '~':
		s.tok.ch = int32(TILDE)
		s.next()
	case '"':
		off := s.off
		s.next()
		s.stringLiteral(off)
	case '\'':
		off := s.off
		s.next()
		s.runeLiteral(off)
	case '`':
		// Raw string literal: everything up to the closing back quote.
		s.next()
		for {
			switch {
			case s.c == '`':
				s.next()
				s.tok.ch = int32(STRING)
				return true
			case s.eof:
				s.err(s.off, "raw string literal not terminated")
				s.tok.ch = int32(STRING)
				return true
			case s.c == 0:
				// A NUL byte inside the literal is reported and skipped, as
				// stringLiteral does, instead of panicking on user input.
				s.err(s.off, "illegal character NUL")
				s.next()
			default:
				s.next()
			}
		}
	case '.':
		s.next()
		off := s.off
		if isDigit(s.c) {
			// ".5" style floating-point literal.
			s.dot(false, true)
			return true
		}

		if s.c != '.' {
			s.tok.ch = int32(PERIOD)
			return true
		}

		// Two periods seen; a third one is needed for an ellipsis.
		eof := s.eof
		s.next()
		if s.c != '.' {
			// Not an ellipsis: back off to the second period and produce a
			// single PERIOD. The eof flag is restored along with the offset
			// and lookahead; otherwise, with ".." ending the input, next
			// would never advance again and every later step would see the
			// same period and produce a token without end.
			s.off = off
			s.c = '.'
			s.eof = eof
			s.tok.ch = int32(PERIOD)
			return true
		}

		s.next()
		s.tok.ch = int32(ELLIPSIS)
		return true
	case '%':
		s.next()
		switch s.c {
		case '=':
			s.next()
			s.tok.ch = int32(REM_ASSIGN)
		default:
			s.tok.ch = int32(REM)
		}
	case '*':
		s.next()
		switch s.c {
		case '=':
			s.next()
			s.tok.ch = int32(MUL_ASSIGN)
		default:
			s.tok.ch = int32(MUL)
		}
	case '^':
		s.next()
		switch s.c {
		case '=':
			s.next()
			s.tok.ch = int32(XOR_ASSIGN)
		default:
			s.tok.ch = int32(XOR)
		}
	case '+':
		s.next()
		switch s.c {
		case '+':
			s.next()
			s.tok.ch = int32(INC)
		case '=':
			s.next()
			s.tok.ch = int32(ADD_ASSIGN)
		default:
			s.tok.ch = int32(ADD)
		}
	case '-':
		s.next()
		switch s.c {
		case '-':
			s.next()
			s.tok.ch = int32(DEC)
		case '=':
			s.next()
			s.tok.ch = int32(SUB_ASSIGN)
		default:
			s.tok.ch = int32(SUB)
		}
	case ':':
		s.next()
		switch {
		case s.c == '=':
			s.next()
			s.tok.ch = int32(DEFINE)
		default:
			s.tok.ch = int32(COLON)
		}
	case '=':
		s.next()
		switch {
		case s.c == '=':
			s.next()
			s.tok.ch = int32(EQL)
		default:
			s.tok.ch = int32(ASSIGN)
		}
	case '!':
		s.next()
		switch {
		case s.c == '=':
			s.next()
			s.tok.ch = int32(NEQ)
		default:
			s.tok.ch = int32(NOT)
		}
	case '>':
		s.next()
		switch s.c {
		case '=':
			s.next()
			s.tok.ch = int32(GEQ)
		case '>':
			s.next()
			switch s.c {
			case '=':
				s.next()
				s.tok.ch = int32(SHR_ASSIGN)
			default:
				s.tok.ch = int32(SHR)
			}
		default:
			s.tok.ch = int32(GTR)
		}
	case '<':
		s.next()
		switch s.c {
		case '=':
			s.next()
			s.tok.ch = int32(LEQ)
		case '<':
			s.next()
			switch s.c {
			case '=':
				s.next()
				s.tok.ch = int32(SHL_ASSIGN)
			default:
				s.tok.ch = int32(SHL)
			}
		case '-':
			s.next()
			s.tok.ch = int32(ARROW)
		default:
			s.tok.ch = int32(LSS)
		}
	case '|':
		s.next()
		switch s.c {
		case '|':
			s.next()
			s.tok.ch = int32(LOR)
		case '=':
			s.next()
			s.tok.ch = int32(OR_ASSIGN)
		default:
			s.tok.ch = int32(OR)
		}
	case '&':
		s.next()
		switch s.c {
		case '&':
			s.next()
			s.tok.ch = int32(LAND)
		case '^':
			s.next()
			switch s.c {
			case '=':
				s.next()
				s.tok.ch = int32(AND_NOT_ASSIGN)
			default:
				s.tok.ch = int32(AND_NOT)
			}
		case '=':
			s.next()
			s.tok.ch = int32(AND_ASSIGN)
		default:
			s.tok.ch = int32(AND)
		}
	default:
		switch {
		case isIDFirst(s.c):
			s.next()
			s.identifierOrKeyword()
		case isDigit(s.c):
			s.numericLiteral()
		case s.c >= 0x80:
			// Start of a multi-byte UTF-8 sequence.
			off := s.off
			switch r := s.rune(); {
			case unicode.IsLetter(r):
				s.identifierOrKeyword()
			case r == 0xfeff:
				if off == 0 { // Ignore BOM, but only at buffer start.
					return true
				}

				s.err(off, "illegal byte order mark")
				s.tok.ch = int32(ILLEGAL)
			default:
				s.err(s.off, "illegal character %#U", r)
				s.tok.ch = int32(ILLEGAL)
			}
		case s.eof:
			// The last line may still need its automatic semicolon.
			if s.injectSemi() {
				return true
			}

			s.close()
			s.tok.ch = int32(EOF)
			s.tok.sep = mathutil.MinInt32(s.tok.sep, s.tok.src)
			return false
		default:
			s.err(s.off, "illegal character %#U", s.c)
			s.next()
			s.tok.ch = int32(ILLEGAL)
		}
	}
	return true
}
// runeLiteral scans the remainder of a rune literal whose opening quote, at
// buffer offset off, has already been consumed. The token kind is set to
// CHAR up front and stays CHAR for malformed literals, which are reported
// through s.err.
func (s *scanner) runeLiteral(off int32) {
// Leading ' consumed.
// ok counts the characters and escape sequences seen between the quotes;
// the closing-quote case reports any count other than one as illegal.
ok := 0
s.tok.ch = int32(CHAR)
// expOff is set once, right after the first character has been consumed,
// and is where a missing closing quote is reported.
expOff := int32(-1)
if s.eof {
s.err(off, "rune literal not terminated")
return
}
for {
switch s.c {
case '\\':
ok++
s.next()
switch s.c {
case '\'', '\\', 'a', 'b', 'f', 'n', 'r', 't', 'v':
s.next()
case 'x', 'X':
s.next()
// Expect two hexadecimal digits.
for i := 0; i < 2; i++ {
if s.c == '\'' {
// NOTE(review): i is always below 2 inside this loop, so a quote
// arriving here is always reported as an error.
if i != 2 {
s.err(s.off, "illegal character %#U in escape sequence", s.c)
}
s.next()
return
}
if !isHexDigit(s.c) {
s.err(s.off, "illegal character %#U in escape sequence", s.c)
break
}
s.next()
}
case 'u':
s.u(4)
case 'U':
s.u(8)
default:
switch {
case s.eof:
// NOTE(review): this passes s.base+s.off where most calls pass s.off;
// s.err caps the offset at the buffer length. Confirm this is intended.
s.err(s.base+s.off, "escape sequence not terminated")
return
case isOctalDigit(s.c):
// Expect three octal digits; the lookahead is the first one.
for i := 0; i < 3; i++ {
s.next()
if s.c == '\'' {
// A quote is fine only after the third digit.
if i != 2 {
s.err(s.off, "illegal character %#U in escape sequence", s.c)
}
s.next()
return
}
if !isOctalDigit(s.c) {
s.err(s.off, "illegal character %#U in escape sequence", s.c)
break
}
}
default:
s.err(s.off, "unknown escape sequence")
}
}
case '\'':
// Closing quote.
s.next()
if ok != 1 {
s.err(off, "illegal rune literal")
}
return
case '\t':
s.next()
ok++
default:
switch {
case s.eof:
switch {
case ok != 0:
s.err(expOff, "rune literal not terminated")
default:
// NOTE(review): s.base+s.off rather than s.off, see the note above.
s.err(s.base+s.off, "rune literal not terminated")
}
return
case s.c == 0:
// A NUL byte that is not the end of input is not handled yet.
panic(todo("%v: %#U", s.position(), s.c))
case s.c < ' ':
ok++
s.err(s.off, "non-printable character: %#U", s.c)
s.next()
case s.c >= 0x80:
// Multi-byte UTF-8 sequence; s.rune consumes it.
ok++
off := s.off
if c := s.rune(); c == 0xfeff {
s.err(off, "illegal byte order mark")
}
default:
ok++
s.next()
}
}
// Remember where the closing quote was expected, once.
if ok != 0 && expOff < 0 {
expOff = s.off
if s.eof {
expOff++
}
}
}
}
// stringLiteral scans the remainder of an interpreted string literal whose
// opening quote, at buffer offset off, has already been consumed. The token
// kind is set to STRING up front and stays STRING for malformed literals,
// which are reported through s.err.
//
// A \x escape that is not followed by two hexadecimal digits is reported as
// an error, in the same words runeLiteral and u use, instead of panicking
// on user input.
func (s *scanner) stringLiteral(off int32) {
	// Leading " consumed.
	s.tok.ch = int32(STRING)
	for {
		switch {
		case s.c == '"':
			// Closing quote.
			s.next()
			return
		case s.c == '\\':
			s.next()
			switch s.c {
			case '"', '\\', 'a', 'b', 'f', 'n', 'r', 't', 'v':
				s.next()
				continue
			case 'x', 'X':
				s.next()
				// Expect two hexadecimal digits. On a mismatch the offending
				// byte is left for the outer loop to handle.
				for i := 0; i < 2; i++ {
					if !isHexDigit(s.c) {
						if s.eof {
							s.err(s.off, "escape sequence not terminated")
						} else {
							s.err(s.off, "illegal character %#U in escape sequence", s.c)
						}
						break
					}

					s.next()
				}
				continue
			case 'u':
				s.u(4)
				continue
			case 'U':
				s.u(8)
				continue
			default:
				switch {
				case isOctalDigit(s.c):
					// Up to three octal digits are consumed.
					s.next()
					if isOctalDigit(s.c) {
						s.next()
					}
					if isOctalDigit(s.c) {
						s.next()
					}
					continue
				default:
					s.err(off-1, "unknown escape sequence")
				}
			}
		case s.c == '\n':
			fallthrough
		case s.eof:
			s.err(off, "string literal not terminated")
			return
		case s.c == 0:
			s.err(s.off, "illegal character NUL")
		}
		switch {
		case s.c >= 0x80:
			// Multi-byte UTF-8 sequence; s.rune consumes it.
			off := s.off
			if s.rune() == 0xfeff {
				s.err(off, "illegal byte order mark")
			}
			continue
		}
		s.next()
	}
}
// u scans a \u or \U escape. The lookahead is the 'u' or 'U'; n is the
// number of hexadecimal digits expected after it. The decoded value is
// returned. Scanning stops at the first byte that is not a hexadecimal
// digit, after reporting it, and the value accumulated so far is returned.
// A complete escape that is not a valid Unicode code point is reported too.
func (s *scanner) u(n int) (r rune) {
	// Leading u/U not consumed.
	s.next()
	start := s.off
	for i := 0; i < n; i++ {
		if !isHexDigit(s.c) {
			if s.eof {
				s.err(s.base+s.off, "escape sequence not terminated")
			} else {
				s.err(s.off, "illegal character %#U in escape sequence", s.c)
			}
			return r
		}

		var digit rune
		switch {
		case '0' <= s.c && s.c <= '9':
			digit = rune(s.c) - '0'
		case 'a' <= s.c && s.c <= 'f':
			digit = rune(s.c) - 'a' + 10
		case 'A' <= s.c && s.c <= 'F':
			digit = rune(s.c) - 'A' + 10
		}
		r = r*16 + digit
		s.next()
	}

	// A negative value results from eight digits overflowing the rune type.
	outOfRange := r < 0 || r > unicode.MaxRune
	surrogate := r >= 0xd800 && r <= 0xdfff
	if outOfRange || surrogate {
		s.err(start-1, "escape sequence is invalid Unicode code point")
	}
	return r
}
// identifierOrKeyword scans the remainder of a word whose first character
// has already been consumed and sets the token kind to the matching keyword
// or, when the word is not a keyword, to IDENT. The word extends over ASCII
// letters, digits and underscores and over non-ASCII runes that are Unicode
// letters or digits.
//
// A non-ASCII rune is decoded by consuming it. When it turns out not to
// belong to the word the scanner is rewound in front of it. The rewind
// restores the eof flag together with the offset and the lookahead: if the
// rune was the last thing in the input, consuming it set eof, and leaving
// eof set would keep next from ever advancing past the restored lookahead.
func (s *scanner) identifierOrKeyword() {
out:
	for {
		switch {
		case isIDNext(s.c):
			s.next()
		case s.c >= 0x80:
			off, c, eof := s.off, s.c, s.eof
			switch r := s.rune(); {
			case unicode.IsLetter(r) || unicode.IsDigit(r):
				// already consumed
			default:
				s.off = off
				s.c = c
				// A scanner closed while decoding stays at end of input.
				s.eof = eof || s.isClosed
				break out
			}
		case s.eof:
			break out
		case s.c == 0:
			s.err(s.off, "illegal character NUL")
			break out
		default:
			break out
		}
	}
	// A word missing from keywords maps to the zero token kind.
	if s.tok.ch = int32(keywords[string(s.buf[s.tok.src:s.off])]); s.tok.ch == 0 {
		s.tok.ch = int32(IDENT)
	}
}
// numericLiteral scans a numeric literal that starts with the decimal digit
// currently in the lookahead and sets the token kind to INT, FLOAT or IMAG.
// Literals starting with '0' are dispatched on the byte that follows;
// everything else starts with a run of decimal digits. Malformed literals
// are reported through s.err.
func (s *scanner) numericLiteral() {
// Leading decimal digit not consumed.
var hasHexMantissa, needFrac bool
more:
switch s.c {
case '0':
s.next()
switch s.c {
case '.':
// nop
case 'b', 'B':
s.next()
s.binaryLiteral()
return
case 'e', 'E':
s.exponent()
s.tok.ch = int32(FLOAT)
return
case 'p', 'P':
s.err(s.off, "'%c' exponent requires hexadecimal mantissa", s.c)
s.exponent()
s.tok.ch = int32(FLOAT)
return
case 'o', 'O':
s.next()
s.octalLiteral()
return
case 'x', 'X':
hasHexMantissa = true
// Digits are required after a radix point that directly follows the
// prefix; the flag is cleared below once digits were seen.
needFrac = true
s.tok.ch = int32(INT)
s.next()
if s.c == '.' {
s.next()
s.dot(hasHexMantissa, needFrac)
return
}
if s.hexadecimals() == 0 {
// NOTE(review): s.base+s.off rather than s.off as in most other calls;
// s.err caps the offset at the buffer length. Confirm this is intended.
s.err(s.base+s.off, "hexadecimal literal has no digits")
return
}
needFrac = false
case 'i':
s.next()
s.tok.ch = int32(IMAG)
return
default:
// A leading zero followed by more digits: either an octal literal or a
// decimal float/imaginary literal. Digits 8 and 9 are consumed and the
// first one is remembered; it is only an error when the literal turns
// out to be an integer.
invalidOff := int32(-1)
var invalidDigit byte
for {
if s.c == '_' {
for n := 0; s.c == '_'; n++ {
if n == 1 {
s.err(s.off, "'_' must separate successive digits")
}
s.next()
}
if !isDigit(s.c) {
s.err(s.off-1, "'_' must separate successive digits")
}
}
if isOctalDigit(s.c) {
s.next()
continue
}
if isDigit(s.c) {
if invalidOff < 0 {
invalidOff = s.off
invalidDigit = s.c
}
s.next()
continue
}
break
}
switch s.c {
case '.', 'e', 'E', 'i':
// Continue with the suffix handling after the outer switch.
break more
}
if isDigit(s.c) {
break more
}
if invalidOff > 0 {
s.err(invalidOff, "invalid digit '%c' in octal literal", invalidDigit)
}
s.tok.ch = int32(INT)
return
}
default:
s.decimals()
}
// The mantissa digits are consumed; classify the literal by what follows.
switch s.c {
case '.':
s.next()
s.dot(hasHexMantissa, needFrac)
case 'p', 'P':
if !hasHexMantissa {
s.err(s.off, "'%c' exponent requires hexadecimal mantissa", s.c)
}
fallthrough
case 'e', 'E':
s.exponent()
if s.c == 'i' {
s.next()
s.tok.ch = int32(IMAG)
return
}
s.tok.ch = int32(FLOAT)
case 'i':
s.next()
s.tok.ch = int32(IMAG)
default:
s.tok.ch = int32(INT)
}
}
// octalLiteral scans the digits of an octal literal whose 0o/0O prefix has
// already been consumed. The token kind is INT, FLOAT when a radix point or
// an exponent was encountered (both are reported as errors), or IMAG when
// the literal ends in 'i'.
func (s *scanner) octalLiteral() {
// Leading 0o consumed.
// ok records that at least one octal digit was seen.
ok := false
// invalidOff/invalidDigit remember the first digit 8 or 9 encountered.
invalidOff := int32(-1)
var invalidDigit byte
s.tok.ch = int32(INT)
for {
// Skip a run of underscores; a second consecutive one is an error.
for n := 0; s.c == '_'; n++ {
if n == 1 {
s.err(s.off, "'_' must separate successive digits")
}
s.next()
}
switch s.c {
case '0', '1', '2', '3', '4', '5', '6', '7':
s.next()
ok = true
case '8', '9':
if invalidOff < 0 {
invalidOff = s.off
invalidDigit = s.c
}
s.next()
case '.':
s.tok.ch = int32(FLOAT)
s.err(s.off, "invalid radix point in octal literal")
s.next()
case 'e', 'E':
s.tok.ch = int32(FLOAT)
s.err(s.off, "'%c' exponent requires decimal mantissa", s.c)
s.exponent()
case 'p', 'P':
s.tok.ch = int32(FLOAT)
s.err(s.off, "'%c' exponent requires hexadecimal mantissa", s.c)
s.exponent()
default:
// End of the literal: report what was deferred.
switch {
case !ok:
// NOTE(review): s.base+s.off rather than s.off as in most other calls;
// s.err caps the offset at the buffer length. Confirm this is intended.
s.err(s.base+s.off, "octal literal has no digits")
case invalidOff > 0:
s.err(invalidOff, "invalid digit '%c' in octal literal", invalidDigit)
}
if s.c == 'i' {
s.next()
s.tok.ch = int32(IMAG)
}
return
}
}
}
// binaryLiteral scans the digits of a binary literal whose 0b/0B prefix has
// already been consumed. The token kind is INT, FLOAT when a radix point or
// an exponent was encountered (both are reported as errors), or IMAG when
// the literal ends in 'i'.
func (s *scanner) binaryLiteral() {
// Leading 0b consumed.
// ok records that at least one binary digit was seen.
ok := false
// invalidOff/invalidDigit remember the first digit other than 0 or 1.
invalidOff := int32(-1)
var invalidDigit byte
s.tok.ch = int32(INT)
for {
// Skip a run of underscores; a second consecutive one is an error.
for n := 0; s.c == '_'; n++ {
if n == 1 {
s.err(s.off, "'_' must separate successive digits")
}
s.next()
}
switch s.c {
case '0', '1':
s.next()
ok = true
case '.':
s.tok.ch = int32(FLOAT)
s.err(s.off, "invalid radix point in binary literal")
s.next()
case 'e', 'E':
s.tok.ch = int32(FLOAT)
s.err(s.off, "'%c' exponent requires decimal mantissa", s.c)
s.exponent()
case 'p', 'P':
s.tok.ch = int32(FLOAT)
s.err(s.off, "'%c' exponent requires hexadecimal mantissa", s.c)
s.exponent()
default:
if isDigit(s.c) {
// Digits 2 to 9 are consumed as part of the literal and reported once
// the literal ends.
if invalidOff < 0 {
invalidOff = s.off
invalidDigit = s.c
}
s.next()
continue
}
// End of the literal: report what was deferred.
switch {
case !ok:
// NOTE(review): s.base+s.off rather than s.off as in most other calls;
// s.err caps the offset at the buffer length. Confirm this is intended.
s.err(s.base+s.off, "binary literal has no digits")
case invalidOff > 0:
s.err(invalidOff, "invalid digit '%c' in binary literal", invalidDigit)
}
if s.c == 'i' {
s.next()
s.tok.ch = int32(IMAG)
}
return
}
}
}
// generalComment scans the remainder of a /* */ comment whose opener has
// already been consumed; off is the offset the caller recorded for the
// comment and is where an unterminated comment is reported.
//
// When the terminator is found the comment is passed to lineInfo. If the
// comment contained a newline the result is that of injectSemi, otherwise
// false. An unterminated comment sets the token kind to 0 and yields true.
// The only caller in this file, scan0, ignores the result.
func (s *scanner) generalComment(off int32) (injectSemi bool) {
// Leading /* consumed
// off0 is the offset of the opening '/'.
off0 := s.off - 2
// nl records whether the comment spans a line break.
var nl bool
for {
switch {
case s.c == '*':
s.next()
switch s.c {
case '/':
// s.off+1 is the offset just past the closing '/'.
s.lineInfo(off0, s.off+1)
s.next()
if nl {
return s.injectSemi()
}
return false
}
case s.c == '\n':
nl = true
s.next()
case s.eof:
s.tok.ch = 0
s.err(off, "comment not terminated")
return true
case s.c == 0:
// A NUL byte that is not the end of input is not handled yet.
panic(todo("%v: %#U", s.position(), s.c))
default:
s.next()
}
}
}
// lineComment scans the remainder of a // comment whose opener has already
// been consumed; off is the offset the caller recorded for the comment.
//
// At the terminating newline the comment is passed to lineInfo and
// injectSemi decides whether the newline becomes a semicolon token; the
// result reports that decision. The same decision is taken when the comment
// is ended by the end of input. The only caller in this file, scan0, ignores
// the result.
func (s *scanner) lineComment(off int32) (injectSemi bool) {
// Leading // consumed
// off0 is the offset of the first '/'.
off0 := s.off - 2
for {
switch {
case s.c == '\n':
// s.off+1 is the offset just past the newline.
s.lineInfo(off0, s.off+1)
if s.injectSemi() {
return true
}
s.next()
return false
case s.c >= 0x80:
// Multi-byte UTF-8 sequence; s.rune consumes it.
if c := s.rune(); c == 0xfeff {
s.err(off+2, "illegal byte order mark")
}
case s.eof:
// NOTE(review): the offset is bumped past the end of the buffer here;
// the reason is not visible in this file. Confirm before changing.
s.off++
if s.injectSemi() {
return true
}
return false
case s.c == 0:
// A NUL byte that is not the end of input ends the scan of the comment.
return false
default:
s.next()
}
}
}
// lineInfo inspects the comment occupying s.buf[off:next] and, when it is a
// line directive, registers the file name, line and optional column it
// carries with the position table for offset next.
//
// For a // comment next is the offset just past the terminating newline; for
// a /* */ comment it is the offset just past the closing '/'. The accepted
// shapes are "line [file]:line" and "line [file]:line:col" following the
// comment opener.
func (s *scanner) lineInfo(off, next int32) {
// A // comment qualifies only at the very start of the buffer or directly
// after a line break; /* */ comments are exempt from this check.
if off != 0 && s.buf[off+1] != '*' && s.buf[off-1] != '\n' && s.buf[off-1] != '\r' {
return
}
str := s.buf[off:next]
if !bytes.HasPrefix(str[len("//"):], lineCommentTag) {
return
}
// Drop the comment terminator.
switch {
case str[1] == '*':
str = str[:len(str)-len("*/")]
default:
str = str[:len(str)-len("\n")]
}
// Drop the comment opener.
str = str[len("//"):]
// Parse the trailing ":number"; it is mandatory.
str, ln, ok := s.lineInfoNum(str[len("line "):])
col := 0
if ok == liBadNum || ok == liNoNum {
return
}
hasCol := false
var n int
// Parse an optional second ":number" in front of the first one.
if str, n, ok = s.lineInfoNum(str); ok == liBadNum {
return
}
if ok != liNoNum {
// Two numbers: the last one was the column, the one before it the line.
col = ln
ln = n
hasCol = true
}
fn := strings.TrimSpace(string(str))
switch {
case fn == "" && hasCol:
// With a column but no file name, the file name in effect at the comment
// is used.
fn = s.pos(off).Filename
case fn != "":
// Relative file names are joined to the directory of the source.
fn = filepath.Clean(fn)
if !filepath.IsAbs(fn) {
fn = filepath.Join(s.dir, fn)
}
}
// trc("set %v %q %v %v", next, fn, ln, col)
s.file.AddLineColumnInfo(int(next), fn, ln, col)
}
// Result codes of lineInfoNum.
const (
// liNoNum: the text is empty or does not end in a digit.
liNoNum = iota
// liBadNum: trailing digits are present but are not preceded by ':' or
// the number overflowed.
liBadNum
// liOK: a trailing ":number" was parsed.
liOK
)
// lineInfoNum parses a trailing ":number" off str. It returns the text in
// front of the colon, the number and liOK on success. When str is empty or
// does not end in a digit it returns str, 0 and liNoNum. When the digits
// are not preceded by a colon, or the number overflows, it returns str, 0
// and liBadNum.
func (s *scanner) lineInfoNum(str []byte) (_ []byte, n, r int) {
	i := len(str) - 1
	if i < 0 || !isDigit(str[i]) {
		return str, 0, liNoNum
	}

	// Accumulate the digits right to left. The first byte of str is never
	// taken as a digit because a colon has to precede the number.
	for scale := 1; i > 0 && isDigit(str[i]); i, scale = i-1, scale*10 {
		n += scale * (int(str[i]) - '0')
		if n < 0 {
			return str, 0, liBadNum
		}
	}

	if i < 0 || str[i] != ':' {
		return str, 0, liBadNum
	}

	return str[:i], n, liOK
}
// rune decodes the UTF-8 sequence starting at the lookahead, consumes it and
// returns the decoded rune. An invalid encoding is reported, a single byte
// is consumed and utf8.RuneError is returned.
func (s *scanner) rune() rune {
	r, size := utf8.DecodeRune(s.buf[s.off:])
	if r == utf8.RuneError {
		switch size {
		case 0:
			// Nothing left to decode.
			panic(todo("%v: %#U", s.position(), s.c))
		case 1:
			s.err(s.off, "illegal UTF-8 encoding")
			s.next()
			return r
		}
	}

	s.nextN(size)
	return r
}
// dot scans the part of a floating-point literal that follows the radix
// point, which has already been consumed, and sets the token kind to FLOAT
// or IMAG.
//
// hasHexMantissa selects hexadecimal instead of decimal fraction digits and
// makes a 'p' exponent mandatory. needFrac states that at least one
// fraction digit is required.
func (s *scanner) dot(hasHexMantissa, needFrac bool) {
// '.' already consumed
switch {
case hasHexMantissa:
if s.hexadecimals() == 0 && needFrac {
s.err(s.off, "hexadecimal literal has no digits")
}
switch s.c {
case 'p', 'P':
// ok
default:
s.err(s.off, "hexadecimal mantissa requires a 'p' exponent")
}
default:
if s.decimals() == 0 && needFrac {
// A decimal fraction without digits is not handled yet.
// NOTE(review): the callers in this file appear to pass needFrac == true
// with a decimal mantissa only when a digit follows the point; confirm
// this branch is unreachable.
panic(todo("%v: %#U", s.position(), s.c))
}
}
// Classify the literal by what follows the fraction.
switch s.c {
case 'p', 'P':
if !hasHexMantissa {
s.err(s.off, "'%c' exponent requires hexadecimal mantissa", s.c)
}
fallthrough
case 'e', 'E':
s.exponent()
if s.c == 'i' {
s.next()
s.tok.ch = int32(IMAG)
return
}
s.tok.ch = int32(FLOAT)
case 'i':
s.next()
s.tok.ch = int32(IMAG)
default:
s.tok.ch = int32(FLOAT)
}
}
// exponent scans the exponent part of a floating-point literal: the marker
// in the lookahead, an optional sign and the decimal digits. Missing digits
// are reported.
func (s *scanner) exponent() {
	// Leading exponent marker (e, E, p or P) not consumed.
	s.next()
	if s.c == '+' || s.c == '-' {
		s.next()
	}
	if isDigit(s.c) {
		s.decimals()
		return
	}

	s.err(s.base+s.off, "exponent has no digits")
}
// decimals consumes a run of decimal digits that may contain underscores
// and returns the number of digits consumed. An underscore that does not
// sit between two digits is reported.
func (s *scanner) decimals() (r int) {
	seenDigit := false
	for {
		if isDigit(s.c) {
			seenDigit = true
			s.next()
			r++
			continue
		}

		if s.c != '_' {
			return r
		}

		// A run of underscores: an error before any digit was seen, and on
		// the second underscore of the run.
		for run := 0; s.c == '_'; run++ {
			if !seenDigit || run == 1 {
				s.err(s.off, "'_' must separate successive digits")
			}
			s.next()
		}
		// The run has to be followed by a digit.
		if !isDigit(s.c) {
			s.err(s.off-1, "'_' must separate successive digits")
		}
	}
}
// hexadecimals consumes a run of hexadecimal digits that may contain
// underscores and returns the number of digits consumed. A doubled
// underscore and an underscore not followed by a digit are reported.
func (s *scanner) hexadecimals() (r int) {
	for {
		if isHexDigit(s.c) {
			s.next()
			r++
			continue
		}

		if s.c != '_' {
			return r
		}

		// A run of underscores: the second one of the run is an error.
		for run := 0; s.c == '_'; run++ {
			if run == 1 {
				s.err(s.off, "'_' must separate successive digits")
			}
			s.next()
		}
		// The run has to be followed by a hexadecimal digit.
		if !isHexDigit(s.c) {
			s.err(s.off-1, "'_' must separate successive digits")
		}
	}
}
// injectSemi implements automatic semicolon insertion.
//
// When the input is broken into tokens, a semicolon is automatically inserted
// into the token stream immediately after a line's final token if that token
// is
//
//   - an identifier
//   - an integer, floating-point, imaginary, rune, or string literal
//   - one of the keywords break, continue, fallthrough, or return
//   - one of the operators and punctuation ++, --, ), ], or }
//
// injectSemi reports whether the previously scanned token calls for a
// semicolon. If so, the current token becomes SEMICOLON and a newline in the
// lookahead is consumed. The record of the previous token is cleared either
// way.
func (s *scanner) injectSemi() bool {
	prev := token.Token(s.last)
	s.last = 0
	switch prev {
	case
		IDENT, INT, FLOAT, IMAG, CHAR, STRING,
		BREAK, CONTINUE, FALLTHROUGH, RETURN,
		INC, DEC, RPAREN, RBRACK, RBRACE:

		s.tok.ch = int32(SEMICOLON)
		if s.c == '\n' {
			s.next()
		}
		return true
	}

	return false
}