// Copyright (c) 2012-2020 Ugorji Nwoke. All rights reserved. // Use of this source code is governed by a MIT license found in the LICENSE file. package codec import ( "bufio" "bytes" "io" "strings" ) // decReader abstracts the reading source, allowing implementations that can // read from an io.Reader or directly off a byte slice with zero-copying. type decReader interface { // readx will return a view of the []byte if decoding from a []byte, OR // read into the implementation scratch buffer if possible i.e. n < len(scratchbuf), OR // create a new []byte and read into that readx(n uint) []byte readb([]byte) readn1() byte readn2() [2]byte readn3() [3]byte readn4() [4]byte readn8() [8]byte // readn1eof() (v uint8, eof bool) // // read up to 8 bytes at a time // readn(num uint8) (v [8]byte) numread() uint // number of bytes read // skip any whitespace characters, and return the first non-matching byte skipWhitespace() (token byte) // jsonReadNum will include last read byte in first element of slice, // and continue numeric characters until it sees a non-numeric char // or EOF. If it sees a non-numeric character, it will unread that. jsonReadNum() []byte // jsonReadAsisChars will read json plain characters (anything but " or \) // and return a slice terminated by a non-json asis character. jsonReadAsisChars() []byte // skip will skip any byte that matches, and return the first non-matching byte // skip(accept *bitset256) (token byte) // readTo will read any byte that matches, stopping once no-longer matching. // readTo(accept *bitset256) (out []byte) // readUntil will read, only stopping once it matches the 'stop' byte (which it excludes). readUntil(stop byte) (out []byte) } // ------------------------------------------------ type unreadByteStatus uint8 // unreadByteStatus goes from // undefined (when initialized) -- (read) --> canUnread -- (unread) --> canRead ... 
const ( unreadByteUndefined unreadByteStatus = iota unreadByteCanRead unreadByteCanUnread ) // const defBufReaderSize = 4096 // -------------------- // ioReaderByteScanner contains the io.Reader and io.ByteScanner interfaces type ioReaderByteScanner interface { io.Reader io.ByteScanner // ReadByte() (byte, error) // UnreadByte() error // Read(p []byte) (n int, err error) } // ioReaderByteScannerT does a simple wrapper of a io.ByteScanner // over a io.Reader type ioReaderByteScannerT struct { r io.Reader l byte // last byte ls unreadByteStatus // last byte status _ [2]byte // padding b [4]byte // tiny buffer for reading single bytes } func (z *ioReaderByteScannerT) ReadByte() (c byte, err error) { if z.ls == unreadByteCanRead { z.ls = unreadByteCanUnread c = z.l } else { _, err = z.Read(z.b[:1]) c = z.b[0] } return } func (z *ioReaderByteScannerT) UnreadByte() (err error) { switch z.ls { case unreadByteCanUnread: z.ls = unreadByteCanRead case unreadByteCanRead: err = errDecUnreadByteLastByteNotRead case unreadByteUndefined: err = errDecUnreadByteNothingToRead default: err = errDecUnreadByteUnknown } return } func (z *ioReaderByteScannerT) Read(p []byte) (n int, err error) { if len(p) == 0 { return } var firstByte bool if z.ls == unreadByteCanRead { z.ls = unreadByteCanUnread p[0] = z.l if len(p) == 1 { n = 1 return } firstByte = true p = p[1:] } n, err = z.r.Read(p) if n > 0 { if err == io.EOF && n == len(p) { err = nil // read was successful, so postpone EOF (till next time) } z.l = p[n-1] z.ls = unreadByteCanUnread } if firstByte { n++ } return } func (z *ioReaderByteScannerT) reset(r io.Reader) { z.r = r z.ls = unreadByteUndefined z.l = 0 } // ioDecReader is a decReader that reads off an io.Reader. 
type ioDecReader struct { rr ioReaderByteScannerT // the reader passed in, wrapped into a reader+bytescanner n uint // num read blist *bytesFreelist bufr []byte // buffer for readTo/readUntil br ioReaderByteScanner // main reader used for Read|ReadByte|UnreadByte bb *bufio.Reader // created internally, and reused on reset if needed x [64 + 40]byte // for: get struct field name, swallow valueTypeBytes, etc } func (z *ioDecReader) reset(r io.Reader, bufsize int, blist *bytesFreelist) { z.blist = blist z.n = 0 z.bufr = z.blist.check(z.bufr, 256) z.br = nil var ok bool if bufsize <= 0 { z.br, ok = r.(ioReaderByteScanner) if !ok { z.rr.reset(r) z.br = &z.rr } return } // bufsize > 0 ... // if bytes.[Buffer|Reader], no value in adding extra buffer // if bufio.Reader, no value in extra buffer unless size changes switch bb := r.(type) { case *strings.Reader: z.br = bb case *bytes.Buffer: z.br = bb case *bytes.Reader: z.br = bb case *bufio.Reader: if bb.Size() == bufsize { z.br = bb } } if z.br == nil { if z.bb != nil && z.bb.Size() == bufsize { z.bb.Reset(r) } else { z.bb = bufio.NewReaderSize(r, bufsize) } z.br = z.bb } } func (z *ioDecReader) numread() uint { return z.n } func (z *ioDecReader) readn1() (b uint8) { b, err := z.br.ReadByte() halt.onerror(err) z.n++ return } func (z *ioDecReader) readn2() (bs [2]byte) { z.readb(bs[:]) return } func (z *ioDecReader) readn3() (bs [3]byte) { z.readb(bs[:]) return } func (z *ioDecReader) readn4() (bs [4]byte) { z.readb(bs[:]) return } func (z *ioDecReader) readn8() (bs [8]byte) { z.readb(bs[:]) return } func (z *ioDecReader) readx(n uint) (bs []byte) { if n == 0 { return zeroByteSlice } if n < uint(len(z.x)) { bs = z.x[:n] } else { bs = make([]byte, n) } nn, err := readFull(z.br, bs) z.n += nn halt.onerror(err) return } func (z *ioDecReader) readb(bs []byte) { if len(bs) == 0 { return } nn, err := readFull(z.br, bs) z.n += nn halt.onerror(err) } // func (z *ioDecReader) readn1eof() (b uint8, eof bool) { // b, err := 
z.br.ReadByte() // if err == nil { // z.n++ // } else if err == io.EOF { // eof = true // } else { // halt.onerror(err) // } // return // } func (z *ioDecReader) jsonReadNum() (bs []byte) { z.unreadn1() z.bufr = z.bufr[:0] LOOP: // i, eof := z.readn1eof() i, err := z.br.ReadByte() if err == io.EOF { return z.bufr } if err != nil { halt.onerror(err) } z.n++ if isNumberChar(i) { z.bufr = append(z.bufr, i) goto LOOP } z.unreadn1() return z.bufr } func (z *ioDecReader) jsonReadAsisChars() (bs []byte) { z.bufr = z.bufr[:0] LOOP: i := z.readn1() z.bufr = append(z.bufr, i) if i == '"' || i == '\\' { return z.bufr } goto LOOP } func (z *ioDecReader) skipWhitespace() (token byte) { LOOP: token = z.readn1() if isWhitespaceChar(token) { goto LOOP } return } // func (z *ioDecReader) readUntil(stop byte) []byte { // z.bufr = z.bufr[:0] // LOOP: // token := z.readn1() // z.bufr = append(z.bufr, token) // if token == stop { // return z.bufr[:len(z.bufr)-1] // } // goto LOOP // } func (z *ioDecReader) readUntil(stop byte) []byte { z.bufr = z.bufr[:0] LOOP: token := z.readn1() if token == stop { return z.bufr } z.bufr = append(z.bufr, token) goto LOOP } func (z *ioDecReader) unreadn1() { err := z.br.UnreadByte() halt.onerror(err) z.n-- } // ------------------------------------ // bytesDecReader is a decReader that reads off a byte slice with zero copying // // Note: we do not try to convert index'ing out of bounds to an io error. // instead, we let it bubble up to the exported Encode/Decode method // and recover it as an io error. // // Every function here MUST defensively check bounds either explicitly // or via a bounds check. // // see panicValToErr(...) function in helper.go. 
type bytesDecReader struct {
	b []byte // the input being decoded
	c uint   // index of the next unread byte in b
}

// reset points the reader at a new input, and rewinds the cursor.
func (z *bytesDecReader) reset(in []byte) {
	size := len(in)
	z.c = 0
	z.b = in[:size:size] // cap the slice, so reslicing must not go past the input's length
}

// numread returns the count of bytes consumed so far (i.e. the cursor).
func (z *bytesDecReader) numread() uint {
	return z.c
}

// Note: slicing from a non-constant start position is more expensive,
// as more computation is required to decipher the pointer start position.
// However, we do it only once, and it's better than reslicing both z.b and return value.

// readx returns a view of the next n bytes of the input, and advances past them.
//
// The slice expression is evaluated before the cursor moves, so a request
// that runs past the end panics and leaves the cursor where it was.
func (z *bytesDecReader) readx(n uint) (bs []byte) {
	end := z.c + n
	bs = z.b[z.c:end]
	z.c = end
	return bs
}

// readb copies the next len(bs) bytes of the input into bs.
func (z *bytesDecReader) readb(bs []byte) {
	src := z.readx(uint(len(bs)))
	copy(bs, src)
}

// MARKER: do not use this - as it calls into memmove (as the size of data to move is unknown)
// func (z *bytesDecReader) readnn(bs []byte, n uint) {
// 	x := z.c
// 	copy(bs, z.b[x:x+n])
// 	z.c += n
// }

// func (z *bytesDecReader) readn(num uint8) (bs [8]byte) {
// 	x := z.c + uint(num)
// 	copy(bs[:], z.b[z.c:x]) // slice z.b completely, so we get bounds error if past
// 	z.c = x
// 	return
// }

// func (z *bytesDecReader) readn1() uint8 {
// 	z.c++
// 	return z.b[z.c-1]
// }

// MARKER: readn{1,2,3,4,8} should throw an out of bounds error if past length.
// MARKER: readn1: explicitly ensure bounds check is done
// MARKER: readn{2,3,4,8}: ensure you slice z.b completely so we get bounds error if past end.
func (z *bytesDecReader) readn1() (v uint8) { v = z.b[z.c] z.c++ return } func (z *bytesDecReader) readn2() (bs [2]byte) { // copy(bs[:], z.b[z.c:z.c+2]) // bs[1] = z.b[z.c+1] // bs[0] = z.b[z.c] bs = okBytes2(z.b[z.c : z.c+2]) z.c += 2 return } func (z *bytesDecReader) readn3() (bs [3]byte) { // copy(bs[1:], z.b[z.c:z.c+3]) bs = okBytes3(z.b[z.c : z.c+3]) z.c += 3 return } func (z *bytesDecReader) readn4() (bs [4]byte) { // copy(bs[:], z.b[z.c:z.c+4]) bs = okBytes4(z.b[z.c : z.c+4]) z.c += 4 return } func (z *bytesDecReader) readn8() (bs [8]byte) { // copy(bs[:], z.b[z.c:z.c+8]) bs = okBytes8(z.b[z.c : z.c+8]) z.c += 8 return } func (z *bytesDecReader) jsonReadNum() []byte { z.c-- // unread i := z.c LOOP: // gracefully handle end of slice, as end of stream is meaningful here if i < uint(len(z.b)) && isNumberChar(z.b[i]) { i++ goto LOOP } z.c, i = i, z.c // MARKER: 20230103: byteSliceOf here prevents inlining of jsonReadNum // return byteSliceOf(z.b, i, z.c) return z.b[i:z.c] } func (z *bytesDecReader) jsonReadAsisChars() []byte { i := z.c LOOP: token := z.b[i] i++ if token == '"' || token == '\\' { z.c, i = i, z.c return byteSliceOf(z.b, i, z.c) // return z.b[i:z.c] } goto LOOP } func (z *bytesDecReader) skipWhitespace() (token byte) { i := z.c LOOP: token = z.b[i] if isWhitespaceChar(token) { i++ goto LOOP } z.c = i + 1 return } func (z *bytesDecReader) readUntil(stop byte) (out []byte) { i := z.c LOOP: if z.b[i] == stop { out = byteSliceOf(z.b, z.c, i) // out = z.b[z.c:i] z.c = i + 1 return } i++ goto LOOP } // -------------- type decRd struct { rb bytesDecReader ri *ioDecReader decReader bytes bool // is bytes reader // MARKER: these fields below should belong directly in Encoder. // we pack them here for space efficiency and cache-line optimization. mtr bool // is maptype a known type? str bool // is slicetype a known type? 
be bool // is binary encoding js bool // is json handle jsms bool // is json handle, and MapKeyAsString cbor bool // is cbor handle cbreak bool // is a check breaker } // From out benchmarking, we see the following impact performance: // // - functions that are too big to inline // - interface calls (as no inlining can occur) // // decRd is designed to embed a decReader, and then re-implement some of the decReader // methods using a conditional branch. // // We only override the ones where the bytes version is inlined AND the wrapper method // (containing the bytes version alongside a conditional branch) is also inlined. // // We use ./run.sh -z to check. // // Right now, only numread and "carefully crafted" readn1 can be inlined. func (z *decRd) numread() uint { if z.bytes { return z.rb.numread() } return z.ri.numread() } func (z *decRd) readn1() (v uint8) { if z.bytes { // return z.rb.readn1() // MARKER: calling z.rb.readn1() prevents decRd.readn1 from being inlined. // copy code, to manually inline and explicitly return here. 
// Keep in sync with bytesDecReader.readn1 v = z.rb.b[z.rb.c] z.rb.c++ return } return z.ri.readn1() } // func (z *decRd) readn4() [4]byte { // if z.bytes { // return z.rb.readn4() // } // return z.ri.readn4() // } // func (z *decRd) readn3() [3]byte { // if z.bytes { // return z.rb.readn3() // } // return z.ri.readn3() // } // func (z *decRd) skipWhitespace() byte { // if z.bytes { // return z.rb.skipWhitespace() // } // return z.ri.skipWhitespace() // } type devNullReader struct{} func (devNullReader) Read(p []byte) (int, error) { return 0, io.EOF } func (devNullReader) Close() error { return nil } func readFull(r io.Reader, bs []byte) (n uint, err error) { var nn int for n < uint(len(bs)) && err == nil { nn, err = r.Read(bs[n:]) if nn > 0 { if err == io.EOF { // leave EOF for next time err = nil } n += uint(nn) } } // do not do this below - it serves no purpose // if n != len(bs) && err == io.EOF { err = io.ErrUnexpectedEOF } return } var _ decReader = (*decRd)(nil)