forgejo/vendor/github.com/blevesearch/vellum/fst.go
2021-02-28 18:08:33 -05:00

301 lines
7.1 KiB
Go
Vendored

// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package vellum
import (
"io"
"github.com/willf/bitset"
)
// FST is an in-memory representation of a finite state transducer,
// capable of returning the uint64 value associated with
// each []byte key stored, as well as enumerating all of the keys
// in order.
type FST struct {
// f is the optional closer released by Close; it may be nil.
f io.Closer
// ver is the encoding version read from the header of data.
ver int
// len is the entry count reported by the decoder when the FST is loaded.
len int
// typ is the type value read from the header of data.
typ int
// data is the encoded FST as handed to the constructor; Close sets it to nil.
data []byte
// decoder is the decoder selected for ver and used to read states out of
// data; Close sets it to nil.
decoder decoder
}
// new builds an FST over the encoded bytes in data. The header is decoded
// first to obtain the version and type, then a decoder matching that version
// is loaded and asked for the entry count. f, when non-nil, is the closer
// that Close will later invoke. On any failure a nil FST and the error are
// returned.
//
// NOTE(review): this package-level name shadows the builtin new within the
// package.
func new(data []byte, f io.Closer) (rv *FST, err error) {
	fst := &FST{data: data, f: f}

	if fst.ver, fst.typ, err = decodeHeader(data); err != nil {
		return nil, err
	}
	if fst.decoder, err = loadDecoder(fst.ver, fst.data); err != nil {
		return nil, err
	}

	fst.len = fst.decoder.getLen()
	return fst, nil
}
// Contains reports whether the specified key is present in this FST. It is
// a thin wrapper around Get that discards the value; any lookup error is
// passed through.
func (f *FST) Contains(val []byte) (bool, error) {
	_, found, err := f.Get(val)
	return found, err
}
// Get looks up input and returns its associated value. The boolean result
// reports whether the key exists; a zero value alone does NOT mean the key
// is absent, so callers must always check the boolean as well.
func (f *FST) Get(input []byte) (uint64, bool, error) {
	// No preallocated state is supplied on this path.
	val, found, err := f.get(input, nil)
	return val, found, err
}
// get walks the transducer from the root, consuming one byte of input per
// transition and summing the transition outputs along the way. prealloc is
// handed to the decoder for the root state lookup; each subsequent lookup is
// handed the previous state.
//
// It returns (value, true, nil) when the walk ends on a final state, where
// value is the summed outputs plus that state's final output. It returns
// (0, false, nil) when a byte has no transition or the walk ends on a
// non-final state, and (0, false, err) when the decoder fails.
func (f *FST) get(input []byte, prealloc fstState) (uint64, bool, error) {
	state, err := f.decoder.stateAt(f.decoder.getRoot(), prealloc)
	if err != nil {
		return 0, false, err
	}

	var sum uint64
	for i := range input {
		// Read the transition before the state is handed back to the
		// decoder for the next lookup.
		_, next, out := state.TransitionFor(input[i])
		if next == noneAddr {
			return 0, false, nil
		}
		if state, err = f.decoder.stateAt(next, state); err != nil {
			return 0, false, err
		}
		sum += out
	}

	if !state.Final() {
		return 0, false, nil
	}
	return sum + state.FinalOutput(), true, nil
}
// Version returns the encoding version that was decoded from the header
// when this FST instance was loaded.
func (f *FST) Version() int {
	return f.ver
}
// Len returns the number of entries in this FST instance, as reported by
// the decoder when the FST was loaded.
func (f *FST) Len() int {
	return f.len
}
// Type returns the type value that was decoded from the header when this
// FST instance was loaded.
func (f *FST) Type() int {
	return f.typ
}
// Close releases the resources behind this FST. If the FST was created with
// a closer, that closer is closed first; per the original documentation this
// is what unmaps mmap'd data and closes the backing file when those are
// managed by vellum. You MUST call Close() for any FST instance that is
// created.
//
// If the closer reports an error, that error is returned and the FST's data
// and decoder are left untouched. Otherwise both are set to nil.
func (f *FST) Close() error {
	if closer := f.f; closer != nil {
		if err := closer.Close(); err != nil {
			return err
		}
	}
	f.data, f.decoder = nil, nil
	return nil
}
// Start returns the start state of this Automaton, which is the root
// address reported by the decoder.
func (f *FST) Start() int {
	return f.decoder.getRoot()
}
// IsMatch reports whether the state at addr is a matching state in this
// Automaton. It delegates to IsMatchWithVal and drops the output value.
func (f *FST) IsMatch(addr int) bool {
	matched, _ := f.IsMatchWithVal(addr)
	return matched
}
// CanMatch reports whether the state at addr can ever transition to a
// matching state in this Automaton. Every address other than noneAddr is
// treated as able to match.
func (f *FST) CanMatch(addr int) bool {
	return addr != noneAddr
}
// WillAlwaysMatch reports whether, from the given state, the Automaton will
// always be in a matching state. For an FST this is never claimed, so the
// result is always false regardless of the state passed in.
func (f *FST) WillAlwaysMatch(int) bool {
	return false
}
// Accept returns the state this Automaton moves to from addr on input byte
// b. It delegates to AcceptWithVal and drops the transition output.
func (f *FST) Accept(addr int, b byte) int {
	dest, _ := f.AcceptWithVal(addr, b)
	return dest
}
// IsMatchWithVal reports whether the state at addr is a matching (final)
// state in this Automaton and also returns that state's final output value.
// If the state cannot be decoded, the error is not surfaced and the result
// is (false, 0).
func (f *FST) IsMatchWithVal(addr int) (bool, uint64) {
	state, err := f.decoder.stateAt(addr, nil)
	if err != nil {
		// The signature has no error result, so a decode failure is
		// reported as "no match".
		return false, 0
	}
	final, out := state.Final(), state.FinalOutput()
	return final, out
}
// AcceptWithVal returns the state this Automaton moves to from addr on input
// byte b, together with the output value attached to that transition. If
// the state at addr cannot be decoded, the error is not surfaced and the
// result is (noneAddr, 0).
func (f *FST) AcceptWithVal(addr int, b byte) (int, uint64) {
	state, err := f.decoder.stateAt(addr, nil)
	if err != nil {
		// The signature has no error result, so a decode failure is
		// reported as "no next state".
		return noneAddr, 0
	}
	_, dest, out := state.TransitionFor(b)
	return dest, out
}
// Iterator returns a new Iterator that enumerates the key/value pairs
// between the provided startKeyInclusive and endKeyExclusive. No automaton
// filter is applied.
func (f *FST) Iterator(startKeyInclusive, endKeyExclusive []byte) (*FSTIterator, error) {
	it, err := newIterator(f, startKeyInclusive, endKeyExclusive, nil)
	return it, err
}
// Search returns a new Iterator that enumerates the key/value pairs between
// the provided startKeyInclusive and endKeyExclusive, restricted to those
// that also satisfy the provided automaton aut.
func (f *FST) Search(aut Automaton, startKeyInclusive, endKeyExclusive []byte) (*FSTIterator, error) {
	it, err := newIterator(f, startKeyInclusive, endKeyExclusive, aut)
	return it, err
}
// Debug is intended for debugging only. Starting at the root, it asks the
// underlying decoder for each reachable state and passes that state to the
// provided callback along with a running state number that starts at 0 and
// increases by one per state visited.
//
// Traversal uses an explicit stack of addresses and a bitset of addresses
// already seen, so each address is decoded and reported at most once. The
// walk stops when Pop yields noneAddr, or as soon as the decoder or the
// callback returns an error, which is then returned to the caller.
func (f *FST) Debug(callback func(int, interface{}) error) error {
	addr := f.decoder.getRoot()
	visited := bitset.New(uint(addr))
	pending := addrStack{addr}

	seq := 0
	// Pop serves as both the init and the post statement, so skipping a
	// visited address with continue still advances to the next one.
	for pending, addr = pending.Pop(); addr != noneAddr; pending, addr = pending.Pop() {
		if visited.Test(uint(addr)) {
			continue
		}
		visited.Set(uint(addr))

		state, err := f.decoder.stateAt(addr, nil)
		if err != nil {
			return err
		}
		if err = callback(seq, state); err != nil {
			return err
		}

		// Queue every destination reachable from this state.
		for i := 0; i < state.NumTransitions(); i++ {
			_, dest, _ := state.TransitionFor(state.TransitionAt(i))
			pending = append(pending, dest)
		}
		seq++
	}
	return nil
}
// addrStack is a last-in, first-out stack of state addresses.
type addrStack []int

// Pop removes the most recently pushed address and returns the shortened
// stack together with that address. On an empty stack it returns the stack
// unchanged and noneAddr.
func (a addrStack) Pop() (addrStack, int) {
	if len(a) == 0 {
		return a, noneAddr
	}
	last := len(a) - 1
	return a[:last], a[last]
}
// Reader returns a new Reader bound to this FST. A Reader is meant to be
// used by a single thread to retrieve data from the FST. The returned error
// is always nil.
func (f *FST) Reader() (*Reader, error) {
	r := &Reader{f: f}
	return r, nil
}
// GetMinKey returns the minimum key stored in the FST. Starting at the root
// it repeatedly follows the transition at index 0 (presumably the smallest
// byte; confirm against the decoder's transition ordering), appending that
// byte to the key, and stops at the first final state reached.
//
// It returns a nil key with a nil error when the root is already final (the
// minimum key is the empty key) and also when a non-final state without any
// outgoing transition is reached, since no key can be completed from there.
// The latter matches what GetMaxKey returns when the root has no
// transitions. Decoder errors are returned as-is with a nil key.
func (f *FST) GetMinKey() ([]byte, error) {
	var rv []byte

	curr := f.decoder.getRoot()
	state, err := f.decoder.stateAt(curr, nil)
	if err != nil {
		return nil, err
	}

	for !state.Final() {
		// A non-final state with no outgoing transitions is a dead end:
		// asking for transition 0 would be out of range and the walk
		// could never reach a final state.
		if state.NumTransitions() == 0 {
			return nil, nil
		}

		nextTrans := state.TransitionAt(0)
		_, curr, _ = state.TransitionFor(nextTrans)
		// The current state is handed back to the decoder for the next
		// lookup, so everything needed from it is read before this call.
		state, err = f.decoder.stateAt(curr, state)
		if err != nil {
			return nil, err
		}

		rv = append(rv, nextTrans)
	}

	return rv, nil
}
// GetMaxKey returns the maximum key stored in the FST. Starting at the root
// it repeatedly follows the transition at the highest index (presumably the
// largest byte; confirm against the decoder's transition ordering),
// appending that byte to the key, until it reaches a state with no outgoing
// transitions.
//
// When the root has no transitions the returned key is nil. Decoder errors
// are returned as-is with a nil key.
func (f *FST) GetMaxKey() ([]byte, error) {
	state, err := f.decoder.stateAt(f.decoder.getRoot(), nil)
	if err != nil {
		return nil, err
	}

	var key []byte
	for state.NumTransitions() > 0 {
		last := state.TransitionAt(state.NumTransitions() - 1)
		_, next, _ := state.TransitionFor(last)
		// The current state is handed back to the decoder for the next
		// lookup, so the transition is read before this call.
		if state, err = f.decoder.stateAt(next, state); err != nil {
			return nil, err
		}
		key = append(key, last)
	}
	return key, nil
}
// A Reader is meant for a single threaded use
type Reader struct {
// f is the FST that lookups are delegated to.
f *FST
// prealloc is a state value whose address is passed to every lookup made
// through Get, so the same value is shared by all calls on this Reader.
prealloc fstStateV1
}
// Get looks up input in the underlying FST and returns its associated value,
// whether the key exists, and any decoder error. Every call passes the
// address of the Reader's own prealloc state to the lookup, which is why a
// Reader is meant for use by a single thread.
func (r *Reader) Get(input []byte) (uint64, bool, error) {
	val, found, err := r.f.get(input, &r.prealloc)
	return val, found, err
}