tidy up and use go benchmark

Signed-off-by: Wangchong Zhou <fffonion@gmail.com>
This commit is contained in:
Wangchong Zhou 2018-09-28 12:55:53 -07:00
parent 6d709d52c1
commit 5262b2904c
No known key found for this signature in database
GPG key ID: B607274584E8D5E5
6 changed files with 793 additions and 517 deletions

View file

@ -142,7 +142,7 @@ func main() {
statsdListenTCP = kingpin.Flag("statsd.listen-tcp", "The TCP address on which to receive statsd metric lines. \"\" disables it.").Default(":9125").String() statsdListenTCP = kingpin.Flag("statsd.listen-tcp", "The TCP address on which to receive statsd metric lines. \"\" disables it.").Default(":9125").String()
mappingConfig = kingpin.Flag("statsd.mapping-config", "Metric mapping configuration file name.").String() mappingConfig = kingpin.Flag("statsd.mapping-config", "Metric mapping configuration file name.").String()
readBuffer = kingpin.Flag("statsd.read-buffer", "Size (in bytes) of the operating system's transmit read buffer associated with the UDP connection. Please make sure the kernel parameters net.core.rmem_max is set to a value greater than the value specified.").Int() readBuffer = kingpin.Flag("statsd.read-buffer", "Size (in bytes) of the operating system's transmit read buffer associated with the UDP connection. Please make sure the kernel parameters net.core.rmem_max is set to a value greater than the value specified.").Int()
dumpFSMPath = kingpin.Flag("statsd.dump-fsm", "The path to dump internal FSM generated for glob matching as Dot file.").Default("").String() dumpFSMPath = kingpin.Flag("debug.dump-fsm", "The path to dump internal FSM generated for glob matching as Dot file.").Default("").String()
) )
log.AddFlags(kingpin.CommandLine) log.AddFlags(kingpin.CommandLine)
@ -203,7 +203,7 @@ func main() {
if *dumpFSMPath != "" { if *dumpFSMPath != "" {
err := dumpFSM(mapper, *dumpFSMPath) err := dumpFSM(mapper, *dumpFSMPath)
if err != nil { if err != nil {
log.Fatal("Error dumpping FSM:", err) log.Fatal("Error dumping FSM:", err)
} }
} }
go watchConfig(*mappingConfig, mapper) go watchConfig(*mappingConfig, mapper)

48
pkg/mapper/fsm/dump.go Normal file
View file

@ -0,0 +1,48 @@
// Copyright 2018 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package fsm
import (
	"fmt"
	"io"
	"sort"
)
// DumpFSM writes the current FSM to w as a Graphviz dot digraph.
//
// States are numbered in breadth-first order starting at the root, which is
// always node 0. Transitions leaving a state are emitted in lexical order of
// their field name, so the output for a given FSM is identical between runs
// (Go map iteration order is otherwise unspecified).
//
// Colouring:
//   - node 0 (start state) is red,
//   - direct children of the root (one per metric type) are yellow,
//   - states without outgoing transitions (end states) are green.
//
// Write errors are not reported: the method has no error return and the dump
// is a best-effort debugging aid.
func (f *FSM) DumpFSM(w io.Writer) {
	io.WriteString(w, "digraph g {\n")
	io.WriteString(w, "rankdir=LR\n")                                                         // lay the graph out left to right
	io.WriteString(w, "node [ label=\"\",style=filled,fillcolor=white,shape=circle ]\n") // remove label of node

	// states doubles as the BFS queue; a state's index is its dot node id.
	states := []*mappingState{f.root}
	for idx := 0; idx < len(states); idx++ {
		current := states[idx]
		if current == nil {
			// zero-value FSM (no root) or a nil transition target: nothing to expand
			continue
		}

		// Sort the outgoing fields so node numbering is reproducible.
		fields := make([]string, 0, len(current.transitions))
		for field := range current.transitions {
			fields = append(fields, field)
		}
		sort.Strings(fields)

		for _, field := range fields {
			transition := current.transitions[field]
			target := len(states)
			states = append(states, transition)

			fmt.Fprintf(w, "%d -> %d [label = \"%s\"];\n", idx, target, field)
			switch {
			case idx == 0:
				// color for metric types
				fmt.Fprintf(w, "%d [color=\"#D6B656\",fillcolor=\"#FFF2CC\"];\n", target)
			case transition == nil || len(transition.transitions) == 0:
				// color for end state
				fmt.Fprintf(w, "%d [color=\"#82B366\",fillcolor=\"#D5E8D4\"];\n", target)
			}
		}
	}

	// color for start state
	io.WriteString(w, "0 [color=\"#a94442\",fillcolor=\"#f2dede\"];\n")
	io.WriteString(w, "}")
}

View file

@ -15,10 +15,15 @@ package fsm
import ( import (
"fmt" "fmt"
"regexp"
"strconv" "strconv"
"strings" "strings"
) )
var (
// templateReplaceCaptureRE matches a capture reference inside a template:
// a literal "$", an optional "{", one or more characters from
// [a-zA-Z0-9_$] (the only capture group), and an optional "}".
// It therefore matches both the "$name" and "${name}" spellings; the two
// braces are independently optional, so an unbalanced "${name" matches too.
templateReplaceCaptureRE = regexp.MustCompile(`\$\{?([a-zA-Z0-9_\$]+)\}?`)
)
type templateFormatter struct { type templateFormatter struct {
captureIndexes []int captureIndexes []int
captureCount int captureCount int
@ -57,12 +62,11 @@ func (formatter *templateFormatter) format(captures map[int]string) string {
if formatter.captureCount == 0 { if formatter.captureCount == 0 {
// no label substitution, keep as it is // no label substitution, keep as it is
return formatter.fmtString return formatter.fmtString
} else {
indexes := formatter.captureIndexes
vargs := make([]interface{}, formatter.captureCount)
for i, idx := range indexes {
vargs[i] = captures[idx]
}
return fmt.Sprintf(formatter.fmtString, vargs...)
} }
indexes := formatter.captureIndexes
vargs := make([]interface{}, formatter.captureCount)
for i, idx := range indexes {
vargs[i] = captures[idx]
}
return fmt.Sprintf(formatter.fmtString, vargs...)
} }

View file

@ -14,8 +14,6 @@
package fsm package fsm
import ( import (
"fmt"
"io"
"regexp" "regexp"
"strings" "strings"
@ -23,10 +21,6 @@ import (
"github.com/prometheus/common/log" "github.com/prometheus/common/log"
) )
var (
templateReplaceCaptureRE = regexp.MustCompile(`\$\{?([a-zA-Z0-9_\$]+)\}?`)
)
type mappingState struct { type mappingState struct {
transitions map[string]*mappingState transitions map[string]*mappingState
minRemainingLength int minRemainingLength int
@ -48,18 +42,18 @@ type fsmBacktrackStackCursor struct {
} }
type FSM struct { type FSM struct {
root *mappingState root *mappingState
needsBacktracking bool metricTypes []string
metricTypes []string statesCount int
disableOrdering bool BacktrackingNeeded bool
statesCount int OrderingDisabled bool
} }
// NewFSM creates a new FSM instance // NewFSM creates a new FSM instance
func NewFSM(metricTypes []string, maxPossibleTransitions int, disableOrdering bool) *FSM { func NewFSM(metricTypes []string, maxPossibleTransitions int, orderingDisabled bool) *FSM {
fsm := FSM{} fsm := FSM{}
root := &mappingState{} root := &mappingState{}
root.transitions = make(map[string]*mappingState, 3) root.transitions = make(map[string]*mappingState, len(metricTypes))
metricTypes = append(metricTypes, "") metricTypes = append(metricTypes, "")
for _, field := range metricTypes { for _, field := range metricTypes {
@ -67,7 +61,7 @@ func NewFSM(metricTypes []string, maxPossibleTransitions int, disableOrdering bo
(*state).transitions = make(map[string]*mappingState, maxPossibleTransitions) (*state).transitions = make(map[string]*mappingState, maxPossibleTransitions)
root.transitions[string(field)] = state root.transitions[string(field)] = state
} }
fsm.disableOrdering = disableOrdering fsm.OrderingDisabled = orderingDisabled
fsm.metricTypes = metricTypes fsm.metricTypes = metricTypes
fsm.statesCount = 0 fsm.statesCount = 0
fsm.root = root fsm.root = root
@ -136,38 +130,112 @@ func (f *FSM) AddState(match string, name string, labels prometheus.Labels, matc
} }
// DumpFSM accepts a io.writer and write the current FSM into dot file format // GetMapping implements a mapping algorithm for Glob pattern
func (f *FSM) DumpFSM(w io.Writer) { func (f *FSM) GetMapping(statsdMetric string, statsdMetricType string) (interface{}, string, prometheus.Labels, bool) {
idx := 0 matchFields := strings.Split(statsdMetric, ".")
states := make(map[int]*mappingState) currentState := f.root.transitions[statsdMetricType]
states[idx] = f.root
w.Write([]byte("digraph g {\n")) // the cursor/pointer in the backtrack stack implemented as a double-linked list
w.Write([]byte("rankdir=LR\n")) // make it vertical var backtrackCursor *fsmBacktrackStackCursor
w.Write([]byte("node [ label=\"\",style=filled,fillcolor=white,shape=circle ]\n")) // remove label of node resumeFromBacktrack := false
for idx < len(states) { // the return variable
for field, transition := range states[idx].transitions { var finalState *mappingState
states[len(states)] = transition
w.Write([]byte(fmt.Sprintf("%d -> %d [label = \"%s\"];\n", idx, len(states)-1, field))) captures := make(map[int]string, len(matchFields))
if idx == 0 { // keep track of captured group so we don't need to do append() on captures
// color for metric types captureIdx := 0
w.Write([]byte(fmt.Sprintf("%d [color=\"#D6B656\",fillcolor=\"#FFF2CC\"];\n", len(states)-1))) filedsCount := len(matchFields)
} else if transition.transitions == nil || len(transition.transitions) == 0 { i := 0
// color for end state var state *mappingState
w.Write([]byte(fmt.Sprintf("%d [color=\"#82B366\",fillcolor=\"#D5E8D4\"];\n", len(states)-1))) for { // the loop for backtracking
for { // the loop for a single "depth only" search
var present bool
// if we resume from backtrack, we should skip this branch in this case
// since the state that were saved at the end of this branch
if !resumeFromBacktrack {
if len(currentState.transitions) > 0 {
field := matchFields[i]
state, present = currentState.transitions[field]
fieldsLeft := filedsCount - i - 1
// also compare length upfront to avoid unnecessary loop or backtrack
if !present || fieldsLeft > state.maxRemainingLength || fieldsLeft < state.minRemainingLength {
state, present = currentState.transitions["*"]
if !present || fieldsLeft > state.maxRemainingLength || fieldsLeft < state.minRemainingLength {
break
} else {
captures[captureIdx] = field
captureIdx++
}
} else if f.BacktrackingNeeded {
// if backtracking is needed, also check for alternative transition
altState, present := currentState.transitions["*"]
if !present || fieldsLeft > altState.maxRemainingLength || fieldsLeft < altState.minRemainingLength {
} else {
// push to backtracking stack
newCursor := fsmBacktrackStackCursor{prev: backtrackCursor, state: altState,
fieldIndex: i,
captureIndex: captureIdx, currentCapture: field,
}
// if this is not the first time, connect to the previous cursor
if backtrackCursor != nil {
backtrackCursor.next = &newCursor
}
backtrackCursor = &newCursor
}
}
} else {
// no more transitions for this state
break
}
} // backtrack will resume from here
// do we reach a final state?
if state.result != nil && i == filedsCount-1 {
if f.OrderingDisabled {
finalState = state
return formatLabels(finalState, captures)
} else if finalState == nil || finalState.resultPriority > state.resultPriority {
// if we care about ordering, try to find a result with highest prioity
finalState = state
}
break
} }
i++
if i >= filedsCount {
break
}
resumeFromBacktrack = false
currentState = state
}
if backtrackCursor == nil {
// if we are not doing backtracking or all path has been travesaled
break
} else {
// pop one from stack
state = backtrackCursor.state
currentState = state
i = backtrackCursor.fieldIndex
captureIdx = backtrackCursor.captureIndex + 1
// put the * capture back
captures[captureIdx-1] = backtrackCursor.currentCapture
backtrackCursor = backtrackCursor.prev
if backtrackCursor != nil {
// deref for GC
backtrackCursor.next = nil
}
resumeFromBacktrack = true
} }
idx++
} }
// color for start state
w.Write([]byte(fmt.Sprintf("0 [color=\"#a94442\",fillcolor=\"#f2dede\"];\n"))) return formatLabels(finalState, captures)
w.Write([]byte("}"))
} }
// TestIfNeedBacktracking test if backtrack is needed for current FSM // TestIfNeedBacktracking test if backtrack is needed for current mappings
func (f *FSM) TestIfNeedBacktracking(mappings []string) bool { func TestIfNeedBacktracking(mappings []string, orderingDisabled bool) bool {
needBacktrack := false backtrackingNeeded := false
// A has * in rules there's other transisitions at the same state // A has * in rules there's other transisitions at the same state
// this makes A the cause of backtracking // this makes A the cause of backtracking
ruleByLength := make(map[int][]string) ruleByLength := make(map[int][]string)
@ -221,9 +289,9 @@ func (f *FSM) TestIfNeedBacktracking(mappings []string) bool {
for i2, r2 := range rules { for i2, r2 := range rules {
if i2 != i1 && len(re1.FindStringSubmatchIndex(r2)) > 0 { if i2 != i1 && len(re1.FindStringSubmatchIndex(r2)) > 0 {
// log if we care about ordering and the superset occurs before // log if we care about ordering and the superset occurs before
if !f.disableOrdering && i1 < i2 { if !orderingDisabled && i1 < i2 {
log.Warnf("match \"%s\" is a super set of match \"%s\" but in a lower order, "+ log.Warnf("match \"%s\" is a super set of match \"%s\" but in a lower order, "+
"the first will never be matched\n", r1, r2) "the first will never be matched", r1, r2)
} }
currentRuleNeedBacktrack = false currentRuleNeedBacktrack = false
} }
@ -242,8 +310,8 @@ func (f *FSM) TestIfNeedBacktracking(mappings []string) bool {
if currentRuleNeedBacktrack { if currentRuleNeedBacktrack {
log.Warnf("backtracking required because of match \"%s\", "+ log.Warnf("backtracking required because of match \"%s\", "+
"matching performance may be degraded\n", r1) "matching performance may be degraded", r1)
needBacktrack = true backtrackingNeeded = true
} }
} }
} }
@ -252,112 +320,8 @@ func (f *FSM) TestIfNeedBacktracking(mappings []string) bool {
// since transistions are stored in (unordered) map // since transistions are stored in (unordered) map
// note: don't move this branch to the beginning of this function // note: don't move this branch to the beginning of this function
// since we need logs for superset rules // since we need logs for superset rules
f.needsBacktracking = !f.disableOrdering || needBacktrack
return f.needsBacktracking return !orderingDisabled || backtrackingNeeded
}
// GetMapping walks the FSM to find the mapping that matches a statsd metric.
//
// statsdMetric is split on "." into fields, and the walk starts from the
// root transition keyed by statsdMetricType. At each state the exact field
// transition is tried first and the "*" transition second; a "*" transition
// records the field in captures. When f.needsBacktracking is set, a usable
// "*" alternative to an exact match is pushed on a backtrack stack and
// explored later.
//
// With f.disableOrdering set, the first final state reached is returned
// immediately. Otherwise every path is explored and the final state with the
// smallest resultPriority value wins.
//
// The four return values are whatever formatLabels produces for the selected
// state (nil if nothing matched) and the captures; formatLabels' body is not
// visible here, so their exact meaning should be confirmed there.
//
// NOTE(review): a statsdMetricType that is not a key of f.root.transitions
// yields a nil currentState, and len(currentState.transitions) below would
// then dereference nil — confirm callers only pass registered types.
func (f *FSM) GetMapping(statsdMetric string, statsdMetricType string) (interface{}, string, prometheus.Labels, bool) {
matchFields := strings.Split(statsdMetric, ".")
currentState := f.root.transitions[statsdMetricType]
// the cursor/pointer into the backtrack stack, implemented as a doubly linked list
var backtrackCursor *fsmBacktrackStackCursor
resumeFromBacktrack := false
// the best final state found so far; passed to formatLabels on return
var finalState *mappingState
captures := make(map[int]string, len(matchFields))
// keep track of the next capture slot so we don't need to do append() on captures
captureIdx := 0
filedsCount := len(matchFields)
i := 0
var state *mappingState
for { // the loop for backtracking
for { // the loop for a single "depth only" search
var present bool
// when resuming from a backtrack, skip the transition lookup below:
// the popped cursor has already supplied the state to continue from
if !resumeFromBacktrack {
if len(currentState.transitions) > 0 {
field := matchFields[i]
state, present = currentState.transitions[field]
fieldsLeft := filedsCount - i - 1
// also compare the number of remaining fields upfront to avoid an unnecessary loop or backtrack
if !present || fieldsLeft > state.maxRemainingLength || fieldsLeft < state.minRemainingLength {
// no usable exact transition: fall back to the "*" transition
state, present = currentState.transitions["*"]
if !present || fieldsLeft > state.maxRemainingLength || fieldsLeft < state.minRemainingLength {
break
} else {
captures[captureIdx] = field
captureIdx++
}
} else if f.needsBacktracking {
// if backtracking is needed, also check for the alternative "*" transition
altState, present := currentState.transitions["*"]
if !present || fieldsLeft > altState.maxRemainingLength || fieldsLeft < altState.minRemainingLength {
// no usable "*" alternative: nothing to push
} else {
// push to backtracking stack
newCursor := fsmBacktrackStackCursor{prev: backtrackCursor, state: altState,
fieldIndex: i,
captureIndex: captureIdx, currentCapture: field,
}
// if this is not the first time, connect to the previous cursor
if backtrackCursor != nil {
backtrackCursor.next = &newCursor
}
backtrackCursor = &newCursor
}
}
} else {
// no more transitions for this state
break
}
} // backtrack will resume from here
// have we reached a final state on the last field?
if state.result != nil && i == filedsCount-1 {
if f.disableOrdering {
finalState = state
return formatLabels(finalState, captures)
} else if finalState == nil || finalState.resultPriority > state.resultPriority {
// if we care about ordering, keep the result with the highest priority
// (i.e. the smallest resultPriority value)
finalState = state
}
break
}
i++
if i >= filedsCount {
break
}
resumeFromBacktrack = false
currentState = state
}
if backtrackCursor == nil {
// if we are not doing backtracking or all paths have been traversed
break
} else {
// pop one from stack
state = backtrackCursor.state
currentState = state
i = backtrackCursor.fieldIndex
captureIdx = backtrackCursor.captureIndex + 1
// put the * capture back
captures[captureIdx-1] = backtrackCursor.currentCapture
backtrackCursor = backtrackCursor.prev
if backtrackCursor != nil {
// deref for GC
backtrackCursor.next = nil
}
resumeFromBacktrack = true
}
}
return formatLabels(finalState, captures)
} }
func formatLabels(finalState *mappingState, captures map[int]string) (interface{}, string, prometheus.Labels, bool) { func formatLabels(finalState *mappingState, captures map[int]string) (interface{}, string, prometheus.Labels, bool) {

View file

@ -31,8 +31,6 @@ var (
metricLineRE = regexp.MustCompile(`^(\*\.|` + statsdMetricRE + `\.)+(\*|` + statsdMetricRE + `)$`) metricLineRE = regexp.MustCompile(`^(\*\.|` + statsdMetricRE + `\.)+(\*|` + statsdMetricRE + `)$`)
metricNameRE = regexp.MustCompile(`^([a-zA-Z_]|` + templateReplaceRE + `)([a-zA-Z0-9_]|` + templateReplaceRE + `)*$`) metricNameRE = regexp.MustCompile(`^([a-zA-Z_]|` + templateReplaceRE + `)([a-zA-Z0-9_]|` + templateReplaceRE + `)*$`)
labelNameRE = regexp.MustCompile(`^[a-zA-Z_][a-zA-Z0-9_]+$`) labelNameRE = regexp.MustCompile(`^[a-zA-Z_][a-zA-Z0-9_]+$`)
templateReplaceCaptureRE = regexp.MustCompile(`\$\{?([a-zA-Z0-9_\$]+)\}?`)
) )
type mapperConfigDefaults struct { type mapperConfigDefaults struct {
@ -81,20 +79,6 @@ var defaultQuantiles = []metricObjective{
{Quantile: 0.99, Error: 0.001}, {Quantile: 0.99, Error: 0.001},
} }
// min reports the smaller of the two integers x and y.
func min(x, y int) int {
	smallest := y
	if x < y {
		smallest = x
	}
	return smallest
}
// max reports the larger of the two integers x and y.
func max(x, y int) int {
	largest := y
	if x > y {
		largest = x
	}
	return largest
}
func (m *MetricMapper) InitFromYAMLString(fileContents string) error { func (m *MetricMapper) InitFromYAMLString(fileContents string) error {
var n MetricMapper var n MetricMapper
@ -191,7 +175,7 @@ func (m *MetricMapper) InitFromYAMLString(fileContents string) error {
mappings = append(mappings, mapping.Match) mappings = append(mappings, mapping.Match)
} }
} }
n.FSM.TestIfNeedBacktracking(mappings) n.FSM.BacktrackingNeeded = fsm.TestIfNeedBacktracking(mappings, n.FSM.OrderingDisabled)
m.FSM = n.FSM m.FSM = n.FSM
m.doRegex = n.doRegex m.doRegex = n.doRegex

File diff suppressed because it is too large Load diff