replace glob matching

Signed-off-by: Wangchong Zhou <fffonion@gmail.com>
This commit is contained in:
Wangchong Zhou 2018-09-11 17:31:26 -07:00
parent 825b734b3e
commit bfe23298aa
No known key found for this signature in database
GPG key ID: B607274584E8D5E5
4 changed files with 160 additions and 342 deletions

View file

@ -126,6 +126,7 @@ func main() {
statsdListenTCP = kingpin.Flag("statsd.listen-tcp", "The TCP address on which to receive statsd metric lines. \"\" disables it.").Default(":9125").String()
mappingConfig = kingpin.Flag("statsd.mapping-config", "Metric mapping configuration file name.").String()
readBuffer = kingpin.Flag("statsd.read-buffer", "Size (in bytes) of the operating system's transmit read buffer associated with the UDP connection. Please make sure the kernel parameters net.core.rmem_max is set to a value greater than the value specified.").Int()
dumpFSMPath = kingpin.Flag("statsd.dump-fsm", "The path to dump internal FSM generated for glob matching as Dot file.").Default("").String()
)
log.AddFlags(kingpin.CommandLine)
@ -178,6 +179,12 @@ func main() {
}
mapper := &mapper.MetricMapper{MappingsCount: mappingsCount}
if *dumpFSMPath != "" {
err := mapper.SetDumpFSMPath(*dumpFSMPath)
if err != nil {
log.Fatal("Error setting dump FSM path:", err)
}
}
if *mappingConfig != "" {
err := mapper.InitFromFile(*mappingConfig)
if err != nil {

View file

@ -32,23 +32,23 @@ var (
statsdMetricRE = `[a-zA-Z_](-?[a-zA-Z0-9_])+`
templateReplaceRE = `(\$\{?\d+\}?)`
metricLineRE = regexp.MustCompile(`^(\*\.|` + statsdMetricRE + `\.)+(\*|` + statsdMetricRE + `)$`)
metricNameRE = regexp.MustCompile(`^([a-zA-Z_]|` + templateReplaceRE + `)([a-zA-Z0-9_]|` + templateReplaceRE + `)*$`)
labelNameRE = regexp.MustCompile(`^[a-zA-Z_][a-zA-Z0-9_]+$`)
labelValueExpansionRE = regexp.MustCompile(`\${?(\d+)}?`)
metricLineRE = regexp.MustCompile(`^(\*\.|` + statsdMetricRE + `\.)+(\*|` + statsdMetricRE + `)$`)
metricNameRE = regexp.MustCompile(`^([a-zA-Z_]|` + templateReplaceRE + `)([a-zA-Z0-9_]|` + templateReplaceRE + `)*$`)
labelNameRE = regexp.MustCompile(`^[a-zA-Z_][a-zA-Z0-9_]+$`)
templateReplaceCaptureRE = regexp.MustCompile(`\$\{?([a-zA-Z0-9_\$]+)\}?`)
)
type mapperConfigDefaults struct {
TimerType TimerType `yaml:"timer_type"`
Buckets []float64 `yaml:"buckets"`
Quantiles []metricObjective `yaml:"quantiles"`
MatchType MatchType `yaml:"match_type"`
DumpFSM string `yaml:"dump_fsm"`
FSMFallback MatchType `yaml:"fsm_fallback"`
TimerType TimerType `yaml:"timer_type"`
Buckets []float64 `yaml:"buckets"`
Quantiles []metricObjective `yaml:"quantiles"`
MatchType MatchType `yaml:"match_type"`
}
type mappingState struct {
transitions map[string]*mappingState
transitionsMap map[string]*mappingState
transitionsArray []*mappingState
// result is nil unless a mapping's match pattern ends at this state
result *MetricMapping
}
@ -57,14 +57,18 @@ type MetricMapper struct {
Defaults mapperConfigDefaults `yaml:"defaults"`
Mappings []MetricMapping `yaml:"mappings"`
FSM *mappingState
mutex sync.Mutex
// if this is true, that means at least one matching rule is regex type
doRegex bool
dumpFSMPath string
mutex sync.Mutex
MappingsCount prometheus.Gauge
}
type labelFormatter struct {
captureIdx int
fmtString string
type templateFormatter struct {
captureIndexes []int
captureCount int
fmtString string
}
type matchMetricType string
@ -72,9 +76,10 @@ type matchMetricType string
type MetricMapping struct {
Match string `yaml:"match"`
Name string `yaml:"name"`
NameFormatter templateFormatter
regex *regexp.Regexp
Labels prometheus.Labels `yaml:"labels"`
LabelsFormatter map[string]labelFormatter
LabelsFormatter map[string]templateFormatter
TimerType TimerType `yaml:"timer_type"`
Buckets []float64 `yaml:"buckets"`
Quantiles []metricObjective `yaml:"quantiles"`
@ -95,6 +100,48 @@ var defaultQuantiles = []metricObjective{
{Quantile: 0.99, Error: 0.001},
}
// generateFormatter compiles a name or label template such as "foo_$1" or
// "${2}-bar" into a templateFormatter that formatTemplate can expand.
//
// valueExpr is the raw template. captureCount is the highest capture index a
// reference may use; references are 1-based in the template and are stored
// 0-based in the returned captureIndexes, in order of appearance.
//
// A reference that is not a plain number, or whose index is outside
// [1, captureCount], is dropped (it expands to the empty string).
//
// The template is rewritten in a single left-to-right pass over the match
// positions, so a short reference such as "$1" can never corrupt a longer one
// that shares its prefix ("$10"). Literal "%" characters are escaped as "%%"
// whenever the result is going to be used as a fmt.Sprintf format string.
//
// The returned error is always nil.
func generateFormatter(valueExpr string, captureCount int) (templateFormatter, error) {
	locs := templateReplaceCaptureRE.FindAllStringSubmatchIndex(valueExpr, -1)
	if len(locs) == 0 {
		// if no regex reference found, keep it as it is
		return templateFormatter{captureCount: 0, fmtString: valueExpr}, nil
	}

	var (
		indexes []int
		// plain is the template with every reference removed and no escaping.
		plain strings.Builder
		// format is the fmt.Sprintf format: literals escaped, kept references
		// replaced by %s.
		format strings.Builder
	)
	appendLiteral := func(text string) {
		plain.WriteString(text)
		format.WriteString(strings.Replace(text, "%", "%%", -1))
	}

	pos := 0
	for _, loc := range locs {
		// loc[0]:loc[1] spans the whole reference, loc[2]:loc[3] the index text.
		appendLiteral(valueExpr[pos:loc[0]])
		pos = loc[1]

		idx, err := strconv.Atoi(valueExpr[loc[2]:loc[3]])
		if err != nil || idx > captureCount || idx < 1 {
			// if index larger than captured count or using unsupported named
			// capture group, replace with empty string
			continue
		}
		format.WriteString("%s")
		// note: the regex reference variable $? starts from 1
		indexes = append(indexes, idx-1)
	}
	appendLiteral(valueExpr[pos:])

	if len(indexes) == 0 {
		// formatTemplate returns capture-free strings verbatim, without going
		// through fmt.Sprintf, so the text must not carry "%%" escapes.
		return templateFormatter{captureCount: 0, fmtString: plain.String()}, nil
	}
	return templateFormatter{
		captureIndexes: indexes,
		captureCount:   len(indexes),
		fmtString:      format.String(),
	}, nil
}
// formatTemplate expands a compiled template with the captured metric fields.
//
// formatter is a templateFormatter as produced by generateFormatter; captures
// maps 0-based capture positions to the captured text. A formatter with a
// captureCount of zero has nothing to substitute and its fmtString is returned
// verbatim. Otherwise each entry of captureIndexes is looked up in captures
// (a missing key yields the empty string) and the values are passed, in order,
// to fmt.Sprintf with fmtString as the format.
func formatTemplate(formatter templateFormatter, captures map[int]string) string {
	// no label substitution, keep as it is
	if formatter.captureCount == 0 {
		return formatter.fmtString
	}

	args := make([]interface{}, formatter.captureCount)
	for pos := range formatter.captureIndexes {
		args[pos] = captures[formatter.captureIndexes[pos]]
	}
	return fmt.Sprintf(formatter.fmtString, args...)
}
func (m *MetricMapper) InitFromYAMLString(fileContents string) error {
var n MetricMapper
@ -117,7 +164,7 @@ func (m *MetricMapper) InitFromYAMLString(fileContents string) error {
maxPossibleTransitions := len(n.Mappings)
n.FSM = &mappingState{}
n.FSM.transitions = make(map[string]*mappingState, maxPossibleTransitions)
n.FSM.transitionsMap = make(map[string]*mappingState, maxPossibleTransitions)
for i := range n.Mappings {
maxPossibleTransitions--
@ -147,21 +194,25 @@ func (m *MetricMapper) InitFromYAMLString(fileContents string) error {
currentMapping.Action = ActionTypeMap
}
if currentMapping.MatchType == MatchTypeFSM {
if currentMapping.MatchType == MatchTypeGlob {
if !metricLineRE.MatchString(currentMapping.Match) {
return fmt.Errorf("invalid match: %s", currentMapping.Match)
}
// first split by "."
matchFields := strings.Split(currentMapping.Match, ".")
// fill into our FSM
root := n.FSM
captureCount := 0
for i, field := range matchFields {
state, prs := root.transitions[field]
state, prs := root.transitionsMap[field]
if !prs {
state = &mappingState{}
(*state).transitions = make(map[string]*mappingState, maxPossibleTransitions)
root.transitions[field] = state
(*state).transitionsMap = make(map[string]*mappingState, maxPossibleTransitions)
root.transitionsMap[field] = state
// if this is last field, set result to currentMapping instance
if i == len(matchFields)-1 {
root.transitions[field].result = currentMapping
root.transitionsMap[field].result = currentMapping
}
}
if field == "*" {
@ -171,33 +222,29 @@ func (m *MetricMapper) InitFromYAMLString(fileContents string) error {
// goto next state
root = state
}
currentLabelFormatter := make(map[string]labelFormatter, captureCount)
nameFmt, err := generateFormatter(currentMapping.Name, captureCount)
if err != nil {
return err
}
currentMapping.NameFormatter = nameFmt
currentLabelFormatter := make(map[string]templateFormatter, captureCount)
for label, valueExpr := range currentMapping.Labels {
matches := labelValueExpansionRE.FindAllStringSubmatch(valueExpr, -1)
if len(matches) == 0 {
// if no regex expansion found, keep it as it is
currentLabelFormatter[label] = labelFormatter{captureIdx: -1, fmtString: valueExpr}
continue
} else if len(matches) > 1 {
return fmt.Errorf("multiple captures is not supported in FSM matching type")
}
var valueFormatter string
idx, err := strconv.Atoi(matches[0][1])
lblFmt, err := generateFormatter(valueExpr, captureCount)
if err != nil {
return fmt.Errorf("invalid label value expression: %s", valueExpr)
return err
}
if idx > captureCount || idx < 1 {
// index larger than captured count, replace all expansion with empty string
valueFormatter = labelValueExpansionRE.ReplaceAllString(valueExpr, "")
idx = 0
} else {
valueFormatter = labelValueExpansionRE.ReplaceAllString(valueExpr, "%s")
}
currentLabelFormatter[label] = labelFormatter{captureIdx: idx - 1, fmtString: valueFormatter}
currentLabelFormatter[label] = lblFmt
}
currentMapping.LabelsFormatter = currentLabelFormatter
}
if currentMapping.MatchType == MatchTypeGlob || n.Defaults.FSMFallback == MatchTypeGlob {
} else {
if regex, err := regexp.Compile(currentMapping.Match); err != nil {
return fmt.Errorf("invalid regex %s in mapping: %v", currentMapping.Match, err)
} else {
currentMapping.regex = regex
}
m.doRegex = true
} /*else if currentMapping.MatchType == MatchTypeGlob {
if !metricLineRE.MatchString(currentMapping.Match) {
return fmt.Errorf("invalid match: %s", currentMapping.Match)
}
@ -210,13 +257,7 @@ func (m *MetricMapper) InitFromYAMLString(fileContents string) error {
} else {
currentMapping.regex = regex
}
} else if currentMapping.MatchType == MatchTypeRegex || n.Defaults.FSMFallback == MatchTypeRegex {
if regex, err := regexp.Compile(currentMapping.Match); err != nil {
return fmt.Errorf("invalid regex %s in mapping: %v", currentMapping.Match, err)
} else {
currentMapping.regex = regex
}
}
} */
if currentMapping.TimerType == "" {
currentMapping.TimerType = n.Defaults.TimerType
@ -231,17 +272,17 @@ func (m *MetricMapper) InitFromYAMLString(fileContents string) error {
}
}
if len(n.Defaults.DumpFSM) > 0 {
m.dumpFSM(n.Defaults.DumpFSM, n.FSM)
}
m.mutex.Lock()
defer m.mutex.Unlock()
m.Defaults = n.Defaults
m.Mappings = n.Mappings
if len(n.FSM.transitions) > 0 {
if len(n.FSM.transitionsMap) > 0 || len(n.FSM.transitionsArray) > 0 {
m.FSM = n.FSM
if m.dumpFSMPath != "" {
dumpFSM(m.dumpFSMPath, m.FSM)
}
}
if m.MappingsCount != nil {
@ -251,7 +292,12 @@ func (m *MetricMapper) InitFromYAMLString(fileContents string) error {
return nil
}
func (m *MetricMapper) dumpFSM(fileName string, root *mappingState) {
// SetDumpFSMPath records path as the file to which the glob-matching FSM is
// dumped. The value is only stored here; InitFromYAMLString performs the dump
// when the stored path is non-empty. The returned error is always nil.
func (m *MetricMapper) SetDumpFSMPath(path string) error {
	m.dumpFSMPath = path
	return nil
}
func dumpFSM(fileName string, root *mappingState) {
log.Infoln("Start dumping FSM to", fileName)
idx := 0
states := make(map[int]*mappingState)
@ -264,13 +310,12 @@ func (m *MetricMapper) dumpFSM(fileName string, root *mappingState) {
w.WriteString("node [ label=\"\",style=filled,fillcolor=white,shape=circle ]\n") // remove label of node
for idx < len(states) {
for field, transition := range states[idx].transitions {
for field, transition := range states[idx].transitionsMap {
states[len(states)] = transition
w.WriteString(fmt.Sprintf("%d -> %d [label = \"%s\"];\n", idx, len(states)-1, field))
if transition.transitions == nil || len(transition.transitions) == 0 {
if transition.transitionsMap == nil || len(transition.transitionsMap) == 0 {
w.WriteString(fmt.Sprintf("%d [color=\"#82B366\",fillcolor=\"#D5E8D4\"];\n", len(states)-1))
}
}
idx++
}
@ -289,18 +334,19 @@ func (m *MetricMapper) InitFromFile(fileName string) error {
}
func (m *MetricMapper) GetMapping(statsdMetric string, statsdMetricType MetricType) (*MetricMapping, prometheus.Labels, bool) {
// glob matching
if root := m.FSM; root != nil {
matchFields := strings.Split(statsdMetric, ".")
captures := make(map[int]string, len(matchFields))
captureIdx := 0
filedsCount := len(matchFields)
for i, field := range matchFields {
if root.transitions == nil {
if root.transitionsMap == nil {
break
}
state, prs := root.transitions[field]
state, prs := root.transitionsMap[field]
if !prs {
state, prs = root.transitions["*"]
state, prs = root.transitionsMap["*"]
if !prs {
break
}
@ -308,36 +354,33 @@ func (m *MetricMapper) GetMapping(statsdMetric string, statsdMetricType MetricTy
captureIdx++
}
if state.result != nil && i == filedsCount-1 {
// format valueExpr
mapping := *state.result
state.result.Name = formatTemplate(mapping.NameFormatter, captures)
labels := prometheus.Labels{}
for label := range mapping.Labels {
formatter := mapping.LabelsFormatter[label]
idx := formatter.captureIdx
var value string
if idx == -1 {
value = formatter.fmtString
} else {
value = fmt.Sprintf(formatter.fmtString, captures[idx])
}
labels[label] = string(value)
labels[label] = formatTemplate(mapping.LabelsFormatter[label], captures)
}
return state.result, labels, true
}
root = state
}
// if fsm_fallback is not defined, return immediately
if len(m.Defaults.FSMFallback) == 0 {
log.Infof("%s not matched by fsm\n", statsdMetric)
// if there's no regex match type, return immediately
if !m.doRegex {
return nil, nil, false
}
}
// regex matching
m.mutex.Lock()
defer m.mutex.Unlock()
for _, mapping := range m.Mappings {
// if a rule doesn't have regex matching type, its regex field is unset
if mapping.regex == nil {
continue
}
matches := mapping.regex.FindStringSubmatchIndex(statsdMetric)
if len(matches) == 0 {
continue

View file

@ -368,30 +368,6 @@ mappings:
`,
configBad: true,
},
//Config with multiple captures for fsm match type
{
config: `---
mappings:
- match: "foo.*.*"
match_type: fsm
name: "foo"
labels:
bar: "$1-$2"
`,
configBad: true,
},
//Config with non-numeric capture index for fsm match type
{
config: `---
mappings:
- match: "foo.*.*"
match_type: fsm
name: "foo"
labels:
bar: "$a"
`,
configBad: true,
},
//Config with non-matched metric.
{
config: `---
@ -542,7 +518,7 @@ mappings:
}
}
func TestFSMMatcher(t *testing.T) {
/*func TestRPS(t *testing.T) {
scenarios := []struct {
config string
configBad bool
@ -553,8 +529,6 @@ func TestFSMMatcher(t *testing.T) {
// Config with several mapping definitions.
{
config: `---
defaults:
match_type: "fsm"
mappings:
- match: test.dispatcher.*.*.*
name: "dispatch_events"
@ -573,23 +547,23 @@ mappings:
- match: request_time.*.*.*.*.*.*.*.*.*.*.*.*
name: "tyk_http_request"
labels:
method_and_path: "$1"
response_code: "$2"
apikey: "$3"
apiversion: "$4"
apiname: "$5"
apiid: "$6"
ipv4_t1: "$7"
ipv4_t2: "$8"
ipv4_t3: "$9"
ipv4_t4: "$10"
orgid: "$11"
oauthid: "$12"
method_and_path: "${1}"
response_code: "${2}"
apikey: "${3}"
apiversion: "${4}"
apiname: "${5}"
apiid: "${6}"
ipv4_t1: "${7}"
ipv4_t2: "${8}"
ipv4_t3: "${9}"
ipv4_t4: "${10}"
orgid: "${11}"
oauthid: "${12}"
- match: "*.*"
name: "catchall"
labels:
first: "$1"
second: "second_label_$2"
second: "$2"
third: "$3"
job: "-"
`,
@ -633,100 +607,7 @@ mappings:
name: "catchall",
labels: map[string]string{
"first": "foo",
"second": "second_label_bar",
"third": "",
"job": "-",
},
},
"foo.bar.baz": {},
},
},
// local match_type
{
config: `---
mappings:
- match: test.dispatcher.*.*.*
name: "dispatch_events"
labels:
match_type: "fsm"
processor: "$1"
action: "$2"
result: "$3"
job: "test_dispatcher"
- match: test.my-dispatch-host01.name.dispatcher.*.*.*
name: "host_dispatch_events"
labels:
match_type: "fsm"
processor: "$1"
action: "$2"
result: "$3"
job: "test_dispatcher"
- match: request_time.*.*.*.*.*.*.*.*.*.*.*.*
name: "tyk_http_request"
labels:
match_type: "fsm"
method_and_path: "$1"
response_code: "$2"
apikey: "$3"
apiversion: "$4"
apiname: "$5"
apiid: "$6"
ipv4_t1: "$7"
ipv4_t2: "$8"
ipv4_t3: "$9"
ipv4_t4: "$10"
orgid: "$11"
oauthid: "$12"
- match: "*.*"
name: "catchall"
labels:
match_type: "fsm"
first: "$1"
second: "second_label_$2"
third: "$3"
job: "-"
`,
mappings: mappings{
"test.dispatcher.FooProcessor.send.succeeded": {
name: "dispatch_events",
labels: map[string]string{
"processor": "FooProcessor",
"action": "send",
"result": "succeeded",
"job": "test_dispatcher",
},
},
"test.my-dispatch-host01.name.dispatcher.FooProcessor.send.succeeded": {
name: "host_dispatch_events",
labels: map[string]string{
"processor": "FooProcessor",
"action": "send",
"result": "succeeded",
"job": "test_dispatcher",
},
},
"request_time.get/threads/1/posts.200.00000000.nonversioned.discussions.a11bbcdf0ac64ec243658dc64b7100fb.172.20.0.1.12ba97b7eaa1a50001000001.": {
name: "tyk_http_request",
labels: map[string]string{
"method_and_path": "get/threads/1/posts",
"response_code": "200",
"apikey": "00000000",
"apiversion": "nonversioned",
"apiname": "discussions",
"apiid": "a11bbcdf0ac64ec243658dc64b7100fb",
"ipv4_t1": "172",
"ipv4_t2": "20",
"ipv4_t3": "0",
"ipv4_t4": "1",
"orgid": "12ba97b7eaa1a50001000001",
"oauthid": "",
},
},
"foo.bar": {
name: "catchall",
labels: map[string]string{
"first": "foo",
"second": "second_label_bar",
"second": "bar",
"third": "",
"job": "-",
},
@ -747,139 +628,29 @@ mappings:
}
var dummyMetricType MetricType = ""
for metric, mapping := range scenario.mappings {
m, labels, present := mapper.GetMapping(metric, dummyMetricType)
if present && mapping.name != "" && m.Name != mapping.name {
t.Fatalf("%d.%q: Expected name %v, got %v", i, metric, m.Name, mapping.name)
}
if mapping.notPresent && present {
t.Fatalf("%d.%q: Expected metric to not be present", i, metric)
}
if len(labels) != len(mapping.labels) {
t.Fatalf("%d.%q: Expected %d labels, got %d", i, metric, len(mapping.labels), len(labels))
}
for label, value := range labels {
if mapping.labels[label] != value {
t.Fatalf("%d.%q: Expected labels %v, got %v", i, metric, mapping, labels)
start := int32(time.Now().Unix())
for j := 1; j < 100000; j++ {
for metric, mapping := range scenario.mappings {
m, labels, present := mapper.GetMapping(metric, dummyMetricType)
if present && mapping.name != "" && m.Name != mapping.name {
t.Fatalf("%d.%q: Expected name %v, got %v", i, metric, m.Name, mapping.name)
}
if mapping.notPresent && present {
t.Fatalf("%d.%q: Expected metric to not be present", i, metric)
}
if len(labels) != len(mapping.labels) {
t.Fatalf("%d.%q: Expected %d labels, got %d", i, metric, len(mapping.labels), len(labels))
}
for label, value := range labels {
if mapping.labels[label] != value {
t.Fatalf("%d.%q: Expected labels %v, got %v", i, metric, mapping, labels)
}
}
}
}
fmt.Println("finished in", int32(time.Now().Unix())-start)
}
}
func TestFSMMatcherFallbackRegex(t *testing.T) {
scenarios := []struct {
config string
configBad bool
mappings mappings
}{
// Config with simple matcher as default matcher and fallback_regex to false.
{
config: `---
defaults:
match_type: "fsm"
mappings:
- match: client.*.request.duration
name: "request_size"
labels:
client: "$1"
- match: client.*.*.size
name: "request_response_size"
labels:
client: "$1"
direction: "$2"
`,
mappings: mappings{
"client.a.request.duration": {
name: "request_size",
labels: map[string]string{
"client": "a",
},
},
"client.a.request.size": {},
"client.a.response.size": {
name: "request_response_size",
labels: map[string]string{
"client": "a",
"direction": "response",
},
},
},
},
// Config with simple matcher as default matcher and fallback_regex to true.
{
config: `---
defaults:
match_type: "fsm"
fsm_fallback: "glob"
mappings:
- match: client.*.request.duration
name: "request_size"
labels:
client: "$1"
- match: client.*.*.size
name: "request_response_size"
labels:
client: "$1"
direction: "$2"
`,
mappings: mappings{
"client.a.request.duration": {
name: "request_size",
labels: map[string]string{
"client": "a",
},
},
"client.a.request.size": {
name: "request_response_size",
labels: map[string]string{
"client": "a",
"direction": "request",
},
},
"client.a.response.size": {
name: "request_response_size",
labels: map[string]string{
"client": "a",
"direction": "response",
},
},
},
},
}
mapper := MetricMapper{}
for i, scenario := range scenarios {
err := mapper.InitFromYAMLString(scenario.config)
if err != nil && !scenario.configBad {
t.Fatalf("%d. Config load error: %s %s", i, scenario.config, err)
}
if err == nil && scenario.configBad {
t.Fatalf("%d. Expected bad config, but loaded ok: %s", i, scenario.config)
}
var dummyMetricType MetricType = ""
for metric, mapping := range scenario.mappings {
m, labels, present := mapper.GetMapping(metric, dummyMetricType)
if present && mapping.name != "" && m.Name != mapping.name {
t.Fatalf("%d.%q: Expected name %v, got %v", i, metric, m.Name, mapping.name)
}
if mapping.notPresent && present {
t.Fatalf("%d.%q: Expected metric to not be present", i, metric)
}
if len(labels) != len(mapping.labels) {
t.Fatalf("%d.%q: Expected %d labels, got %d", i, metric, len(mapping.labels), len(labels))
}
for label, value := range labels {
if mapping.labels[label] != value {
t.Fatalf("%d.%q: Expected labels %v, got %v", i, metric, mapping, labels)
}
}
}
}
}
}*/
func TestAction(t *testing.T) {
scenarios := []struct {

View file

@ -20,7 +20,6 @@ type MatchType string
const (
MatchTypeGlob MatchType = "glob"
MatchTypeRegex MatchType = "regex"
MatchTypeFSM MatchType = "fsm"
MatchTypeDefault MatchType = ""
)
@ -33,8 +32,6 @@ func (t *MatchType) UnmarshalYAML(unmarshal func(interface{}) error) error {
switch MatchType(v) {
case MatchTypeRegex:
*t = MatchTypeRegex
case MatchTypeFSM:
*t = MatchTypeFSM
case MatchTypeGlob, MatchTypeDefault:
*t = MatchTypeGlob
default: