Use less regex matching to parse lines (does not provide a performance gain though)

This commit is contained in:
René Stadler 2007-11-15 15:06:37 +02:00 committed by Stefan Sauer
parent df2d890d40
commit b3f21b5a59
2 changed files with 31 additions and 17 deletions

View file

@ -112,9 +112,7 @@ def default_log_line_regex_ ():
# "0x8165430 "
THREAD = r"(0x[0-9a-f]+) +" #r"\((0x[0-9a-f]+) - "
# "0:00:00.777913000 "
#TIME = r"([0-9]+:[0-9][0-9]:[0-9][0-9]\.[0-9]+) +"
TIME = " +" # Only consume the whitespace before the PID here; timestamps
# are parsed without regex.
TIME = r"([0-9]+:[0-9][0-9]:[0-9][0-9]\.[0-9]+) +"
CATEGORY = "([A-Za-z_-]+) +" # "GST_REFCOUNTING ", "flacdec "
# " 3089 "
PID = r"([0-9]+) +"
@ -126,10 +124,13 @@ def default_log_line_regex_ ():
OBJECT = "(?:<([^>]+)>)?"
MESSAGE = " (.+)"
expressions = [TIME, PID, THREAD, LEVEL, CATEGORY, FILENAME, LINE, FUNCTION,
OBJECT, MESSAGE]
## expressions = [LEVEL, THREAD, TIME, CATEGORY, PID, FILENAME, LINE,
## FUNCTION, OBJECT, MESSAGE]
expressions = [CATEGORY, FILENAME, LINE, FUNCTION, OBJECT, MESSAGE]
# New log format:
## expressions = [TIME, PID, THREAD, LEVEL, CATEGORY, FILENAME, LINE, FUNCTION,
## OBJECT, MESSAGE]
# Old log format:
## expressions = [LEVEL, THREAD, TIME, CATEGORY, PID, FILENAME, LINE,
## FUNCTION, OBJECT, MESSAGE]
return expressions

View file

@ -217,22 +217,35 @@ class LazyLogModel (LogModelBase):
line = self.__fileobj.readline ()
ts_len = 17
ts = Data.parse_time (line[:ts_len])
match = self.__line_regex.match (line[ts_len:-len (os.linesep)])
pid_len = 5
thread_len = 9 # FIXME: %p, so this should be larger on a 64 bit CPU, no?
level_len = 5
non_regex_len = ts_len + 1 + pid_len + thread_len + 1 + level_len + 1
non_regex_line = line[:non_regex_len]
regex_line = line[non_regex_len:]
try:
prefix = non_regex_line.rstrip ()
while " " in prefix:
prefix = prefix.replace (" ", " ")
ts_s, pid_s, thread_s, level_s = prefix.split (" ")
ts = Data.parse_time (ts_s)
pid = int (pid_s)
thread = int (thread_s, 16)
level = Data.DebugLevel (level_s)
match = self.__line_regex.match (regex_line[:-len (os.linesep)].lstrip ())
except ValueError:
match = None
if match is None:
# FIXME?
groups = [ts, 0, 0, Data.DebugLevelNone, "", "", 0, "", "", line[ts_len:-len (os.linesep)]]
groups = [0, 0, 0, Data.DebugLevelNone, "", "", 0, "", "", line[ts_len:-len (os.linesep)]]
else:
groups = [ts] + list (match.groups ())
groups = [ts, pid, thread, level] + list (match.groups ())
# TODO: Figure out how much string interning can save here and how
# much run time speed it costs!
groups[1] = int (groups[1]) # pid
groups[2] = int (groups[2], 16) # thread pointer
try:
groups[3] = Data.DebugLevel (groups[3])
except ValueError:
groups[3] = Data.DebugLevelNone
groups[6] = int (groups[6]) # line
groups[8] = groups[8] or "" # object (optional)