Use less regex matching to parse lines (does not provide a performance gain though)

2025-06-05 06:58:56 +00:00 · 2007-11-15 15:06:37 +02:00 · 2007-11-15 15:06:37 +02:00 · b3f21b5a59
commit b3f21b5a59
parent df2d890d40
2 changed files with 31 additions and 17 deletions
--- a/debug-viewer/GstDebugViewer/Data.py
+++ b/debug-viewer/GstDebugViewer/Data.py
@ -112,9 +112,7 @@ def default_log_line_regex_ ():
    # "0x8165430 "
    THREAD = r"(0x[0-9a-f]+) +" #r"\((0x[0-9a-f]+) - "
    # "0:00:00.777913000  "
-    #TIME = r"([0-9]+:[0-9][0-9]:[0-9][0-9]\.[0-9]+) +"
-    TIME = " +" # Only eating whitespace before PID away, we parse timestamps
-                # without regex.  
+    TIME = r"([0-9]+:[0-9][0-9]:[0-9][0-9]\.[0-9]+) +"
    CATEGORY = "([A-Za-z_-]+) +" # "GST_REFCOUNTING ", "flacdec "
    # "  3089 "
    PID = r"([0-9]+) +"
@ -126,10 +124,13 @@ def default_log_line_regex_ ():
    OBJECT = "(?:<([^>]+)>)?"
    MESSAGE = " (.+)"

-    expressions = [TIME, PID, THREAD, LEVEL, CATEGORY, FILENAME, LINE, FUNCTION,
-                   OBJECT, MESSAGE]
-##     expressions = [LEVEL, THREAD, TIME, CATEGORY, PID, FILENAME, LINE,
-##                    FUNCTION, OBJECT, MESSAGE]
+    expressions = [CATEGORY, FILENAME, LINE, FUNCTION, OBJECT, MESSAGE]
+    # New log format:
+    ## expressions = [TIME, PID, THREAD, LEVEL, CATEGORY, FILENAME, LINE, FUNCTION,
+    ##                OBJECT, MESSAGE]
+    # Old log format:
+    ## expressions = [LEVEL, THREAD, TIME, CATEGORY, PID, FILENAME, LINE,
+    ##                FUNCTION, OBJECT, MESSAGE]

    return expressions

--- a/debug-viewer/GstDebugViewer/GUI.py
+++ b/debug-viewer/GstDebugViewer/GUI.py
@ -217,22 +217,35 @@ class LazyLogModel (LogModelBase):
            line = self.__fileobj.readline ()

        ts_len = 17
-        ts = Data.parse_time (line[:ts_len])
-        match = self.__line_regex.match (line[ts_len:-len (os.linesep)])
+        pid_len = 5
+        thread_len = 9 # FIXME: The thread is printed with %p, so this width is presumably too small on a 64-bit CPU.
+        level_len = 5
+
+        non_regex_len = ts_len + 1 + pid_len + thread_len + 1 + level_len + 1
+        non_regex_line = line[:non_regex_len]
+        regex_line = line[non_regex_len:]
+
+        try:
+            prefix = non_regex_line.rstrip ()
+            while "  " in prefix:
+                prefix = prefix.replace ("  ", " ")
+            ts_s, pid_s, thread_s, level_s = prefix.split (" ")
+            ts = Data.parse_time (ts_s)
+            pid = int (pid_s)
+            thread = int (thread_s, 16)
+            level = Data.DebugLevel (level_s)
+            match = self.__line_regex.match (regex_line[:-len (os.linesep)].lstrip ())
+        except ValueError:
+            match = None
+
        if match is None:
            # FIXME?
-            groups = [ts, 0, 0, Data.DebugLevelNone, "", "", 0, "", "", line[ts_len:-len (os.linesep)]]
+            groups = [0, 0, 0, Data.DebugLevelNone, "", "", 0, "", "", line[ts_len:-len (os.linesep)]]
        else:            
-            groups = [ts] + list (match.groups ())
+            groups = [ts, pid, thread, level] + list (match.groups ())

            # TODO: Figure out how much string interning can save here and how
            # much run time speed it costs!
-            groups[1] = int (groups[1]) # pid
-            groups[2] = int (groups[2], 16) # thread pointer
-            try:
-                groups[3] = Data.DebugLevel (groups[3])
-            except ValueError:
-                groups[3] = Data.DebugLevelNone
            groups[6] = int (groups[6]) # line
            groups[8] = groups[8] or "" # object (optional)