gstreamer/debug-viewer/GstDebugViewer/Data.py

# -*- coding: utf-8; mode: python; -*-
#
#  GStreamer Debug Viewer - View and analyze GStreamer debug log files
#
#  Copyright (C) 2007 René Stadler <mail@renestadler.de>
#
#  This program is free software; you can redistribute it and/or modify it
#  under the terms of the GNU General Public License as published by the Free
#  Software Foundation; either version 3 of the License, or (at your option)
#  any later version.
#
#  This program is distributed in the hope that it will be useful, but WITHOUT
#  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
#  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
#  more details.
#
#  You should have received a copy of the GNU General Public License along with
#  this program.  If not, see <http://www.gnu.org/licenses/>.

"""GStreamer Debug Viewer Data module."""

import os
import logging
import re

# Nanosecond resolution (like gst.SECOND)
SECOND = 1000000000

def time_args (ts):

    secs = ts // SECOND

    return "%i:%02i:%02i.%09i" % (secs // 60**2,
                                  secs // 60 % 60,
                                  secs % 60,
                                  ts % SECOND,)

def time_diff_args (time_diff):

    if time_diff >= 0:
        sign = "+"
    else:
        sign = "-"

    secs = abs (time_diff) // SECOND

    return "%s%02i:%02i.%09i" % (sign,
                                 secs // 60,
                                 secs % 60,
                                 abs (time_diff) % SECOND,)

def time_args_no_hours (ts):

    secs = ts // SECOND

    return "%02i:%02i.%09i" % (secs // 60,
                               secs % 60,
                               ts % SECOND,)

def parse_time (st):

    """Parse time strings that look like "0:00:00.0000000"."""

    h, m, s = st.split (":")
    secs, subsecs = s.split (".")

    return (long ((int (h) * 60**2 + int (m) * 60) * SECOND) +
            long (secs) * SECOND + long (subsecs))

class DebugLevel (int):

    __names = ["NONE", "ERROR", "WARN", "INFO", "DEBUG", "LOG", "FIXME", "TRACE"]
    __instances = {}

    def __new__ (cls, level):

        try:
            level_int = int (level)
        except (ValueError, TypeError,):
            try:
                level_int = cls.__names.index (level.upper ())
            except ValueError:
                raise ValueError ("no debug level named %r" % (level,))
        if level_int in cls.__instances:
            return cls.__instances[level_int]
        else:
            new_instance = int.__new__ (cls, level_int)
            new_instance.name = cls.__names[level_int]
            cls.__instances[level_int] = new_instance
            return new_instance

    def __repr__ (self):

        return "<%s %s (%i)>" % (type (self).__name__, self.__names[self], self,)

    def higher_level (self):

        if self == len (self.__names) - 1:
            raise ValueError ("already the highest debug level")

        return DebugLevel (self + 1)

    def lower_level (self):

        if self == 0:
            raise ValueError ("already the lowest debug level")

        return DebugLevel (self - 1)

debug_level_none = DebugLevel ("NONE")
debug_level_error = DebugLevel ("ERROR")
debug_level_warning = DebugLevel ("WARN")
debug_level_info = DebugLevel ("INFO")
debug_level_debug = DebugLevel ("DEBUG")
debug_level_log = DebugLevel ("LOG")
debug_level_fixme = DebugLevel ("FIXME")
debug_level_trace = DebugLevel ("TRACE")
debug_levels = [debug_level_none,
                debug_level_trace,
                debug_level_fixme,
                debug_level_log,
                debug_level_debug,
                debug_level_info,
                debug_level_warning,
                debug_level_error]

# For stripping color codes:
_escape = re.compile ("\x1b\\[[0-9;]*m")
def strip_escape (s):

    # FIXME: This can be optimized further!

    while "\x1b" in s:
        s = _escape.sub ("", s)
    return s

def default_log_line_regex_ ():

    # "DEBUG             "
    LEVEL = "([A-Z]+)\s+"
    # "0x8165430 "
    THREAD = r"(0x[0-9a-f]+)\s+" #r"\((0x[0-9a-f]+) - "
    # "0:00:00.777913000  "
    TIME = r"(\d+:\d\d:\d\d\.\d+)\s+"
    CATEGORY = "([A-Za-z0-9_-]+)\s+" # "GST_REFCOUNTING ", "flacdec "
    # "  3089 "
    PID = r"(\d+)\s+"
    FILENAME = r"([^:]+):"
    LINE = r"(\d+):"
    FUNCTION = "([A-Za-z0-9_]+):"
    # FIXME: When non-g(st)object stuff is logged with *_OBJECT (like
    # buffers!), the address is printed *without* <> brackets!
    OBJECT = "(?:<([^>]+)> )?"
    MESSAGE = "(.+)"

    # New log format:
    expressions = [TIME, PID, THREAD, LEVEL, CATEGORY, FILENAME, LINE, FUNCTION,
                   OBJECT, MESSAGE]
    # Old log format:
    ## expressions = [LEVEL, THREAD, TIME, CATEGORY, PID, FILENAME, LINE,
    ##                FUNCTION, OBJECT, MESSAGE]

    return expressions

def default_log_line_regex ():

    expressions = default_log_line_regex_ ()
    return re.compile ("".join (expressions))

class Producer (object):

    def __init__ (self):

        self.consumers = []

    def have_load_started (self):

        for consumer in self.consumers:
            consumer.handle_load_started ()

    def have_load_finished (self):

        for consumer in self.consumers:
            consumer.handle_load_finished ()

class SortHelper (object):

    def __init__ (self, fileobj, offsets):

        self._gen = self.__gen (fileobj, offsets)
        self._gen.next ()

        # Override in the instance, for performance (this gets called in an
        # inner loop):
        self.find_insert_position = self._gen.send

    @staticmethod
    def find_insert_position (insert_time_string):

        # Stub for documentary purposes.

        pass

    @staticmethod
    def __gen (fileobj, offsets):

        from math import floor
        tell = fileobj.tell
        seek = fileobj.seek
        read = fileobj.read
        time_len = len (time_args (0))

        # We remember the previous insertion point. This gives a nice speed up
        # for larger bubbles which are already sorted. TODO: In practice, log
        # lines only get out of order across threads. Need to check if it pays
        # to parse the thread here and maintain multiple insertion points for
        # heavily interleaved parts of the log.
        pos = 0
        pos_time_string = ""

        insert_pos = None
        while True:
            insert_time_string = (yield insert_pos)

            save_offset = tell ()

            if pos_time_string <= insert_time_string:
                lo = pos
                hi = len (offsets)
            else:
                lo = 0
                hi = pos

            # This is a bisection search, except we don't cut the range in the
            # middle each time, but at the 90th percentile. This is because
            # logs are "mostly sorted", so the insertion point is much more
            # likely to be at the end anyways:
            while lo < hi:
                mid = int (floor (lo * 0.1 + hi * 0.9))
                seek (offsets[mid])
                mid_time_string = read (time_len)
                if insert_time_string < mid_time_string:
                    hi = mid
                else:
                    lo = mid + 1
            pos = lo
            # Caller will replace row at pos with the new one, so this is
            # correct:
            pos_time_string = insert_time_string

            insert_pos = pos

            seek (save_offset)

class LineCache (Producer):

    _lines_per_iteration = 50000

    def __init__ (self, fileobj, dispatcher):

        Producer.__init__ (self)

        self.logger = logging.getLogger ("linecache")
        self.dispatcher = dispatcher

        self.__fileobj = fileobj
        self.__fileobj.seek (0, 2)
        self.__file_size = self.__fileobj.tell ()
        self.__fileobj.seek (0)

        self.offsets = []
        self.levels = [] # FIXME

    def start_loading (self):

        self.logger.debug ("dispatching load process")
        self.have_load_started ()
        self.dispatcher (self.__process ())

    def get_progress (self):

        return float (self.__fileobj.tell ()) / self.__file_size

    def __process (self):

        offsets = self.offsets
        levels = self.levels

        dict_levels = {"T" : debug_level_trace, "F" : debug_level_fixme,
                       "L" : debug_level_log, "D" : debug_level_debug,
                       "I" : debug_level_info, "W" : debug_level_warning,
                       "E" : debug_level_error, " " : debug_level_none}
        rexp = re.compile (r"\d:\d\d:\d\d\.\d+\s+\d+\s+0x[0-9a-f]+\s+([TFLDIEW ])")

        # Moving attribute lookups out of the loop:
        readline = self.__fileobj.readline
        tell = self.__fileobj.tell
        rexp_match = rexp.match
        levels_append = levels.append
        offsets_append = offsets.append
        dict_levels_get = dict_levels.get

        self.__fileobj.seek (0)
        limit = self._lines_per_iteration
        last_line = ""
        i = 0
        sort_helper = SortHelper (self.__fileobj, offsets)
        find_insert_position = sort_helper.find_insert_position
        while True:
            offset = tell ()
            line = readline ()
            if not line:
                break
            # if line[18] == "\x1b":
            #     line = strip_escape (line)
            match = rexp_match (line)
            if match is None:
                continue

            # Timestamp is in the very beginning of the row, and can be sorted
            # by lexical comparison. That's why we don't bother parsing the
            # time to integer. We also don't have to take a substring here,
            # which would be a useless memcpy.
            if line >= last_line:
                levels_append (dict_levels_get (match.group (1), debug_level_none))
                offsets_append (offset)
                last_line = line
            else:
                pos = find_insert_position (line)
                levels.insert (pos, dict_levels_get (match.group (1), debug_level_none))
                offsets.insert (pos, offset)
            i += 1
            if i >= limit:
                i = 0
                yield True

        self.have_load_finished ()
        yield False

class LogLine (list):

    _line_regex = default_log_line_regex ()

    @classmethod
    def parse_full (cls, line_string):

        match = cls._line_regex.match (line_string)
        if match is None:
            ## raise ValueError ("not a valid log line (%r)" % (line_string,))
            groups = [0, 0, 0, 0, "", "", 0, "", "", 0]
            return cls (groups)

        line = cls (match.groups ())
        # Timestamp.
        line[0] = parse_time (line[0])
        # PID.
        line[1] = int (line[1])
        # Thread.
        line[2] = int (line[2], 16)
        # Level (this is handled in LineCache).
        line[3] = 0
        # Line.
        line[6] = int (line[6])
        # Message start offset.
        line[9] = match.start (9 + 1)

        for col_id in (4,   # COL_CATEGORY
                       5,   # COL_FILENAME
                       7,   # COL_FUNCTION,
                       8,): # COL_OBJECT
            line[col_id] = intern (line[col_id] or "")

        return line

class LogLines (object):

    def __init__ (self, fileobj, line_cache):

        self.__fileobj = fileobj
        self.__line_cache = line_cache

    def __len__ (self):

        return len (self.__line_cache.offsets)

    def __getitem__ (self, line_index):

        offset = self.__line_cache.offsets[line_index]
        self.__fileobj.seek (offset)
        line_string = self.__fileobj.readline ()
        line = LogLine.parse_full (line_string)
        msg = line_string[line[-1]:]
        line[-1] = msg
        return line

    def __iter__ (self):

        l = len (self)
        i = 0
        while i < l:
            yield self[i]
            i += 1

class LogFile (Producer):

    def __init__ (self, filename, dispatcher):

        import mmap

        Producer.__init__ (self)

        self.logger = logging.getLogger ("logfile")

        self.path = os.path.normpath (os.path.abspath (filename))
        self.__real_fileobj = file (filename, "rb")
        self.fileobj = mmap.mmap (self.__real_fileobj.fileno (), 0, access = mmap.ACCESS_READ)
        self.line_cache = LineCache (self.fileobj, dispatcher)
        self.line_cache.consumers.append (self)

    def get_full_line (self, line_index):

        offset = self.line_cache.offsets[line_index]
        self.fileobj.seek (offset)
        line_string = self.fileobj.readline ()
        line = LogLine.parse_full (line_string)
        msg = line_string[line[-1]:]
        line[-1] = msg
        return line

    def start_loading (self):

        self.logger.debug ("starting load")
        self.line_cache.start_loading ()

    def get_load_progress (self):

        return self.line_cache.get_progress ()

    def handle_load_started (self):

        # Chain up to our consumers:
        self.have_load_started ()

    def handle_load_finished (self):

        self.lines = LogLines (self.fileobj, self.line_cache)

        # Chain up to our consumers:
        self.have_load_finished ()
New import (the old repo got busted, just had 4 revs anyways) 2007-11-14 08:30:19 +00:00			`# -- coding: utf-8; mode: python; --`
			`#`
Fix copyright/license headers and module docstrings 2007-11-22 08:47:06 +00:00			`# GStreamer Debug Viewer - View and analyze GStreamer debug log files`
New import (the old repo got busted, just had 4 revs anyways) 2007-11-14 08:30:19 +00:00			`#`
			`# Copyright (C) 2007 René Stadler <mail@renestadler.de>`
			`#`
			`# This program is free software; you can redistribute it and/or modify it`
			`# under the terms of the GNU General Public License as published by the Free`
			`# Software Foundation; either version 3 of the License, or (at your option)`
			`# any later version.`
			`#`
			`# This program is distributed in the hope that it will be useful, but WITHOUT`
			`# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or`
			`# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for`
			`# more details.`
			`#`
			`# You should have received a copy of the GNU General Public License along with`
			`# this program. If not, see <http://www.gnu.org/licenses/>.`

Fix copyright/license headers and module docstrings 2007-11-22 08:47:06 +00:00			`"""GStreamer Debug Viewer Data module."""`
New import (the old repo got busted, just had 4 revs anyways) 2007-11-14 08:30:19 +00:00
And now make it actually run\! 2007-11-19 13:52:01 +00:00			`import os`
New import (the old repo got busted, just had 4 revs anyways) 2007-11-14 08:30:19 +00:00			`import logging`
			`import re`

			`# Nanosecond resolution (like gst.SECOND)`
			`SECOND = 1000000000`

			`def time_args (ts):`

			`secs = ts // SECOND`

			`return "%i:%02i:%02i.%09i" % (secs // 60**2,`
			`secs // 60 % 60,`
			`secs % 60,`
			`ts % SECOND,)`

Add new base time feature The log view context menu gains a new action "Set base time", which changes the time column to show the delta to the selected row. 2009-10-16 18:45:29 +00:00			`def time_diff_args (time_diff):`

			`if time_diff >= 0:`
			`sign = "+"`
			`else:`
			`sign = "-"`

			`secs = abs (time_diff) // SECOND`

			`return "%s%02i:%02i.%09i" % (sign,`
			`secs // 60,`
			`secs % 60,`
			`abs (time_diff) % SECOND,)`

New import (the old repo got busted, just had 4 revs anyways) 2007-11-14 08:30:19 +00:00			`def time_args_no_hours (ts):`

			`secs = ts // SECOND`

			`return "%02i:%02i.%09i" % (secs // 60,`
			`secs % 60,`
			`ts % SECOND,)`

			`def parse_time (st):`

			`"""Parse time strings that look like "0:00:00.0000000"."""`

			`h, m, s = st.split (":")`
			`secs, subsecs = s.split (".")`

			`return (long ((int (h) * 60*2 + int (m) 60) * SECOND) +`
			`long (secs) * SECOND + long (subsecs))`

			`class DebugLevel (int):`

Add 'fixme' and 'trace' log levels 2010-07-06 08:42:08 +00:00			`__names = ["NONE", "ERROR", "WARN", "INFO", "DEBUG", "LOG", "FIXME", "TRACE"]`
New import (the old repo got busted, just had 4 revs anyways) 2007-11-14 08:30:19 +00:00			`__instances = {}`

			`def __new__ (cls, level):`

			`try:`
			`level_int = int (level)`
			`except (ValueError, TypeError,):`
			`try:`
			`level_int = cls.__names.index (level.upper ())`
			`except ValueError:`
			`raise ValueError ("no debug level named %r" % (level,))`
			`if level_int in cls.__instances:`
			`return cls.__instances[level_int]`
			`else:`
			`new_instance = int.__new__ (cls, level_int)`
			`new_instance.name = cls.__names[level_int]`
			`cls.__instances[level_int] = new_instance`
			`return new_instance`

			`def __repr__ (self):`

			`return "<%s %s (%i)>" % (type (self).__name__, self.__names[self], self,)`

			`def higher_level (self):`

			`if self == len (self.__names) - 1:`
			`raise ValueError ("already the highest debug level")`

			`return DebugLevel (self + 1)`

			`def lower_level (self):`

			`if self == 0:`
			`raise ValueError ("already the lowest debug level")`

			`return DebugLevel (self - 1)`

Rename debug level instances 2007-11-16 10:53:02 +00:00			`debug_level_none = DebugLevel ("NONE")`
			`debug_level_error = DebugLevel ("ERROR")`
			`debug_level_warning = DebugLevel ("WARN")`
			`debug_level_info = DebugLevel ("INFO")`
			`debug_level_debug = DebugLevel ("DEBUG")`
			`debug_level_log = DebugLevel ("LOG")`
Add 'fixme' and 'trace' log levels 2010-07-06 08:42:08 +00:00			`debug_level_fixme = DebugLevel ("FIXME")`
			`debug_level_trace = DebugLevel ("TRACE")`
Use GdkColors for level column, cleanup color handling 2008-01-23 12:51:14 +00:00			`debug_levels = [debug_level_none,`
Add 'fixme' and 'trace' log levels 2010-07-06 08:42:08 +00:00			`debug_level_trace,`
			`debug_level_fixme,`
Use GdkColors for level column, cleanup color handling 2008-01-23 12:51:14 +00:00			`debug_level_log,`
			`debug_level_debug,`
			`debug_level_info,`
			`debug_level_warning,`
			`debug_level_error]`
New import (the old repo got busted, just had 4 revs anyways) 2007-11-14 08:30:19 +00:00
			`# For stripping color codes:`
			`_escape = re.compile ("\x1b\\[[0-9;]*m")`
			`def strip_escape (s):`

Optimize color stripping function a bit 2007-11-19 09:35:27 +00:00			`# FIXME: This can be optimized further!`

			`while "\x1b" in s:`
			`s = _escape.sub ("", s)`
			`return s`
New import (the old repo got busted, just had 4 revs anyways) 2007-11-14 08:30:19 +00:00
			`def default_log_line_regex_ ():`

			`# "DEBUG "`
Add support for recent log format changes, be more tolerant on whitespaces, cope with object names containing '>' 2008-06-29 18:14:07 +00:00			`LEVEL = "([A-Z]+)\s+"`
New import (the old repo got busted, just had 4 revs anyways) 2007-11-14 08:30:19 +00:00			`# "0x8165430 "`
Add support for recent log format changes, be more tolerant on whitespaces, cope with object names containing '>' 2008-06-29 18:14:07 +00:00			`THREAD = r"(0x[0-9a-f]+)\s+" #r"\((0x[0-9a-f]+) - "`
New import (the old repo got busted, just had 4 revs anyways) 2007-11-14 08:30:19 +00:00			`# "0:00:00.777913000 "`
Add support for recent log format changes, be more tolerant on whitespaces, cope with object names containing '>' 2008-06-29 18:14:07 +00:00			`TIME = r"(\d+:\d\d:\d\d\.\d+)\s+"`
			`CATEGORY = "([A-Za-z0-9_-]+)\s+" # "GST_REFCOUNTING ", "flacdec "`
New import (the old repo got busted, just had 4 revs anyways) 2007-11-14 08:30:19 +00:00			`# " 3089 "`
Add support for recent log format changes, be more tolerant on whitespaces, cope with object names containing '>' 2008-06-29 18:14:07 +00:00			`PID = r"(\d+)\s+"`
New import (the old repo got busted, just had 4 revs anyways) 2007-11-14 08:30:19 +00:00			`FILENAME = r"([^:]+):"`
Add support for recent log format changes, be more tolerant on whitespaces, cope with object names containing '>' 2008-06-29 18:14:07 +00:00			`LINE = r"(\d+):"`
New import (the old repo got busted, just had 4 revs anyways) 2007-11-14 08:30:19 +00:00			`FUNCTION = "([A-Za-z0-9_]+):"`
			`# FIXME: When non-g(st)object stuff is logged with *_OBJECT (like`
			`# buffers!), the address is printed without <> brackets!`
Add support for recent log format changes, be more tolerant on whitespaces, cope with object names containing '>' 2008-06-29 18:14:07 +00:00			`OBJECT = "(?:<([^>]+)> )?"`
			`MESSAGE = "(.+)"`
New import (the old repo got busted, just had 4 revs anyways) 2007-11-14 08:30:19 +00:00
Use less regex matching to parse lines (does not provide a performance gain though) 2007-11-15 13:06:37 +00:00			`# New log format:`
Add support for recent log format changes, be more tolerant on whitespaces, cope with object names containing '>' 2008-06-29 18:14:07 +00:00			`expressions = [TIME, PID, THREAD, LEVEL, CATEGORY, FILENAME, LINE, FUNCTION,`
			`OBJECT, MESSAGE]`
Use less regex matching to parse lines (does not provide a performance gain though) 2007-11-15 13:06:37 +00:00			`# Old log format:`
			`## expressions = [LEVEL, THREAD, TIME, CATEGORY, PID, FILENAME, LINE,`
			`## FUNCTION, OBJECT, MESSAGE]`
New import (the old repo got busted, just had 4 revs anyways) 2007-11-14 08:30:19 +00:00
			`return expressions`

			`def default_log_line_regex ():`
Cleanup whitespace 2009-03-14 21:02:45 +00:00
New import (the old repo got busted, just had 4 revs anyways) 2007-11-14 08:30:19 +00:00			`expressions = default_log_line_regex_ ()`
			`return re.compile ("".join (expressions))`

			`class Producer (object):`

			`def __init__ (self):`

			`self.consumers = []`

			`def have_load_started (self):`

			`for consumer in self.consumers:`
			`consumer.handle_load_started ()`

			`def have_load_finished (self):`

			`for consumer in self.consumers:`
			`consumer.handle_load_finished ()`

Data: fix up out-of-order log lines This is important because we rely on monotonically increasing timestamps for binary searches in various places. Overhead for an already sorted file with 1 million lines is less than 5%. 2011-11-16 19:37:21 +00:00			`class SortHelper (object):`

			`def __init__ (self, fileobj, offsets):`

			`self._gen = self.__gen (fileobj, offsets)`
			`self._gen.next ()`

			`# Override in the instance, for performance (this gets called in an`
			`# inner loop):`
			`self.find_insert_position = self._gen.send`

			`@staticmethod`
			`def find_insert_position (insert_time_string):`

			`# Stub for documentary purposes.`

			`pass`

			`@staticmethod`
			`def __gen (fileobj, offsets):`

			`from math import floor`
			`tell = fileobj.tell`
			`seek = fileobj.seek`
			`read = fileobj.read`
			`time_len = len (time_args (0))`

			`# We remember the previous insertion point. This gives a nice speed up`
			`# for larger bubbles which are already sorted. TODO: In practice, log`
			`# lines only get out of order across threads. Need to check if it pays`
			`# to parse the thread here and maintain multiple insertion points for`
			`# heavily interleaved parts of the log.`
			`pos = 0`
			`pos_time_string = ""`

			`insert_pos = None`
			`while True:`
			`insert_time_string = (yield insert_pos)`

			`save_offset = tell ()`

			`if pos_time_string <= insert_time_string:`
			`lo = pos`
			`hi = len (offsets)`
			`else:`
			`lo = 0`
			`hi = pos`

			`# This is a bisection search, except we don't cut the range in the`
			`# middle each time, but at the 90th percentile. This is because`
			`# logs are "mostly sorted", so the insertion point is much more`
			`# likely to be at the end anyways:`
			`while lo < hi:`
			`mid = int (floor (lo * 0.1 + hi * 0.9))`
			`seek (offsets[mid])`
			`mid_time_string = read (time_len)`
			`if insert_time_string < mid_time_string:`
			`hi = mid`
			`else:`
			`lo = mid + 1`
			`pos = lo`
			`# Caller will replace row at pos with the new one, so this is`
			`# correct:`
			`pos_time_string = insert_time_string`

			`insert_pos = pos`

			`seek (save_offset)`

New import (the old repo got busted, just had 4 revs anyways) 2007-11-14 08:30:19 +00:00			`class LineCache (Producer):`

Fix progress display on load 2007-11-14 08:55:12 +00:00			`_lines_per_iteration = 50000`
New import (the old repo got busted, just had 4 revs anyways) 2007-11-14 08:30:19 +00:00
			`def __init__ (self, fileobj, dispatcher):`

			`Producer.__init__ (self)`

			`self.logger = logging.getLogger ("linecache")`
			`self.dispatcher = dispatcher`

Use the mmapped fileobj in more places and use slice access 2007-12-03 13:45:09 +00:00			`self.__fileobj = fileobj`
New import (the old repo got busted, just had 4 revs anyways) 2007-11-14 08:30:19 +00:00			`self.__fileobj.seek (0, 2)`
			`self.__file_size = self.__fileobj.tell ()`
			`self.__fileobj.seek (0)`

Ditch arrays for offset storage again 2007-11-21 12:21:38 +00:00			`self.offsets = []`
Use an array for line offset mapping (if file < 4GB) 2007-11-21 09:40:13 +00:00			`self.levels = [] # FIXME`

New import (the old repo got busted, just had 4 revs anyways) 2007-11-14 08:30:19 +00:00			`def start_loading (self):`

			`self.logger.debug ("dispatching load process")`
			`self.have_load_started ()`
			`self.dispatcher (self.__process ())`

			`def get_progress (self):`

			`return float (self.__fileobj.tell ()) / self.__file_size`

			`def __process (self):`

			`offsets = self.offsets`
Add (commented out) support for parsing debug level at line cache scan time 2007-11-16 09:03:22 +00:00			`levels = self.levels`

Add 'fixme' and 'trace' log levels 2010-07-06 08:42:08 +00:00			`dict_levels = {"T" : debug_level_trace, "F" : debug_level_fixme,`
			`"L" : debug_level_log, "D" : debug_level_debug,`
			`"I" : debug_level_info, "W" : debug_level_warning,`
			`"E" : debug_level_error, " " : debug_level_none}`
			`rexp = re.compile (r"\d:\d\d:\d\d\.\d+\s+\d+\s+0x[0-9a-f]+\s+([TFLDIEW ])")`
Add (commented out) support for parsing debug level at line cache scan time 2007-11-16 09:03:22 +00:00
Move more attribute lookups out of loops for speed 2008-06-30 16:48:34 +00:00			`# Moving attribute lookups out of the loop:`
New import (the old repo got busted, just had 4 revs anyways) 2007-11-14 08:30:19 +00:00			`readline = self.__fileobj.readline`
			`tell = self.__fileobj.tell`
Move more attribute lookups out of loops for speed 2008-06-30 16:48:34 +00:00			`rexp_match = rexp.match`
			`levels_append = levels.append`
			`offsets_append = offsets.append`
			`dict_levels_get = dict_levels.get`
New import (the old repo got busted, just had 4 revs anyways) 2007-11-14 08:30:19 +00:00
			`self.__fileobj.seek (0)`
			`limit = self._lines_per_iteration`
Data: fix up out-of-order log lines This is important because we rely on monotonically increasing timestamps for binary searches in various places. Overhead for an already sorted file with 1 million lines is less than 5%. 2011-11-16 19:37:21 +00:00			`last_line = ""`
New import (the old repo got busted, just had 4 revs anyways) 2007-11-14 08:30:19 +00:00			`i = 0`
Data: fix up out-of-order log lines This is important because we rely on monotonically increasing timestamps for binary searches in various places. Overhead for an already sorted file with 1 million lines is less than 5%. 2011-11-16 19:37:21 +00:00			`sort_helper = SortHelper (self.__fileobj, offsets)`
			`find_insert_position = sort_helper.find_insert_position`
New import (the old repo got busted, just had 4 revs anyways) 2007-11-14 08:30:19 +00:00			`while True:`
			`offset = tell ()`
			`line = readline ()`
			`if not line:`
			`break`
Data: fix up out-of-order log lines This is important because we rely on monotonically increasing timestamps for binary searches in various places. Overhead for an already sorted file with 1 million lines is less than 5%. 2011-11-16 19:37:21 +00:00			`# if line[18] == "\x1b":`
			`# line = strip_escape (line)`
Move more attribute lookups out of loops for speed 2008-06-30 16:48:34 +00:00			`match = rexp_match (line)`
Add support for recent log format changes, be more tolerant on whitespaces, cope with object names containing '>' 2008-06-29 18:14:07 +00:00			`if match is None:`
Gracefully handle garbage lines at the line cache level 2007-11-29 12:26:56 +00:00			`continue`
Data: fix up out-of-order log lines This is important because we rely on monotonically increasing timestamps for binary searches in various places. Overhead for an already sorted file with 1 million lines is less than 5%. 2011-11-16 19:37:21 +00:00
			`# Timestamp is in the very beginning of the row, and can be sorted`
			`# by lexical comparison. That's why we don't bother parsing the`
			`# time to integer. We also don't have to take a substring here,`
			`# which would be a useless memcpy.`
			`if line >= last_line:`
			`levels_append (dict_levels_get (match.group (1), debug_level_none))`
			`offsets_append (offset)`
			`last_line = line`
			`else:`
			`pos = find_insert_position (line)`
			`levels.insert (pos, dict_levels_get (match.group (1), debug_level_none))`
			`offsets.insert (pos, offset)`
New import (the old repo got busted, just had 4 revs anyways) 2007-11-14 08:30:19 +00:00			`i += 1`
Fix progress display on load 2007-11-14 08:55:12 +00:00			`if i >= limit:`
			`i = 0`
New import (the old repo got busted, just had 4 revs anyways) 2007-11-14 08:30:19 +00:00			`yield True`

			`self.have_load_finished ()`
			`yield False`

Move final log line parsing from GUI to Data module 2007-11-19 13:27:16 +00:00			`class LogLine (list):`

And now make it actually run\! 2007-11-19 13:52:01 +00:00			`_line_regex = default_log_line_regex ()`

Move final log line parsing from GUI to Data module 2007-11-19 13:27:16 +00:00			`@classmethod`
			`def parse_full (cls, line_string):`

Add support for recent log format changes, be more tolerant on whitespaces, cope with object names containing '>' 2008-06-29 18:14:07 +00:00			`match = cls._line_regex.match (line_string)`
Move final log line parsing from GUI to Data module 2007-11-19 13:27:16 +00:00			`if match is None:`
Add support for recent log format changes, be more tolerant on whitespaces, cope with object names containing '>' 2008-06-29 18:14:07 +00:00			`## raise ValueError ("not a valid log line (%r)" % (line_string,))`
			`groups = [0, 0, 0, 0, "", "", 0, "", "", 0]`
			`return cls (groups)`

			`line = cls (match.groups ())`
			`# Timestamp.`
			`line[0] = parse_time (line[0])`
			`# PID.`
			`line[1] = int (line[1])`
			`# Thread.`
			`line[2] = int (line[2], 16)`
			`# Level (this is handled in LineCache).`
			`line[3] = 0`
			`# Line.`
			`line[6] = int (line[6])`
			`# Message start offset.`
			`line[9] = match.start (9 + 1)`

			`for col_id in (4, # COL_CATEGORY`
			`5, # COL_FILENAME`
			`7, # COL_FUNCTION,`
			`8,): # COL_OBJECT`
			`line[col_id] = intern (line[col_id] or "")`

			`return line`
Move final log line parsing from GUI to Data module 2007-11-19 13:27:16 +00:00
Add LogLines class 2007-11-21 08:47:40 +00:00			`class LogLines (object):`

			`def __init__ (self, fileobj, line_cache):`

			`self.__fileobj = fileobj`
			`self.__line_cache = line_cache`

			`def __len__ (self):`

			`return len (self.__line_cache.offsets)`

			`def __getitem__ (self, line_index):`

			`offset = self.__line_cache.offsets[line_index]`
			`self.__fileobj.seek (offset)`
			`line_string = self.__fileobj.readline ()`
			`line = LogLine.parse_full (line_string)`
			`msg = line_string[line[-1]:]`
			`line[-1] = msg`
			`return line`

			`def __iter__ (self):`

			`l = len (self)`
			`i = 0`
			`while i < l:`
			`yield self[i]`
			`i += 1`

New import (the old repo got busted, just had 4 revs anyways) 2007-11-14 08:30:19 +00:00			`class LogFile (Producer):`

			`def __init__ (self, filename, dispatcher):`

Use the mmapped fileobj in more places and use slice access 2007-12-03 13:45:09 +00:00			`import mmap`

New import (the old repo got busted, just had 4 revs anyways) 2007-11-14 08:30:19 +00:00			`Producer.__init__ (self)`

			`self.logger = logging.getLogger ("logfile")`

Add reload file functionality 2007-11-26 13:31:13 +00:00			`self.path = os.path.normpath (os.path.abspath (filename))`
Use the mmapped fileobj in more places and use slice access 2007-12-03 13:45:09 +00:00			`self.__real_fileobj = file (filename, "rb")`
Use mmap in a portable way 2008-11-29 19:00:20 +00:00			`self.fileobj = mmap.mmap (self.__real_fileobj.fileno (), 0, access = mmap.ACCESS_READ)`
New import (the old repo got busted, just had 4 revs anyways) 2007-11-14 08:30:19 +00:00			`self.line_cache = LineCache (self.fileobj, dispatcher)`
			`self.line_cache.consumers.append (self)`

Almost allow copying a full line to clipboard 2007-11-20 12:52:26 +00:00			`def get_full_line (self, line_index):`

			`offset = self.line_cache.offsets[line_index]`
			`self.fileobj.seek (offset)`
			`line_string = self.fileobj.readline ()`
			`line = LogLine.parse_full (line_string)`
			`msg = line_string[line[-1]:]`
			`line[-1] = msg`
			`return line`

New import (the old repo got busted, just had 4 revs anyways) 2007-11-14 08:30:19 +00:00			`def start_loading (self):`

			`self.logger.debug ("starting load")`
			`self.line_cache.start_loading ()`

			`def get_load_progress (self):`

			`return self.line_cache.get_progress ()`

			`def handle_load_started (self):`

			`# Chain up to our consumers:`
			`self.have_load_started ()`

			`def handle_load_finished (self):`

Add LogLines class 2007-11-21 08:47:40 +00:00			`self.lines = LogLines (self.fileobj, self.line_cache)`

New import (the old repo got busted, just had 4 revs anyways) 2007-11-14 08:30:19 +00:00			`# Chain up to our consumers:`
			`self.have_load_finished ()`