validate/baseclasses: Don't leak several hundred MB of XML

The XML-based MediaDescriptor objects were keeping open the XML file and
the associated ElementTree structures, resulting in memory usage of several
hundred megabytes.

Instead, cache the information we need immediately and release the
XML structure.
This commit is contained in:
Edward Hervey 2017-12-18 09:48:21 +01:00 committed by Edward Hervey
parent aa8e27f2a3
commit cb04515cbd

View file

@ -2024,18 +2024,40 @@ class GstValidateMediaDescriptor(MediaDescriptor):
self._xml_path = xml_path self._xml_path = xml_path
try: try:
self.media_xml = ET.parse(xml_path).getroot() media_xml = ET.parse(xml_path).getroot()
except xml.etree.ElementTree.ParseError: except xml.etree.ElementTree.ParseError:
printc("Could not parse %s" % xml_path, printc("Could not parse %s" % xml_path,
Colors.FAIL) Colors.FAIL)
raise raise
# Sanity checks self._extract_data (media_xml)
self.media_xml.attrib["duration"]
self.media_xml.attrib["seekable"]
self.set_protocol(urllib.parse.urlparse(urllib.parse.urlparse(self.get_uri()).scheme).scheme) self.set_protocol(urllib.parse.urlparse(urllib.parse.urlparse(self.get_uri()).scheme).scheme)
def _extract_data(self, media_xml):
    """Cache the values this descriptor needs from the parsed XML root.

    Only plain strings, ints, bools and lists of them are stored on
    ``self``; no reference to ``media_xml`` or any of its child elements
    is kept, so the caller can drop the element tree afterwards.

    Args:
        media_xml: Root element of the parsed media-info document. It must
            carry ``uri``, ``duration`` and ``seekable`` attributes and
            contain a ``streams`` child with a ``caps`` attribute;
            ``protocol`` and ``live`` are optional.

    Raises:
        IndexError: if the root has no ``streams`` child.
        KeyError: if a required attribute is missing on the root, on
            ``streams`` or on one of its ``stream`` children.
        ValueError: if ``duration`` is not an integer literal.
    """
    # Look the <streams> element up once instead of re-running findall()
    # for every cached field.  A missing element raises IndexError here,
    # exactly as the first lookup always did.
    streams_element = media_xml.findall("streams")[0]
    self._caps = streams_element.attrib["caps"]

    # One pass over the <stream> children yields (type, caps) pairs; the
    # other per-track fields are derived from them further down.
    self._track_caps = [
        (stream.attrib["type"], stream.attrib["caps"])
        for stream in streams_element.findall("stream")
    ]

    self._uri = media_xml.attrib["uri"]
    self._duration = int(media_xml.attrib["duration"])
    self._protocol = media_xml.get("protocol", None)
    self._is_seekable = media_xml.attrib["seekable"].lower() == "true"
    self._is_live = media_xml.get("live", "false").lower() == "true"

    self._track_types = [track_type for track_type, _ in self._track_caps]
    self._is_image = "image" in self._track_types
@staticmethod @staticmethod
def new_from_uri(uri, verbose=False, include_frames=False): def new_from_uri(uri, verbose=False, include_frames=False):
""" """
@ -2100,51 +2122,39 @@ class GstValidateMediaDescriptor(MediaDescriptor):
return self._xml_path.replace("." + self.STREAM_INFO_EXT, "") return self._xml_path.replace("." + self.STREAM_INFO_EXT, "")
def get_caps(self): def get_caps(self):
return self.media_xml.findall("streams")[0].attrib["caps"] return self._caps
def get_tracks_caps(self): def get_tracks_caps(self):
res = [] return self._track_caps
try:
streams = self.media_xml.findall("streams")[0].findall("stream")
except IndexError:
return res
for stream in streams:
res.append((stream.attrib["type"], stream.attrib["caps"]))
return res
def get_uri(self): def get_uri(self):
return self.media_xml.attrib["uri"] return self._uri
def get_duration(self): def get_duration(self):
return int(self.media_xml.attrib["duration"]) return self._duration
def set_protocol(self, protocol): def set_protocol(self, protocol):
self.media_xml.attrib["protocol"] = protocol self._protocol = protocol
def get_protocol(self): def get_protocol(self):
return self.media_xml.attrib["protocol"] return self._protocol
def is_seekable(self): def is_seekable(self):
return self.media_xml.attrib["seekable"].lower() == "true" return self._is_seekable
def is_live(self): def is_live(self):
return self.media_xml.get("live", "false").lower() == "true" return self._is_live
def can_play_reverse(self): def can_play_reverse(self):
return True return True
def is_image(self): def is_image(self):
for stream in self.media_xml.findall("streams")[0].findall("stream"): return self._is_image
if stream.attrib["type"] == "image":
return True
return False
def get_num_tracks(self, track_type): def get_num_tracks(self, track_type):
n = 0 n = 0
for stream in self.media_xml.findall("streams")[0].findall("stream"): for t in self._track_types:
if stream.attrib["type"] == track_type: if t == track_type:
n += 1 n += 1
return n return n