typefindfunctions: Make XML typefinder more strict

If a XMLDec is found, check also for its end. Similarly, check for the
end of the XML tag we're looking for and make sure that the following
characters are valid.

Fixes https://gitlab.freedesktop.org/gstreamer/gstreamer/-/issues/1536

Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/3632>
This commit is contained in:
Sebastian Dröge 2022-12-23 17:25:06 +02:00 committed by Sebastian Dröge
parent 1eeea942de
commit 11520403a5

View file

@ -534,73 +534,115 @@ hls_type_find (GstTypeFind * tf, gpointer unused)
/*** application/xml **********************************************************/
#define XML_BUFFER_SIZE 16
#define XML_INC_BUFFER { \
pos++; \
if (pos == XML_BUFFER_SIZE) { \
pos = 0; \
offset += XML_BUFFER_SIZE; \
data = gst_type_find_peek (tf, offset, XML_BUFFER_SIZE); \
if (data == NULL) return FALSE; \
} else { \
data++; \
} \
}
#define XML_INC_BUFFER_DATA { \
pos++; \
if (pos >= length) { \
return FALSE; \
} else { \
data++; \
} \
}
static gboolean
xml_check_first_element_from_data (const guint8 * data, guint length,
const gchar * element, guint elen, gboolean strict)
{
gboolean got_xmldec;
guint pos = 0;
const guint8 *ptr;
g_return_val_if_fail (data != NULL, FALSE);
if (length <= 5)
/* search for an opening tag */
ptr = memchr (data, '<', length);
if (!ptr)
return FALSE;
length -= (ptr - data);
data = ptr;
if (length < 5)
return FALSE;
/* look for the XMLDec
* see XML spec 2.8, Prolog and Document Type Declaration
* http://www.w3.org/TR/2004/REC-xml-20040204/#sec-prolog-dtd */
got_xmldec = (memcmp (data, "<?xml", 5) == 0);
if (got_xmldec) {
/* look for ending ?> */
data += 5;
length -= 5;
ptr = memchr (data, '?', length);
if (!ptr)
return FALSE;
length -= (ptr - data);
data = ptr;
got_xmldec = (memcmp (data, "?>", 2) == 0);
if (!got_xmldec)
return FALSE;
data += 2;
length -= 2;
}
if (strict && !got_xmldec)
return FALSE;
/* skip XMLDec in any case if we've got one */
if (got_xmldec) {
pos += 5;
data += 5;
/* search for the next opening tag */
ptr = memchr (data, '<', length);
if (!ptr)
return FALSE;
length -= (ptr - data);
data = ptr;
}
/* look for the first element, it has to be the requested element. Bail
* out if it is not within the first 4kB. */
while (pos < MIN (4096, length)) {
while (*data != '<' && pos < MIN (4096, length)) {
XML_INC_BUFFER_DATA;
}
/* skip XML comments */
while (length >= 4 && memcmp (data, "<!--", 4) == 0) {
data += 4;
length -= 4;
XML_INC_BUFFER_DATA;
if (!g_ascii_isalpha (*data)) {
/* if not alphabetic, it's a PI or an element / attribute declaration
* like <?xxx or <!xxx */
XML_INC_BUFFER_DATA;
continue;
}
/* the first normal element, check if it's the one asked for */
if (pos + elen + 1 >= length)
ptr = (const guint8 *) g_strstr_len ((const gchar *) data, length, "-->");
if (!ptr)
return FALSE;
return (element && strncmp ((const char *) data, element, elen) == 0);
ptr += 3;
length -= (ptr - data);
data = ptr;
/* search for the next opening tag */
ptr = memchr (data, '<', length);
if (!ptr)
return FALSE;
length -= (ptr - data);
data = ptr;
}
if (elen == 0)
return TRUE;
/* look for the first element, it has to be the requested element. Bail
* out otherwise. */
if (length < elen + 1)
return FALSE;
data += 1;
length -= 1;
if (memcmp (data, element, elen) != 0)
return FALSE;
data += elen;
length -= elen;
/* check if there's a closing `>` following */
ptr = memchr (data, '>', length);
if (!ptr)
return FALSE;
/* between `<elem` and `>`, there should only be spaces, alphanum or `:`
* until the first `=` for an attribute value */
while (data < ptr) {
if (*data == '>' || *data == '=')
return TRUE;
if (!g_ascii_isprint (*data) && *data != '\n' && *data != '\r')
return FALSE;
data++;
}
return FALSE;
@ -610,50 +652,25 @@ static gboolean
xml_check_first_element (GstTypeFind * tf, const gchar * element, guint elen,
gboolean strict)
{
gboolean got_xmldec;
const guint8 *data;
guint offset = 0;
guint pos = 0;
guint length;
data = gst_type_find_peek (tf, 0, XML_BUFFER_SIZE);
length = gst_type_find_get_length (tf);
/* try a default that should be enough */
if (length == 0)
length = 512;
else if (length < 64)
return FALSE;
else /* the first few bytes should be enough */
length = MIN (4096, length);
data = gst_type_find_peek (tf, 0, length);
if (!data)
return FALSE;
/* look for the XMLDec
* see XML spec 2.8, Prolog and Document Type Declaration
* http://www.w3.org/TR/2004/REC-xml-20040204/#sec-prolog-dtd */
got_xmldec = (memcmp (data, "<?xml", 5) == 0);
if (strict && !got_xmldec)
return FALSE;
/* skip XMLDec in any case if we've got one */
if (got_xmldec) {
pos += 5;
data += 5;
}
/* look for the first element, it has to be the requested element. Bail
* out if it is not within the first 4kB. */
while (data && (offset + pos) < 4096) {
while (*data != '<' && (offset + pos) < 4096) {
XML_INC_BUFFER;
}
XML_INC_BUFFER;
if (!g_ascii_isalpha (*data)) {
/* if not alphabetic, it's a PI or an element / attribute declaration
* like <?xxx or <!xxx */
XML_INC_BUFFER;
continue;
}
/* the first normal element, check if it's the one asked for */
data = gst_type_find_peek (tf, offset + pos, elen + 1);
return (data && element && strncmp ((char *) data, element, elen) == 0);
}
return FALSE;
return xml_check_first_element_from_data (data, length, element, elen,
strict);
}
static GstStaticCaps generic_xml_caps = GST_STATIC_CAPS ("application/xml");