typefindfunctions: Fix WebVTT format detection

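If a WebVTT file consists of only the "WEBVTT" header without a body,
the file can be smaller than 10 bytes, so the type finder must not
require 10 bytes to be available before checking for the magic.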

Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/1359>
This commit is contained in:
Seungha Yang 2021-08-25 20:18:20 +09:00 committed by Sebastian Dröge
parent c38afa2070
commit 21dd2076f6
2 changed files with 49 additions and 10 deletions

View file

@ -5018,26 +5018,33 @@ static void
webvtt_type_find (GstTypeFind * tf, gpointer private)
{
const guint8 *data;
static const guint8 webvtt_with_bom[] = {
0xef, 0xbb, 0xbf, 'W', 'E', 'B', 'V', 'T', 'T'
};
data = gst_type_find_peek (tf, 0, 10);
data = gst_type_find_peek (tf, 0, 7);
if (data == NULL)
return;
/* there might be a UTF-8 BOM at the beginning */
if (memcmp (data, "WEBVTT", 6) != 0 && memcmp (data + 3, "WEBVTT", 6) != 0) {
return;
}
if (memcmp (data, "WEBVTT", 6) == 0) {
data += 6;
} else {
data = gst_type_find_peek (tf, 0, 10);
if (data[0] != 'W') {
if (data[0] != 0xef || data[1] != 0xbb || data[2] != 0xbf)
return; /* Not a UTF-8 BOM */
data += 3;
if (!data)
return;
/* there might be a UTF-8 BOM at the beginning */
if (memcmp (data, webvtt_with_bom, sizeof (webvtt_with_bom)) != 0)
return;
data += 9;
}
/* After the WEBVTT magic must be one of these chars:
* 0x20 (space), 0x9 (tab), 0xa (LF) or 0xd (CR) */
if (data[6] != 0x20 && data[6] != 0x9 && data[6] != 0xa && data[6] != 0xd) {
if (data[0] != 0x20 && data[0] != 0x9 && data[0] != 0xa && data[0] != 0xd) {
return;
}

View file

@ -477,6 +477,37 @@ GST_START_TEST (test_manifest_typefinding)
GST_END_TEST;
GST_START_TEST (test_webvtt)
{
GstTypeFindProbability prob;
const gchar *media_type;
GstCaps *caps;
guint8 webvtt[] = {
'W', 'E', 'B', 'V', 'T', 'T', '\n', '\n'
};
guint8 webvtt_with_bom[] = {
0xef, 0xbb, 0xbf, 'W', 'E', 'B', 'V', 'T', 'T', '\n', '\n'
};
prob = 0;
caps = typefind_data (webvtt, sizeof (webvtt), &prob);
fail_unless (caps != NULL);
media_type = gst_structure_get_name (gst_caps_get_structure (caps, 0));
fail_unless_equals_string (media_type, "application/x-subtitle-vtt");
fail_unless_equals_int (prob, GST_TYPE_FIND_MAXIMUM);
gst_caps_unref (caps);
prob = 0;
caps = typefind_data (webvtt_with_bom, sizeof (webvtt_with_bom), &prob);
fail_unless (caps != NULL);
media_type = gst_structure_get_name (gst_caps_get_structure (caps, 0));
fail_unless_equals_string (media_type, "application/x-subtitle-vtt");
fail_unless_equals_int (prob, GST_TYPE_FIND_MAXIMUM);
gst_caps_unref (caps);
}
GST_END_TEST;
static Suite *
typefindfunctions_suite (void)
{
@ -494,6 +525,7 @@ typefindfunctions_suite (void)
tcase_add_test (tc_chain, test_random_data);
tcase_add_test (tc_chain, test_hls_m3u8);
tcase_add_test (tc_chain, test_manifest_typefinding);
tcase_add_test (tc_chain, test_webvtt);
return s;
}