diff --git a/subprojects/gst-plugins-base/gst/subparse/gstsubparseelement.c b/subprojects/gst-plugins-base/gst/subparse/gstsubparseelement.c index 9c03ee10b8..97fb83d8ad 100644 --- a/subprojects/gst-plugins-base/gst/subparse/gstsubparseelement.c +++ b/subprojects/gst-plugins-base/gst/subparse/gstsubparseelement.c @@ -291,25 +291,36 @@ gst_sub_parse_type_find (GstTypeFind * tf, gpointer private) { GstSubParseFormat format; const guint8 *data; + guint64 data_len = 128, checked_len; GstCaps *caps; gchar *str; gchar *encoding = NULL; const gchar *end; - if (!(data = gst_type_find_peek (tf, 0, 129))) - return; + /* use the first 128 bytes for detection, if available */ + data = gst_type_find_peek (tf, 0, data_len); + if (!data) { + /* less than 128 bytes are available, try to detect using whatever is available */ + data_len = gst_type_find_get_length (tf); + if (data_len == 0) + return; + + data = gst_type_find_peek (tf, 0, data_len); + if (!data) + return; + } /* make sure string passed to _autodetect() is NUL-terminated */ - str = g_malloc0 (129); - memcpy (str, data, 128); + str = g_malloc0 (data_len + 1); + memcpy (str, data, data_len); - if ((encoding = gst_sub_parse_detect_encoding (str, 128)) != NULL) { + if ((encoding = gst_sub_parse_detect_encoding (str, data_len)) != NULL) { gchar *converted_str; GError *err = NULL; gsize tmp; converted_str = - gst_sub_parse_gst_convert_to_utf8 (str, 128, encoding, &tmp, &err); + gst_sub_parse_gst_convert_to_utf8 (str, data_len, encoding, &tmp, &err); if (converted_str == NULL) { GST_DEBUG ("Encoding '%s' detected but conversion failed: %s", encoding, err->message); @@ -321,9 +332,15 @@ gst_sub_parse_type_find (GstTypeFind * tf, gpointer private) g_free (encoding); } - /* Check if at least the first 120 chars are valid UTF8, - * otherwise convert as always */ - if (!g_utf8_validate (str, 128, &end) && (end - str) < 120) { + /* Check if content is valid UTF-8 but allow for the 8 last bytes to be invalid in + * case of an incomplete 
unicode sequence. */ + if (data_len > 8) + checked_len = data_len - 8; + else + checked_len = data_len; + + if (!g_utf8_validate (str, data_len, &end) && (end - str) < checked_len) { + /* Invalid UTF-8, try converting */ gchar *converted_str; gsize tmp; const gchar *enc; @@ -337,7 +354,7 @@ gst_sub_parse_type_find (GstTypeFind * tf, gpointer private) } } converted_str = - gst_sub_parse_gst_convert_to_utf8 (str, 128, enc, &tmp, NULL); + gst_sub_parse_gst_convert_to_utf8 (str, data_len, enc, &tmp, NULL); if (converted_str != NULL) { g_free (str); str = converted_str; diff --git a/subprojects/gst-plugins-base/tests/check/gst/typefindfunctions.c b/subprojects/gst-plugins-base/tests/check/gst/typefindfunctions.c index 9c47bb4436..f5539ad2ab 100644 --- a/subprojects/gst-plugins-base/tests/check/gst/typefindfunctions.c +++ b/subprojects/gst-plugins-base/tests/check/gst/typefindfunctions.c @@ -512,11 +512,25 @@ GST_START_TEST (test_subparse) { const gchar *type; GstCaps *caps = NULL; + GstTypeFindProbability prob; + guint8 one_byte[] = { + 'A', + }; caps = typefind_test_file ("subrip.srt"); type = gst_structure_get_name (gst_caps_get_structure (caps, 0)); fail_unless_equals_string (type, "application/x-subtitle"); gst_caps_unref (caps); + + caps = typefind_test_file ("subrip-short.srt"); + type = gst_structure_get_name (gst_caps_get_structure (caps, 0)); + fail_unless_equals_string (type, "application/x-subtitle"); + gst_caps_unref (caps); + + /* check that one byte content does not crash subparse typefinder */ + prob = 0; + caps = typefind_data (one_byte, sizeof (one_byte), &prob); + fail_unless (caps == NULL); } GST_END_TEST; diff --git a/subprojects/gst-plugins-base/tests/files/subrip-short.srt b/subprojects/gst-plugins-base/tests/files/subrip-short.srt new file mode 100644 index 0000000000..42ee5ffba3 --- /dev/null +++ b/subprojects/gst-plugins-base/tests/files/subrip-short.srt @@ -0,0 +1,3 @@ +1 +00:00:01,000 --> 00:00:02,000 +One