mirror of
https://gitlab.freedesktop.org/gstreamer/gstreamer.git
synced 2025-01-10 17:35:59 +00:00
subparse: fix typefind with small srt files
The typefind code was rejecting content smaller than 128 bytes, making it impossible to play very small srt files. Such files can actually be detected properly, so fix typefind to accept smaller content and do its best with whatever is available. Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/6937>
This commit is contained in:
parent
f7c8f4bb26
commit
81de6b7738
3 changed files with 44 additions and 10 deletions
|
@ -291,25 +291,36 @@ gst_sub_parse_type_find (GstTypeFind * tf, gpointer private)
|
||||||
{
|
{
|
||||||
GstSubParseFormat format;
|
GstSubParseFormat format;
|
||||||
const guint8 *data;
|
const guint8 *data;
|
||||||
|
guint64 data_len = 128, checked_len;
|
||||||
GstCaps *caps;
|
GstCaps *caps;
|
||||||
gchar *str;
|
gchar *str;
|
||||||
gchar *encoding = NULL;
|
gchar *encoding = NULL;
|
||||||
const gchar *end;
|
const gchar *end;
|
||||||
|
|
||||||
if (!(data = gst_type_find_peek (tf, 0, 129)))
|
/* use the first 128 bytes for detection, if available */
|
||||||
return;
|
data = gst_type_find_peek (tf, 0, data_len);
|
||||||
|
if (!data) {
|
||||||
|
/* fewer than 128 bytes are available, try to detect using whatever is available */
|
||||||
|
data_len = gst_type_find_get_length (tf);
|
||||||
|
if (data_len == 0)
|
||||||
|
return;
|
||||||
|
|
||||||
|
data = gst_type_find_peek (tf, 0, data_len);
|
||||||
|
if (!data)
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
/* make sure string passed to _autodetect() is NUL-terminated */
|
/* make sure string passed to _autodetect() is NUL-terminated */
|
||||||
str = g_malloc0 (129);
|
str = g_malloc0 (data_len + 1);
|
||||||
memcpy (str, data, 128);
|
memcpy (str, data, data_len);
|
||||||
|
|
||||||
if ((encoding = gst_sub_parse_detect_encoding (str, 128)) != NULL) {
|
if ((encoding = gst_sub_parse_detect_encoding (str, data_len)) != NULL) {
|
||||||
gchar *converted_str;
|
gchar *converted_str;
|
||||||
GError *err = NULL;
|
GError *err = NULL;
|
||||||
gsize tmp;
|
gsize tmp;
|
||||||
|
|
||||||
converted_str =
|
converted_str =
|
||||||
gst_sub_parse_gst_convert_to_utf8 (str, 128, encoding, &tmp, &err);
|
gst_sub_parse_gst_convert_to_utf8 (str, data_len, encoding, &tmp, &err);
|
||||||
if (converted_str == NULL) {
|
if (converted_str == NULL) {
|
||||||
GST_DEBUG ("Encoding '%s' detected but conversion failed: %s", encoding,
|
GST_DEBUG ("Encoding '%s' detected but conversion failed: %s", encoding,
|
||||||
err->message);
|
err->message);
|
||||||
|
@ -321,9 +332,15 @@ gst_sub_parse_type_find (GstTypeFind * tf, gpointer private)
|
||||||
g_free (encoding);
|
g_free (encoding);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Check if at least the first 120 chars are valid UTF8,
|
/* Check if content is valid UTF-8, but allow the last 8 bytes to be invalid in
|
||||||
* otherwise convert as always */
|
* case of incomplete unicode sequence. */
|
||||||
if (!g_utf8_validate (str, 128, &end) && (end - str) < 120) {
|
if (data_len > 8)
|
||||||
|
checked_len = data_len - 8;
|
||||||
|
else
|
||||||
|
checked_len = data_len;
|
||||||
|
|
||||||
|
if (!g_utf8_validate (str, data_len, &end) && (end - str) < checked_len) {
|
||||||
|
/* Invalid UTF-8, try converting */
|
||||||
gchar *converted_str;
|
gchar *converted_str;
|
||||||
gsize tmp;
|
gsize tmp;
|
||||||
const gchar *enc;
|
const gchar *enc;
|
||||||
|
@ -337,7 +354,7 @@ gst_sub_parse_type_find (GstTypeFind * tf, gpointer private)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
converted_str =
|
converted_str =
|
||||||
gst_sub_parse_gst_convert_to_utf8 (str, 128, enc, &tmp, NULL);
|
gst_sub_parse_gst_convert_to_utf8 (str, data_len, enc, &tmp, NULL);
|
||||||
if (converted_str != NULL) {
|
if (converted_str != NULL) {
|
||||||
g_free (str);
|
g_free (str);
|
||||||
str = converted_str;
|
str = converted_str;
|
||||||
|
|
|
@ -512,11 +512,25 @@ GST_START_TEST (test_subparse)
|
||||||
{
|
{
|
||||||
const gchar *type;
|
const gchar *type;
|
||||||
GstCaps *caps = NULL;
|
GstCaps *caps = NULL;
|
||||||
|
GstTypeFindProbability prob;
|
||||||
|
guint8 one_byte[] = {
|
||||||
|
'A',
|
||||||
|
};
|
||||||
|
|
||||||
caps = typefind_test_file ("subrip.srt");
|
caps = typefind_test_file ("subrip.srt");
|
||||||
type = gst_structure_get_name (gst_caps_get_structure (caps, 0));
|
type = gst_structure_get_name (gst_caps_get_structure (caps, 0));
|
||||||
fail_unless_equals_string (type, "application/x-subtitle");
|
fail_unless_equals_string (type, "application/x-subtitle");
|
||||||
gst_caps_unref (caps);
|
gst_caps_unref (caps);
|
||||||
|
|
||||||
|
caps = typefind_test_file ("subrip-short.srt");
|
||||||
|
type = gst_structure_get_name (gst_caps_get_structure (caps, 0));
|
||||||
|
fail_unless_equals_string (type, "application/x-subtitle");
|
||||||
|
gst_caps_unref (caps);
|
||||||
|
|
||||||
|
/* check that one byte content does not crash subparse typefinder */
|
||||||
|
prob = 0;
|
||||||
|
caps = typefind_data (one_byte, sizeof (one_byte), &prob);
|
||||||
|
fail_unless (caps == NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
GST_END_TEST;
|
GST_END_TEST;
|
||||||
|
|
|
@ -0,0 +1,3 @@
|
||||||
|
1
|
||||||
|
00:00:01,000 --> 00:00:02,000
|
||||||
|
One
|
Loading…
Reference in a new issue