subparse: support for more than 32 unclosed markup tags

https://bugzilla.gnome.org/show_bug.cgi?id=796043
This commit is contained in:
Philippe Normand 2018-05-12 13:53:02 +01:00
parent afca8d4815
commit 8cfd85ec61
2 changed files with 14 additions and 9 deletions

View file

@ -758,7 +758,7 @@ static void
subrip_fix_up_markup (gchar ** p_txt, gconstpointer allowed_tags_ptr) subrip_fix_up_markup (gchar ** p_txt, gconstpointer allowed_tags_ptr)
{ {
gchar *cur, *next_tag; gchar *cur, *next_tag;
gchar *open_tags[32]; GPtrArray *open_tags = NULL;
guint num_open_tags = 0; guint num_open_tags = 0;
const gchar *iter_tag; const gchar *iter_tag;
guint offset = 0; guint offset = 0;
@ -771,6 +771,7 @@ subrip_fix_up_markup (gchar ** p_txt, gconstpointer allowed_tags_ptr)
g_assert (*p_txt != NULL); g_assert (*p_txt != NULL);
open_tags = g_ptr_array_new_with_free_func (g_free);
cur = *p_txt; cur = *p_txt;
while (*cur != '\0') { while (*cur != '\0') {
next_tag = strchr (cur, '<'); next_tag = strchr (cur, '<');
@ -800,7 +801,7 @@ subrip_fix_up_markup (gchar ** p_txt, gconstpointer allowed_tags_ptr)
index++; index++;
if (offset) { if (offset) {
/* OK we found a tag, let's keep track of it */ /* OK we found a tag, let's keep track of it */
open_tags[num_open_tags] = g_strdup (iter_tag); g_ptr_array_add (open_tags, g_ascii_strdown (iter_tag, -1));
++num_open_tags; ++num_open_tags;
break; break;
} }
@ -815,15 +816,15 @@ subrip_fix_up_markup (gchar ** p_txt, gconstpointer allowed_tags_ptr)
if (*next_tag == '<' && *(next_tag + 1) == '/') { if (*next_tag == '<' && *(next_tag + 1) == '/') {
end_tag = strchr (cur, '>'); end_tag = strchr (cur, '>');
if (end_tag) { if (end_tag) {
const gchar *last = g_ptr_array_index (open_tags, num_open_tags - 1);
if (num_open_tags == 0 if (num_open_tags == 0
|| g_ascii_strncasecmp (end_tag - 1, open_tags[num_open_tags - 1], || g_ascii_strncasecmp (end_tag - 1, last, strlen (last))) {
strlen (open_tags[num_open_tags - 1]))) { GST_LOG ("broken input, closing tag '%s' is not open", end_tag - 1);
GST_LOG ("broken input, closing tag '%s' is not open", next_tag);
memmove (next_tag, end_tag + 1, strlen (end_tag) + 1); memmove (next_tag, end_tag + 1, strlen (end_tag) + 1);
next_tag -= strlen (end_tag); next_tag -= strlen (end_tag);
} else { } else {
--num_open_tags; --num_open_tags;
g_free (open_tags[num_open_tags]); g_ptr_array_remove_index (open_tags, num_open_tags);
} }
} }
} }
@ -836,17 +837,18 @@ subrip_fix_up_markup (gchar ** p_txt, gconstpointer allowed_tags_ptr)
s = g_string_new (*p_txt); s = g_string_new (*p_txt);
while (num_open_tags > 0) { while (num_open_tags > 0) {
GST_LOG ("adding missing closing tag '%s'", open_tags[num_open_tags - 1]); GST_LOG ("adding missing closing tag '%s'", g_ptr_array_index (open_tags,
num_open_tags - 1));
g_string_append_c (s, '<'); g_string_append_c (s, '<');
g_string_append_c (s, '/'); g_string_append_c (s, '/');
g_string_append (s, open_tags[num_open_tags - 1]); g_string_append (s, g_ptr_array_index (open_tags, num_open_tags - 1));
g_string_append_c (s, '>'); g_string_append_c (s, '>');
g_free (open_tags[num_open_tags - 1]);
--num_open_tags; --num_open_tags;
} }
g_free (*p_txt); g_free (*p_txt);
*p_txt = g_string_free (s, FALSE); *p_txt = g_string_free (s, FALSE);
} }
g_ptr_array_free (open_tags, TRUE);
} }
static gboolean static gboolean

View file

@ -93,6 +93,9 @@ static SubParseInputChunk srt_input[] = {
11 * GST_SECOND, 12 * GST_SECOND, "<i>xyz</i>"}, { 11 * GST_SECOND, 12 * GST_SECOND, "<i>xyz</i>"}, {
"12\n00:00:12,000 --> 00:00:13,000\n<i>xyz</b>\n\n", "12\n00:00:12,000 --> 00:00:13,000\n<i>xyz</b>\n\n",
12 * GST_SECOND, 13 * GST_SECOND, "<i>xyz</i>"}, { 12 * GST_SECOND, 13 * GST_SECOND, "<i>xyz</i>"}, {
"13\n00:00:13,000 --> 00:00:14,000\n<i><i><i><i><i><i><i><i><i><i><i><i><i><i><i><i><i><i><i><i><i><i><i><i><i><i><i><i><i><i><i><i><i>Keep them comiiiiiing\n\n",
13 * GST_SECOND, 14 * GST_SECOND,
"<i><i><i><i><i><i><i><i><i><i><i><i><i><i><i><i><i><i><i><i><i><i><i><i><i><i><i><i><i><i><i><i><i>Keep them comiiiiiing</i></i></i></i></i></i></i></i></i></i></i></i></i></i></i></i></i></i></i></i></i></i></i></i></i></i></i></i></i></i></i></i></i>"}, {
/* skip a few chunk numbers here, the numbers shouldn't matter */ /* skip a few chunk numbers here, the numbers shouldn't matter */
"24\n00:01:00,000 --> 00:02:00,000\nYep, still here\n\n", "24\n00:01:00,000 --> 00:02:00,000\nYep, still here\n\n",
60 * GST_SECOND, 120 * GST_SECOND, "Yep, still here"}, { 60 * GST_SECOND, 120 * GST_SECOND, "Yep, still here"}, {