diff --git a/ChangeLog b/ChangeLog index 23f628a96a..b93d1fcfc8 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,16 @@ +2006-10-20 Tim-Philipp Müller + + * gst/subparse/gstsubparse.c: (subrip_fix_up_markup), + (parse_subrip), (handle_buffer): + Add missing closing tags for markup and fix broken markup, + otherwise pango won't render anything (fixes #357531). Also, + make sure the text we send out is always NUL-terminated + (better safe than sorry etc.). + + * tests/check/elements/subparse.c: (test_srt_do_test), + (test_srt): + Some more tests for .srt incl. tests for the above stuff. + 2006-10-20 Julien MOUTTE * sys/ximage/ximagesink.c: (gst_ximagesink_ximage_put): diff --git a/gst/subparse/gstsubparse.c b/gst/subparse/gstsubparse.c index d17be39f00..b8942a34c4 100644 --- a/gst/subparse/gstsubparse.c +++ b/gst/subparse/gstsubparse.c @@ -1,6 +1,7 @@ /* GStreamer * Copyright (C) <1999> Erik Walthinsen - * Copyright (c) 2004 Ronald S. Bultje + * Copyright (C) 2004 Ronald S. Bultje + * Copyright (C) 2006 Tim-Philipp Müller * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public @@ -528,6 +529,71 @@ subrip_unescape_formatting (gchar * txt) } } +/* we only allow , and , so let's take a simple approach. This code + * assumes the input has been escaped and subrip_unescape_formatting() has then + * been run over the input! This function adds missing closing markup tags and + * removes broken closing tags for tags that have never been opened. */ +static void +subrip_fix_up_markup (gchar ** p_txt) +{ + gchar *cur, *next_tag; + gchar open_tags[32]; + guint num_open_tags = 0; + + g_assert (*p_txt != NULL); + + cur = *p_txt; + while (*cur != '\0') { + next_tag = strchr (cur, '<'); + if (next_tag == NULL) + break; + ++next_tag; + switch (*next_tag) { + case '/':{ + ++next_tag; + if (num_open_tags == 0 || open_tags[num_open_tags - 1] != *next_tag) { + GST_LOG ("broken input, closing tag '%c' is not open", *next_tag); + g_memmove (next_tag - 2, next_tag + 2, strlen (next_tag + 2) + 1); + next_tag -= 2; + } else { + /* it's all good, closing tag which is open */ + --num_open_tags; + } + break; + } + case 'i': + case 'b': + case 'u': + if (num_open_tags == G_N_ELEMENTS (open_tags)) + return; /* something dodgy is going on, stop parsing */ + open_tags[num_open_tags] = *next_tag; + ++num_open_tags; + break; + default: + GST_ERROR ("unexpected tag '%c' (%s)", *next_tag, next_tag); + g_assert_not_reached (); + break; + } + cur = next_tag; + } + + if (num_open_tags > 0) { + GString *s; + + s = g_string_new (*p_txt); + while (num_open_tags > 0) { + GST_LOG ("adding missing closing tag '%c'", open_tags[num_open_tags - 1]); + g_string_append_c (s, '<'); + g_string_append_c (s, '/'); + g_string_append_c (s, open_tags[num_open_tags - 1]); + g_string_append_c (s, '>'); + --num_open_tags; + } + g_free (*p_txt); + *p_txt = g_string_free (s, FALSE); + } +} + static gchar * parse_subrip (ParserState * state, const gchar * line) { @@ -587,6 +653,7 @@ parse_subrip (ParserState * state, const gchar * line) state->state = 0; subrip_unescape_formatting (ret); strip_trailing_newlines (ret); + subrip_fix_up_markup (&ret); return ret; } return NULL; @@ -818,12 +885,15 @@ handle_buffer (GstSubParse * self, GstBuffer * buf) if (subtitle) { guint subtitle_len = strlen (subtitle); + /* +1 for terminating NUL character */ ret = gst_pad_alloc_buffer_and_set_caps (self->srcpad, - GST_BUFFER_OFFSET_NONE, subtitle_len, + GST_BUFFER_OFFSET_NONE, subtitle_len + 1, GST_PAD_CAPS (self->srcpad), &buf); if (ret == GST_FLOW_OK) { - memcpy (GST_BUFFER_DATA (buf), subtitle, subtitle_len); + /* copy terminating NUL character as well */ + memcpy (GST_BUFFER_DATA (buf), subtitle, subtitle_len + 1); + GST_BUFFER_SIZE (buf) = subtitle_len; GST_BUFFER_TIMESTAMP (buf) = self->state.start_time; GST_BUFFER_DURATION (buf) = self->state.duration; diff --git a/tests/check/elements/subparse.c b/tests/check/elements/subparse.c index c34c739d88..80d32dface 100644 --- a/tests/check/elements/subparse.c +++ b/tests/check/elements/subparse.c @@ -51,24 +51,54 @@ buffer_from_static_string (const gchar * s) return buf; } -static struct +typedef struct { const gchar *in; GstClockTime from_ts; GstClockTime to_ts; const gchar *out; -} srt1_input[] = { +} SubParseInputChunk; + +static SubParseInputChunk srt_input[] = { { - "1\n00:00:01,000 --> 00:00:02,000\nOne\n\n", - 1 * GST_SECOND, 2 * GST_SECOND, "One"}, { - "2\n00:00:02,000 --> 00:00:03,000\nTwo\n\n", - 2 * GST_SECOND, 3 * GST_SECOND, "Two"}, { - "3\n00:00:03,000 --> 00:00:04,000\nThree\n\n", - 3 * GST_SECOND, 4 * GST_SECOND, "Three"}, { - "4\n00:00:04,000 --> 00:00:05,000\nFour\n\n", - 4 * GST_SECOND, 5 * GST_SECOND, "Four"}, { - "5\n00:00:05,000 --> 00:00:06,000\nFive\n", - 5 * GST_SECOND, 6 * GST_SECOND, "Five"} + "1\n00:00:01,000 --> 00:00:02,000\nOne\n\n", + 1 * GST_SECOND, 2 * GST_SECOND, "One"}, { + "2\n00:00:02,000 --> 00:00:03,000\nTwo\n\n", + 2 * GST_SECOND, 3 * GST_SECOND, "Two"}, { + "3\n00:00:03,000 --> 00:00:04,000\nThree\n\n", + 3 * GST_SECOND, 4 * GST_SECOND, "Three"}, { + "4\n00:00:04,000 --> 00:00:05,000\nFour\n\n", + 4 * GST_SECOND, 5 * GST_SECOND, "Four"}, { + "5\n00:00:05,000 --> 00:00:06,000\nFive\n\n", + 5 * GST_SECOND, 6 * GST_SECOND, "Five"}, { + /* markup should be preserved */ + "6\n00:00:06,000 --> 00:00:07,000\nSix\n\n", + 6 * GST_SECOND, 7 * GST_SECOND, "Six"}, { + /* open markup tags should be closed */ + "7\n00:00:07,000 --> 00:00:08,000\nSeven\n\n", + 7 * GST_SECOND, 8 * GST_SECOND, "Seven"}, { + /* open markup tags should be closed (II) */ + "8\n00:00:08,000 --> 00:00:09,000\nEight\n\n", + 8 * GST_SECOND, 9 * GST_SECOND, "Eight"}, { + /* broken markup should be fixed */ + "9\n00:00:09,000 --> 00:00:10,000\n\n\n", + 9 * GST_SECOND, 10 * GST_SECOND, ""}, { + "10\n00:00:10,000 --> 00:00:11,000\n\n\n", + 10 * GST_SECOND, 11 * GST_SECOND, ""}, { + "11\n00:00:11,000 --> 00:00:12,000\nxyz\n\n", + 11 * GST_SECOND, 12 * GST_SECOND, "xyz"}, { + "12\n00:00:12,000 --> 00:00:13,000\nxyz\n\n", + 12 * GST_SECOND, 13 * GST_SECOND, "xyz"}, { + /* skip a few chunk numbers here, the numbers shouldn't matter */ + "24\n00:01:00,000 --> 00:02:00,000\nYep, still here\n\n", + 60 * GST_SECOND, 120 * GST_SECOND, "Yep, still here"}, { + /* make sure stuff is escaped properly, but allowed markup stays intact */ + "25\n00:03:00,000 --> 00:04:00,000\ngave Rock & Roll to\n\n", + 180 * GST_SECOND, 240 * GST_SECOND, "gave Rock & Roll to"}, { + "26\n00:04:00,000 --> 00:05:00,000\nRock & Roll\n\n", + 240 * GST_SECOND, 300 * GST_SECOND, "Rock & Roll"}, { + "27\n00:06:00,000 --> 00:08:00,000\nRock & Roll\n\n", + 360 * GST_SECOND, 480 * GST_SECOND, "Rock & Roll"} }; static void @@ -104,7 +134,7 @@ teardown_subparse (void) } static void -test_srt_do_test (guint start_idx, guint num) +test_srt_do_test (SubParseInputChunk * input, guint start_idx, guint num) { guint n; @@ -115,7 +145,7 @@ test_srt_do_test (guint start_idx, guint num) for (n = start_idx; n < start_idx + num; ++n) { GstBuffer *buf; - buf = buffer_from_static_string (srt1_input[n].in); + buf = buffer_from_static_string (input[n].in); fail_unless_equals_int (gst_pad_push (mysrcpad, buf), GST_FLOW_OK); } @@ -133,20 +163,24 @@ test_srt_do_test (guint start_idx, guint num) fail_unless (buf != NULL); fail_unless (GST_BUFFER_TIMESTAMP_IS_VALID (buf), NULL); fail_unless (GST_BUFFER_DURATION_IS_VALID (buf), NULL); - fail_unless_equals_uint64 (GST_BUFFER_TIMESTAMP (buf), - srt1_input[n].from_ts); + fail_unless_equals_uint64 (GST_BUFFER_TIMESTAMP (buf), input[n].from_ts); fail_unless_equals_uint64 (GST_BUFFER_DURATION (buf), - srt1_input[n].to_ts - srt1_input[n].from_ts); + input[n].to_ts - input[n].from_ts); out = (gchar *) GST_BUFFER_DATA (buf); out_size = GST_BUFFER_SIZE (buf); /* shouldn't have trailing newline characters */ fail_if (out_size > 0 && out[out_size - 1] == '\n'); - fail_unless (strncmp (out, srt1_input[n].out, out_size) == 0); + /* shouldn't include NUL-terminator in data size */ + fail_if (out_size > 0 && out[out_size - 1] == '\0'); + /* but should still have a NUL-terminator behind the declared data */ + fail_unless_equals_int (out[out_size], '\0'); + /* make sure out string matches expected string */ + fail_unless_equals_string (out, input[n].out); /* check caps */ fail_unless (GST_BUFFER_CAPS (buf) != NULL); buffer_caps_struct = gst_caps_get_structure (GST_BUFFER_CAPS (buf), 0); - fail_unless (gst_structure_has_name (buffer_caps_struct, "text/plain") - || gst_structure_has_name (buffer_caps_struct, "text/x-pango-markup")); + fail_unless_equals_string (gst_structure_get_name (buffer_caps_struct), + "text/x-pango-markup"); } teardown_subparse (); @@ -154,16 +188,16 @@ test_srt_do_test (guint start_idx, guint num) GST_START_TEST (test_srt) { - test_srt_do_test (0, G_N_ELEMENTS (srt1_input)); + test_srt_do_test (srt_input, 0, G_N_ELEMENTS (srt_input)); /* make sure everything works fine if we don't start with chunk 1 */ - test_srt_do_test (1, G_N_ELEMENTS (srt1_input) - 1); - test_srt_do_test (2, G_N_ELEMENTS (srt1_input) - 2); - test_srt_do_test (3, G_N_ELEMENTS (srt1_input) - 3); - test_srt_do_test (4, G_N_ELEMENTS (srt1_input) - 4); + test_srt_do_test (srt_input, 1, G_N_ELEMENTS (srt_input) - 1); + test_srt_do_test (srt_input, 2, G_N_ELEMENTS (srt_input) - 2); + test_srt_do_test (srt_input, 3, G_N_ELEMENTS (srt_input) - 3); + test_srt_do_test (srt_input, 4, G_N_ELEMENTS (srt_input) - 4); /* try with empty input, immediate EOS */ - test_srt_do_test (5, G_N_ELEMENTS (srt1_input) - 5); + test_srt_do_test (srt_input, 5, G_N_ELEMENTS (srt_input) - 5); } GST_END_TEST;