gst/subparse/gstsubparse.c: Add missing closing tags for markup and fix broken markup, otherwise pango won't render a...

Original commit message from CVS:
* gst/subparse/gstsubparse.c: (subrip_fix_up_markup),
(parse_subrip), (handle_buffer):
Add missing closing tags for markup and fix broken markup,
otherwise pango won't render anything (fixes #357531). Also,
make sure the text we send out is always NUL-terminated
(better safe than sorry etc.).
* tests/check/elements/subparse.c: (test_srt_do_test),
(test_srt):
Some more tests for .srt incl. tests for the above stuff.
This commit is contained in:
Tim-Philipp Müller 2006-10-20 17:02:19 +00:00
parent 776911ecc2
commit ad087e01ff
3 changed files with 146 additions and 29 deletions

View file

@ -1,3 +1,16 @@
2006-10-20 Tim-Philipp Müller <tim at centricular dot net>
* gst/subparse/gstsubparse.c: (subrip_fix_up_markup),
(parse_subrip), (handle_buffer):
Add missing closing tags for markup and fix broken markup,
otherwise pango won't render anything (fixes #357531). Also,
make sure the text we send out is always NUL-terminated
(better safe than sorry etc.).
* tests/check/elements/subparse.c: (test_srt_do_test),
(test_srt):
Some more tests for .srt incl. tests for the above stuff.
2006-10-20 Julien MOUTTE <julien@moutte.net>
* sys/ximage/ximagesink.c: (gst_ximagesink_ximage_put):

View file

@ -1,6 +1,7 @@
/* GStreamer
* Copyright (C) <1999> Erik Walthinsen <omega@cse.ogi.edu>
* Copyright (c) 2004 Ronald S. Bultje <rbultje@ronald.bitfreak.net>
* Copyright (C) 2004 Ronald S. Bultje <rbultje@ronald.bitfreak.net>
* Copyright (C) 2006 Tim-Philipp Müller <tim centricular net>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
@ -528,6 +529,71 @@ subrip_unescape_formatting (gchar * txt)
}
}
/* we only allow <i>, <u> and <b>, so let's take a simple approach. This code
* assumes the input has been escaped and subrip_unescape_formatting() has then
* been run over the input! This function adds missing closing markup tags and
* removes broken closing tags for tags that have never been opened. */
static void
subrip_fix_up_markup (gchar ** p_txt)
{
gchar *cur, *next_tag;
gchar open_tags[32];
guint num_open_tags = 0;
g_assert (*p_txt != NULL);
cur = *p_txt;
while (*cur != '\0') {
next_tag = strchr (cur, '<');
if (next_tag == NULL)
break;
++next_tag;
switch (*next_tag) {
case '/':{
++next_tag;
if (num_open_tags == 0 || open_tags[num_open_tags - 1] != *next_tag) {
GST_LOG ("broken input, closing tag '%c' is not open", *next_tag);
g_memmove (next_tag - 2, next_tag + 2, strlen (next_tag + 2) + 1);
next_tag -= 2;
} else {
/* it's all good, closing tag which is open */
--num_open_tags;
}
break;
}
case 'i':
case 'b':
case 'u':
if (num_open_tags == G_N_ELEMENTS (open_tags))
return; /* something dodgy is going on, stop parsing */
open_tags[num_open_tags] = *next_tag;
++num_open_tags;
break;
default:
GST_ERROR ("unexpected tag '%c' (%s)", *next_tag, next_tag);
g_assert_not_reached ();
break;
}
cur = next_tag;
}
if (num_open_tags > 0) {
GString *s;
s = g_string_new (*p_txt);
while (num_open_tags > 0) {
GST_LOG ("adding missing closing tag '%c'", open_tags[num_open_tags - 1]);
g_string_append_c (s, '<');
g_string_append_c (s, '/');
g_string_append_c (s, open_tags[num_open_tags - 1]);
g_string_append_c (s, '>');
--num_open_tags;
}
g_free (*p_txt);
*p_txt = g_string_free (s, FALSE);
}
}
static gchar *
parse_subrip (ParserState * state, const gchar * line)
{
@ -587,6 +653,7 @@ parse_subrip (ParserState * state, const gchar * line)
state->state = 0;
subrip_unescape_formatting (ret);
strip_trailing_newlines (ret);
subrip_fix_up_markup (&ret);
return ret;
}
return NULL;
@ -818,12 +885,15 @@ handle_buffer (GstSubParse * self, GstBuffer * buf)
if (subtitle) {
guint subtitle_len = strlen (subtitle);
/* +1 for terminating NUL character */
ret = gst_pad_alloc_buffer_and_set_caps (self->srcpad,
GST_BUFFER_OFFSET_NONE, subtitle_len,
GST_BUFFER_OFFSET_NONE, subtitle_len + 1,
GST_PAD_CAPS (self->srcpad), &buf);
if (ret == GST_FLOW_OK) {
memcpy (GST_BUFFER_DATA (buf), subtitle, subtitle_len);
/* copy terminating NUL character as well */
memcpy (GST_BUFFER_DATA (buf), subtitle, subtitle_len + 1);
GST_BUFFER_SIZE (buf) = subtitle_len;
GST_BUFFER_TIMESTAMP (buf) = self->state.start_time;
GST_BUFFER_DURATION (buf) = self->state.duration;

View file

@ -51,24 +51,54 @@ buffer_from_static_string (const gchar * s)
return buf;
}
static struct
typedef struct
{
const gchar *in;
GstClockTime from_ts;
GstClockTime to_ts;
const gchar *out;
} srt1_input[] = {
} SubParseInputChunk;
static SubParseInputChunk srt_input[] = {
{
"1\n00:00:01,000 --> 00:00:02,000\nOne\n\n",
1 * GST_SECOND, 2 * GST_SECOND, "One"}, {
"2\n00:00:02,000 --> 00:00:03,000\nTwo\n\n",
2 * GST_SECOND, 3 * GST_SECOND, "Two"}, {
"3\n00:00:03,000 --> 00:00:04,000\nThree\n\n",
3 * GST_SECOND, 4 * GST_SECOND, "Three"}, {
"4\n00:00:04,000 --> 00:00:05,000\nFour\n\n",
4 * GST_SECOND, 5 * GST_SECOND, "Four"}, {
"5\n00:00:05,000 --> 00:00:06,000\nFive\n",
5 * GST_SECOND, 6 * GST_SECOND, "Five"}
"1\n00:00:01,000 --> 00:00:02,000\nOne\n\n",
1 * GST_SECOND, 2 * GST_SECOND, "One"}, {
"2\n00:00:02,000 --> 00:00:03,000\nTwo\n\n",
2 * GST_SECOND, 3 * GST_SECOND, "Two"}, {
"3\n00:00:03,000 --> 00:00:04,000\nThree\n\n",
3 * GST_SECOND, 4 * GST_SECOND, "Three"}, {
"4\n00:00:04,000 --> 00:00:05,000\nFour\n\n",
4 * GST_SECOND, 5 * GST_SECOND, "Four"}, {
"5\n00:00:05,000 --> 00:00:06,000\nFive\n\n",
5 * GST_SECOND, 6 * GST_SECOND, "Five"}, {
/* markup should be preserved */
"6\n00:00:06,000 --> 00:00:07,000\n<i>Six</i>\n\n",
6 * GST_SECOND, 7 * GST_SECOND, "<i>Six</i>"}, {
/* open markup tags should be closed */
"7\n00:00:07,000 --> 00:00:08,000\n<i>Seven\n\n",
7 * GST_SECOND, 8 * GST_SECOND, "<i>Seven</i>"}, {
/* open markup tags should be closed (II) */
"8\n00:00:08,000 --> 00:00:09,000\n<b><i>Eight\n\n",
8 * GST_SECOND, 9 * GST_SECOND, "<b><i>Eight</i></b>"}, {
/* broken markup should be fixed */
"9\n00:00:09,000 --> 00:00:10,000\n</b>\n\n",
9 * GST_SECOND, 10 * GST_SECOND, ""}, {
"10\n00:00:10,000 --> 00:00:11,000\n</b></i>\n\n",
10 * GST_SECOND, 11 * GST_SECOND, ""}, {
"11\n00:00:11,000 --> 00:00:12,000\n<i>xyz</b></i>\n\n",
11 * GST_SECOND, 12 * GST_SECOND, "<i>xyz</i>"}, {
"12\n00:00:12,000 --> 00:00:13,000\n<i>xyz</b>\n\n",
12 * GST_SECOND, 13 * GST_SECOND, "<i>xyz</i>"}, {
/* skip a few chunk numbers here, the numbers shouldn't matter */
"24\n00:01:00,000 --> 00:02:00,000\nYep, still here\n\n",
60 * GST_SECOND, 120 * GST_SECOND, "Yep, still here"}, {
/* make sure stuff is escaped properly, but allowed markup stays intact */
"25\n00:03:00,000 --> 00:04:00,000\ngave <i>Rock & Roll</i> to\n\n",
180 * GST_SECOND, 240 * GST_SECOND, "gave <i>Rock &amp; Roll</i> to"}, {
"26\n00:04:00,000 --> 00:05:00,000\n<i>Rock & Roll</i>\n\n",
240 * GST_SECOND, 300 * GST_SECOND, "<i>Rock &amp; Roll</i>"}, {
"27\n00:06:00,000 --> 00:08:00,000\nRock & Roll\n\n",
360 * GST_SECOND, 480 * GST_SECOND, "Rock &amp; Roll"}
};
static void
@ -104,7 +134,7 @@ teardown_subparse (void)
}
static void
test_srt_do_test (guint start_idx, guint num)
test_srt_do_test (SubParseInputChunk * input, guint start_idx, guint num)
{
guint n;
@ -115,7 +145,7 @@ test_srt_do_test (guint start_idx, guint num)
for (n = start_idx; n < start_idx + num; ++n) {
GstBuffer *buf;
buf = buffer_from_static_string (srt1_input[n].in);
buf = buffer_from_static_string (input[n].in);
fail_unless_equals_int (gst_pad_push (mysrcpad, buf), GST_FLOW_OK);
}
@ -133,20 +163,24 @@ test_srt_do_test (guint start_idx, guint num)
fail_unless (buf != NULL);
fail_unless (GST_BUFFER_TIMESTAMP_IS_VALID (buf), NULL);
fail_unless (GST_BUFFER_DURATION_IS_VALID (buf), NULL);
fail_unless_equals_uint64 (GST_BUFFER_TIMESTAMP (buf),
srt1_input[n].from_ts);
fail_unless_equals_uint64 (GST_BUFFER_TIMESTAMP (buf), input[n].from_ts);
fail_unless_equals_uint64 (GST_BUFFER_DURATION (buf),
srt1_input[n].to_ts - srt1_input[n].from_ts);
input[n].to_ts - input[n].from_ts);
out = (gchar *) GST_BUFFER_DATA (buf);
out_size = GST_BUFFER_SIZE (buf);
/* shouldn't have trailing newline characters */
fail_if (out_size > 0 && out[out_size - 1] == '\n');
fail_unless (strncmp (out, srt1_input[n].out, out_size) == 0);
/* shouldn't include NUL-terminator in data size */
fail_if (out_size > 0 && out[out_size - 1] == '\0');
/* but should still have a NUL-terminator behind the declared data */
fail_unless_equals_int (out[out_size], '\0');
/* make sure out string matches expected string */
fail_unless_equals_string (out, input[n].out);
/* check caps */
fail_unless (GST_BUFFER_CAPS (buf) != NULL);
buffer_caps_struct = gst_caps_get_structure (GST_BUFFER_CAPS (buf), 0);
fail_unless (gst_structure_has_name (buffer_caps_struct, "text/plain")
|| gst_structure_has_name (buffer_caps_struct, "text/x-pango-markup"));
fail_unless_equals_string (gst_structure_get_name (buffer_caps_struct),
"text/x-pango-markup");
}
teardown_subparse ();
@ -154,16 +188,16 @@ test_srt_do_test (guint start_idx, guint num)
GST_START_TEST (test_srt)
{
test_srt_do_test (0, G_N_ELEMENTS (srt1_input));
test_srt_do_test (srt_input, 0, G_N_ELEMENTS (srt_input));
/* make sure everything works fine if we don't start with chunk 1 */
test_srt_do_test (1, G_N_ELEMENTS (srt1_input) - 1);
test_srt_do_test (2, G_N_ELEMENTS (srt1_input) - 2);
test_srt_do_test (3, G_N_ELEMENTS (srt1_input) - 3);
test_srt_do_test (4, G_N_ELEMENTS (srt1_input) - 4);
test_srt_do_test (srt_input, 1, G_N_ELEMENTS (srt_input) - 1);
test_srt_do_test (srt_input, 2, G_N_ELEMENTS (srt_input) - 2);
test_srt_do_test (srt_input, 3, G_N_ELEMENTS (srt_input) - 3);
test_srt_do_test (srt_input, 4, G_N_ELEMENTS (srt_input) - 4);
/* try with empty input, immediate EOS */
test_srt_do_test (5, G_N_ELEMENTS (srt1_input) - 5);
test_srt_do_test (srt_input, 5, G_N_ELEMENTS (srt_input) - 5);
}
GST_END_TEST;