Add support for subtitle files with UTF-8 BOM at the beginning by simply stripping it from the first line before pass...

Original commit message from CVS:
* gst/subparse/gstsubparse.c:
(gst_sub_parse_data_format_autodetect), (handle_buffer),
(gst_sub_parse_change_state):
* gst/subparse/gstsubparse.h:
* tests/check/elements/subparse.c: (GST_START_TEST):
Add support for subtitle files with UTF-8 BOM at the beginning
by simply stripping it from the first line before passing it
to any parsing code. Fixes bug #555257 and playback of files
created by Gnome Subtitles.
This commit is contained in:
Sebastian Dröge 2008-10-10 17:13:40 +00:00
parent 81f5117fa9
commit 60bf63486b
4 changed files with 43 additions and 2 deletions

View file

@ -1,3 +1,15 @@
2008-10-10 Sebastian Dröge <sebastian.droege@collabora.co.uk>
* gst/subparse/gstsubparse.c:
(gst_sub_parse_data_format_autodetect), (handle_buffer),
(gst_sub_parse_change_state):
* gst/subparse/gstsubparse.h:
* tests/check/elements/subparse.c: (GST_START_TEST):
Add support for subtitle files with UTF-8 BOM at the beginning
by simply stripping it from the first line before passing it
to any parsing code. Fixes bug #555257 and playback of files
created by Gnome Subtitles.
2008-10-10 Wim Taymans <wim.taymans@collabora.co.uk>
* gst/audiotestsrc/gstaudiotestsrc.c:

View file

@ -922,6 +922,11 @@ gst_sub_parse_data_format_autodetect (gchar * match_str)
}
}
/* If the string contains a UTF-8 BOM drop it */
if ((guint8) match_str[0] == 0xEF && (guint8) match_str[1] == 0xBB
&& (guint8) match_str[2] == 0xBF)
match_str += 3;
if (regexec (&mdvd_rx, match_str, 0, NULL, 0) == 0) {
GST_LOG ("MicroDVD (frame based) format detected");
return GST_SUB_PARSE_FORMAT_MDVDSUB;
@ -1073,11 +1078,22 @@ handle_buffer (GstSubParse * self, GstBuffer * buf)
}
while ((line = get_next_line (self)) && !self->flushing) {
guint offset = 0;
/* If this is the first line and it contains a UTF-8 BOM drop it */
if (self->first_line && strlen (line) >= 3 &&
(guint8) line[0] == 0xEF && (guint8) line[1] == 0xBB
&& (guint8) line[2] == 0xBF) {
offset = 3;
}
self->first_line = FALSE;
/* Set segment on our parser state machine */
self->state.segment = &self->segment;
/* Now parse the line, out of segment lines will just return NULL */
GST_LOG_OBJECT (self, "Parsing line '%s'", line);
subtitle = self->parse_line (&self->state, line);
GST_LOG_OBJECT (self, "Parsing line '%s'", line + offset);
subtitle = self->parse_line (&self->state, line + offset);
g_free (line);
if (subtitle) {
@ -1252,6 +1268,7 @@ gst_sub_parse_change_state (GstElement * element, GstStateChange transition)
self->next_offset = 0;
self->parser_type = GST_SUB_PARSE_FORMAT_UNKNOWN;
self->valid_utf8 = TRUE;
self->first_line = TRUE;
g_string_truncate (self->textbuf, 0);
break;
default:

View file

@ -93,6 +93,8 @@ struct _GstSubParse {
gboolean flushing;
gboolean valid_utf8;
gchar *encoding;
gboolean first_line;
};
struct _GstSubParseClass {

View file

@ -132,6 +132,13 @@ static SubParseInputChunk srt_input1[] = {
44 * GST_SECOND + 44 * GST_MSECOND, "I still cant see anything."}
};
/* SRT test input whose data starts with a UTF-8 BOM (bytes EF BB BF)
 * immediately before the first subtitle index, to check that a leading BOM
 * does not break parsing.
 * NOTE(review): the values after the input string look like the expected
 * start time, end time and output text, as in the other input arrays --
 * confirm against the SubParseInputChunk definition. */
static SubParseInputChunk srt_input2[] = {
{
"\xef\xbb\xbf" "1\n00:00:00,000 --> 00:00:03,50\nJust testing.\n\n",
0, 3 * GST_SECOND + 50 * GST_MSECOND, "Just testing."}
};
static void
setup_subparse (void)
{
@ -237,6 +244,9 @@ GST_START_TEST (test_srt)
/* try with spaces instead of doubled zeroes (which is not exactly according to spec) */
test_srt_do_test (srt_input1, 0, G_N_ELEMENTS (srt_input1));
/* try with UTF-8 BOM at the start; the array and its element count must
 * refer to the same input (srt_input2), otherwise the BOM case is never run */
test_srt_do_test (srt_input2, 0, G_N_ELEMENTS (srt_input2));
}
GST_END_TEST;