Add support for subtitle files with UTF-8 BOM at the beginning by simply stripping it from the first line before pass...

Original commit message from CVS:
* gst/subparse/gstsubparse.c:
(gst_sub_parse_data_format_autodetect), (handle_buffer),
(gst_sub_parse_change_state):
* gst/subparse/gstsubparse.h:
* tests/check/elements/subparse.c: (GST_START_TEST):
Add support for subtitle files with UTF-8 BOM at the beginning
by simply stripping it from the first line before passing it
to any parsing code. Fixes bug #555257 and playback of files
created by Gnome Subtitles.
This commit is contained in:
Sebastian Dröge 2008-10-10 17:13:40 +00:00
parent 81f5117fa9
commit 60bf63486b
4 changed files with 43 additions and 2 deletions
ChangeLog
gst/subparse
tests/check/elements

View file

@ -1,3 +1,15 @@
2008-10-10 Sebastian Dröge <sebastian.droege@collabora.co.uk>
* gst/subparse/gstsubparse.c:
(gst_sub_parse_data_format_autodetect), (handle_buffer),
(gst_sub_parse_change_state):
* gst/subparse/gstsubparse.h:
* tests/check/elements/subparse.c: (GST_START_TEST):
Add support for subtitle files with UTF-8 BOM at the beginning
by simply stripping it from the first line before passing it
to any parsing code. Fixes bug #555257 and playback of files
created by Gnome Subtitles.
2008-10-10 Wim Taymans <wim.taymans@collabora.co.uk>
* gst/audiotestsrc/gstaudiotestsrc.c:

View file

@ -922,6 +922,11 @@ gst_sub_parse_data_format_autodetect (gchar * match_str)
}
}
/* If the string contains a UTF-8 BOM drop it */
if ((guint8) match_str[0] == 0xEF && (guint8) match_str[1] == 0xBB
&& (guint8) match_str[2] == 0xBF)
match_str += 3;
if (regexec (&mdvd_rx, match_str, 0, NULL, 0) == 0) {
GST_LOG ("MicroDVD (frame based) format detected");
return GST_SUB_PARSE_FORMAT_MDVDSUB;
@ -1073,11 +1078,22 @@ handle_buffer (GstSubParse * self, GstBuffer * buf)
}
while ((line = get_next_line (self)) && !self->flushing) {
guint offset = 0;
/* If this is the first line and it contains a UTF-8 BOM drop it */
if (self->first_line && strlen (line) >= 3 &&
(guint8) line[0] == 0xEF && (guint8) line[1] == 0xBB
&& (guint8) line[2] == 0xBF) {
offset = 3;
}
self->first_line = FALSE;
/* Set segment on our parser state machine */
self->state.segment = &self->segment;
/* Now parse the line, out of segment lines will just return NULL */
GST_LOG_OBJECT (self, "Parsing line '%s'", line);
subtitle = self->parse_line (&self->state, line);
GST_LOG_OBJECT (self, "Parsing line '%s'", line + offset);
subtitle = self->parse_line (&self->state, line + offset);
g_free (line);
if (subtitle) {
@ -1252,6 +1268,7 @@ gst_sub_parse_change_state (GstElement * element, GstStateChange transition)
self->next_offset = 0;
self->parser_type = GST_SUB_PARSE_FORMAT_UNKNOWN;
self->valid_utf8 = TRUE;
self->first_line = TRUE;
g_string_truncate (self->textbuf, 0);
break;
default:

View file

@ -93,6 +93,8 @@ struct _GstSubParse {
gboolean flushing;
gboolean valid_utf8;
gchar *encoding;
gboolean first_line;
};
struct _GstSubParseClass {

View file

@ -132,6 +132,13 @@ static SubParseInputChunk srt_input1[] = {
44 * GST_SECOND + 44 * GST_MSECOND, "I still cant see anything."}
};
/* SRT fixture whose first three bytes are the UTF-8 byte order mark
 * (EF BB BF), followed by a single cue. */
static SubParseInputChunk srt_input2[] = {
  {
        /* The BOM sits in its own literal: written as "\xbf1" the digit
         * would be absorbed into the hex escape. */
        "\xef\xbb\xbf"
        "1\n"
        "00:00:00,000 --> 00:00:03,50\n"
        "Just testing.\n"
        "\n",
        0,
        3 * GST_SECOND + 50 * GST_MSECOND,
        "Just testing."
  }
};
static void
setup_subparse (void)
{
@ -237,6 +244,9 @@ GST_START_TEST (test_srt)
/* try with spaces instead of doubled zeroes (which is not exactly according to spec) */
test_srt_do_test (srt_input1, 0, G_N_ELEMENTS (srt_input1));
/* try with UTF-8 BOM at the start; the data passed must be srt_input2 itself,
 * otherwise only its element count is used and the BOM input is never parsed */
test_srt_do_test (srt_input2, 0, G_N_ELEMENTS (srt_input2));
}
GST_END_TEST;