mirror of
https://gitlab.freedesktop.org/gstreamer/gstreamer.git
synced 2025-01-13 19:05:37 +00:00
Add support for subtitle files with a UTF-8 BOM at the beginning by simply stripping it from the first line before passing it to any parsing code.
Original commit message from CVS: * gst/subparse/gstsubparse.c: (gst_sub_parse_data_format_autodetect), (handle_buffer), (gst_sub_parse_change_state): * gst/subparse/gstsubparse.h: * tests/check/elements/subparse.c: (GST_START_TEST): Add support for subtitle files with UTF-8 BOM at the beginning by simply stripping it from the first line before passing it to any parsing code. Fixes bug #555257 and playback of files created by Gnome Subtitles.
This commit is contained in:
parent
81f5117fa9
commit
60bf63486b
4 changed files with 43 additions and 2 deletions
12
ChangeLog
12
ChangeLog
|
@ -1,3 +1,15 @@
|
|||
2008-10-10 Sebastian Dröge <sebastian.droege@collabora.co.uk>
|
||||
|
||||
* gst/subparse/gstsubparse.c:
|
||||
(gst_sub_parse_data_format_autodetect), (handle_buffer),
|
||||
(gst_sub_parse_change_state):
|
||||
* gst/subparse/gstsubparse.h:
|
||||
* tests/check/elements/subparse.c: (GST_START_TEST):
|
||||
Add support for subtitle files with UTF-8 BOM at the beginning
|
||||
by simply stripping it from the first line before passing it
|
||||
to any parsing code. Fixes bug #555257 and playback of files
|
||||
created by Gnome Subtitles.
|
||||
|
||||
2008-10-10 Wim Taymans <wim.taymans@collabora.co.uk>
|
||||
|
||||
* gst/audiotestsrc/gstaudiotestsrc.c:
|
||||
|
|
|
@ -922,6 +922,11 @@ gst_sub_parse_data_format_autodetect (gchar * match_str)
|
|||
}
|
||||
}
|
||||
|
||||
/* If the string contains a UTF-8 BOM drop it */
|
||||
if ((guint8) match_str[0] == 0xEF && (guint8) match_str[1] == 0xBB
|
||||
&& (guint8) match_str[2] == 0xBF)
|
||||
match_str += 3;
|
||||
|
||||
if (regexec (&mdvd_rx, match_str, 0, NULL, 0) == 0) {
|
||||
GST_LOG ("MicroDVD (frame based) format detected");
|
||||
return GST_SUB_PARSE_FORMAT_MDVDSUB;
|
||||
|
@ -1073,11 +1078,22 @@ handle_buffer (GstSubParse * self, GstBuffer * buf)
|
|||
}
|
||||
|
||||
while ((line = get_next_line (self)) && !self->flushing) {
|
||||
guint offset = 0;
|
||||
|
||||
/* If this is the first line and it contains a UTF-8 BOM drop it */
|
||||
if (self->first_line && strlen (line) >= 3 &&
|
||||
(guint8) line[0] == 0xEF && (guint8) line[1] == 0xBB
|
||||
&& (guint8) line[2] == 0xBF) {
|
||||
offset = 3;
|
||||
}
|
||||
|
||||
self->first_line = FALSE;
|
||||
|
||||
/* Set segment on our parser state machine */
|
||||
self->state.segment = &self->segment;
|
||||
/* Now parse the line, out of segment lines will just return NULL */
|
||||
GST_LOG_OBJECT (self, "Parsing line '%s'", line);
|
||||
subtitle = self->parse_line (&self->state, line);
|
||||
GST_LOG_OBJECT (self, "Parsing line '%s'", line + offset);
|
||||
subtitle = self->parse_line (&self->state, line + offset);
|
||||
g_free (line);
|
||||
|
||||
if (subtitle) {
|
||||
|
@ -1252,6 +1268,7 @@ gst_sub_parse_change_state (GstElement * element, GstStateChange transition)
|
|||
self->next_offset = 0;
|
||||
self->parser_type = GST_SUB_PARSE_FORMAT_UNKNOWN;
|
||||
self->valid_utf8 = TRUE;
|
||||
self->first_line = TRUE;
|
||||
g_string_truncate (self->textbuf, 0);
|
||||
break;
|
||||
default:
|
||||
|
|
|
@ -93,6 +93,8 @@ struct _GstSubParse {
|
|||
gboolean flushing;
|
||||
gboolean valid_utf8;
|
||||
gchar *encoding;
|
||||
|
||||
gboolean first_line;
|
||||
};
|
||||
|
||||
struct _GstSubParseClass {
|
||||
|
|
|
@ -132,6 +132,13 @@ static SubParseInputChunk srt_input1[] = {
|
|||
44 * GST_SECOND + 44 * GST_MSECOND, "I still cant see anything."}
|
||||
};
|
||||
|
||||
/* has UTF-8 BOM at the start */
|
||||
static SubParseInputChunk srt_input2[] = {
|
||||
{
|
||||
"\xef\xbb\xbf" "1\n00:00:00,000 --> 00:00:03,50\nJust testing.\n\n",
|
||||
0, 3 * GST_SECOND + 50 * GST_MSECOND, "Just testing."}
|
||||
};
|
||||
|
||||
static void
|
||||
setup_subparse (void)
|
||||
{
|
||||
|
@ -237,6 +244,9 @@ GST_START_TEST (test_srt)
|
|||
|
||||
/* try with spaces instead of doubled zeroes (which is not exactly according to spec) */
|
||||
test_srt_do_test (srt_input1, 0, G_N_ELEMENTS (srt_input1));
|
||||
|
||||
/* try with UTF-8 BOM at the start */
|
||||
/* Feed the BOM-prefixed chunks themselves: the data array and the element
 * count must refer to the same table, otherwise the BOM path is never run. */
test_srt_do_test (srt_input2, 0, G_N_ELEMENTS (srt_input2));
|
||||
}
|
||||
|
||||
GST_END_TEST;
|
||||
|
|
Loading…
Reference in a new issue