mirror of
https://gitlab.freedesktop.org/gstreamer/gstreamer.git
synced 2024-10-06 10:42:22 +00:00
Add support for subtitle files with UTF-8 BOM at the beginning by simply stripping it from the first line before pass...
Original commit message from CVS: * gst/subparse/gstsubparse.c: (gst_sub_parse_data_format_autodetect), (handle_buffer), (gst_sub_parse_change_state): * gst/subparse/gstsubparse.h: * tests/check/elements/subparse.c: (GST_START_TEST): Add support for subtitle files with UTF-8 BOM at the beginning by simply stripping it from the first line before passing it to any parsing code. Fixes bug #555257 and playback of files created by Gnome Subtitles.
This commit is contained in:
parent
81f5117fa9
commit
60bf63486b
4 changed files with 43 additions and 2 deletions
12
ChangeLog
12
ChangeLog
|
@ -1,3 +1,15 @@
|
||||||
|
2008-10-10 Sebastian Dröge <sebastian.droege@collabora.co.uk>
|
||||||
|
|
||||||
|
* gst/subparse/gstsubparse.c:
|
||||||
|
(gst_sub_parse_data_format_autodetect), (handle_buffer),
|
||||||
|
(gst_sub_parse_change_state):
|
||||||
|
* gst/subparse/gstsubparse.h:
|
||||||
|
* tests/check/elements/subparse.c: (GST_START_TEST):
|
||||||
|
Add support for subtitle files with UTF-8 BOM at the beginning
|
||||||
|
by simply stripping it from the first line before passing it
|
||||||
|
to any parsing code. Fixes bug #555257 and playback of files
|
||||||
|
created by Gnome Subtitles.
|
||||||
|
|
||||||
2008-10-10 Wim Taymans <wim.taymans@collabora.co.uk>
|
2008-10-10 Wim Taymans <wim.taymans@collabora.co.uk>
|
||||||
|
|
||||||
* gst/audiotestsrc/gstaudiotestsrc.c:
|
* gst/audiotestsrc/gstaudiotestsrc.c:
|
||||||
|
|
|
@ -922,6 +922,11 @@ gst_sub_parse_data_format_autodetect (gchar * match_str)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* If the string contains a UTF-8 BOM drop it */
|
||||||
|
if ((guint8) match_str[0] == 0xEF && (guint8) match_str[1] == 0xBB
|
||||||
|
&& (guint8) match_str[2] == 0xBF)
|
||||||
|
match_str += 3;
|
||||||
|
|
||||||
if (regexec (&mdvd_rx, match_str, 0, NULL, 0) == 0) {
|
if (regexec (&mdvd_rx, match_str, 0, NULL, 0) == 0) {
|
||||||
GST_LOG ("MicroDVD (frame based) format detected");
|
GST_LOG ("MicroDVD (frame based) format detected");
|
||||||
return GST_SUB_PARSE_FORMAT_MDVDSUB;
|
return GST_SUB_PARSE_FORMAT_MDVDSUB;
|
||||||
|
@ -1073,11 +1078,22 @@ handle_buffer (GstSubParse * self, GstBuffer * buf)
|
||||||
}
|
}
|
||||||
|
|
||||||
while ((line = get_next_line (self)) && !self->flushing) {
|
while ((line = get_next_line (self)) && !self->flushing) {
|
||||||
|
guint offset = 0;
|
||||||
|
|
||||||
|
/* If this is the first line and it contains a UTF-8 BOM drop it */
|
||||||
|
if (self->first_line && strlen (line) >= 3 &&
|
||||||
|
(guint8) line[0] == 0xEF && (guint8) line[1] == 0xBB
|
||||||
|
&& (guint8) line[2] == 0xBF) {
|
||||||
|
offset = 3;
|
||||||
|
}
|
||||||
|
|
||||||
|
self->first_line = FALSE;
|
||||||
|
|
||||||
/* Set segment on our parser state machine */
|
/* Set segment on our parser state machine */
|
||||||
self->state.segment = &self->segment;
|
self->state.segment = &self->segment;
|
||||||
/* Now parse the line, out of segment lines will just return NULL */
|
/* Now parse the line, out of segment lines will just return NULL */
|
||||||
GST_LOG_OBJECT (self, "Parsing line '%s'", line);
|
GST_LOG_OBJECT (self, "Parsing line '%s'", line + offset);
|
||||||
subtitle = self->parse_line (&self->state, line);
|
subtitle = self->parse_line (&self->state, line + offset);
|
||||||
g_free (line);
|
g_free (line);
|
||||||
|
|
||||||
if (subtitle) {
|
if (subtitle) {
|
||||||
|
@ -1252,6 +1268,7 @@ gst_sub_parse_change_state (GstElement * element, GstStateChange transition)
|
||||||
self->next_offset = 0;
|
self->next_offset = 0;
|
||||||
self->parser_type = GST_SUB_PARSE_FORMAT_UNKNOWN;
|
self->parser_type = GST_SUB_PARSE_FORMAT_UNKNOWN;
|
||||||
self->valid_utf8 = TRUE;
|
self->valid_utf8 = TRUE;
|
||||||
|
self->first_line = TRUE;
|
||||||
g_string_truncate (self->textbuf, 0);
|
g_string_truncate (self->textbuf, 0);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
|
|
|
@ -93,6 +93,8 @@ struct _GstSubParse {
|
||||||
gboolean flushing;
|
gboolean flushing;
|
||||||
gboolean valid_utf8;
|
gboolean valid_utf8;
|
||||||
gchar *encoding;
|
gchar *encoding;
|
||||||
|
|
||||||
|
gboolean first_line;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct _GstSubParseClass {
|
struct _GstSubParseClass {
|
||||||
|
|
|
@ -132,6 +132,13 @@ static SubParseInputChunk srt_input1[] = {
|
||||||
44 * GST_SECOND + 44 * GST_MSECOND, "I still cant see anything."}
|
44 * GST_SECOND + 44 * GST_MSECOND, "I still cant see anything."}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/* has UTF-8 BOM at the start */
|
||||||
|
static SubParseInputChunk srt_input2[] = {
|
||||||
|
{
|
||||||
|
"\xef\xbb\xbf" "1\n00:00:00,000 --> 00:00:03,50\nJust testing.\n\n",
|
||||||
|
0, 3 * GST_SECOND + 50 * GST_MSECOND, "Just testing."}
|
||||||
|
};
|
||||||
|
|
||||||
static void
|
static void
|
||||||
setup_subparse (void)
|
setup_subparse (void)
|
||||||
{
|
{
|
||||||
|
@ -237,6 +244,9 @@ GST_START_TEST (test_srt)
|
||||||
|
|
||||||
/* try with spaces instead of doubled zeroes (which is not exactly according to spec) */
|
/* try with spaces instead of doubled zeroes (which is not exactly according to spec) */
|
||||||
test_srt_do_test (srt_input1, 0, G_N_ELEMENTS (srt_input1));
|
test_srt_do_test (srt_input1, 0, G_N_ELEMENTS (srt_input1));
|
||||||
|
|
||||||
|
/* try with UTF-8 BOM at the start */
|
||||||
|
test_srt_do_test (srt_input1, 0, G_N_ELEMENTS (srt_input2));
|
||||||
}
|
}
|
||||||
|
|
||||||
GST_END_TEST;
|
GST_END_TEST;
|
||||||
|
|
Loading…
Reference in a new issue