gst/subparse/gstsubparse.c: For SubRip (.srt) subtitles, ignore all markup tags we don't handle (like font tags, for example).

Original commit message from CVS:
* gst/subparse/gstsubparse.c: (subrip_remove_unhandled_tag),
(subrip_remove_unhandled_tags), (parse_subrip):
For SubRip (.srt) subtitles, ignore all markup tags we don't
handle (like font tags, for example).
* tests/check/elements/subparse.c:
Add test for this.
This commit is contained in:
Tim-Philipp Müller 2007-02-10 18:19:37 +00:00
parent d2f03d2cec
commit 5f186351a1
3 changed files with 51 additions and 1 deletions

View file

@ -1,3 +1,13 @@
2007-02-10 Tim-Philipp Müller <tim at centricular dot net>
* gst/subparse/gstsubparse.c: (subrip_remove_unhandled_tag),
(subrip_remove_unhandled_tags), (parse_subrip):
For SubRip (.srt) subtitles, ignore all markup tags we don't
handle (like font tags, for example).
* tests/check/elements/subparse.c:
Add test for this.
2007-02-09 Tim-Philipp Müller <tim at centricular dot net> 2007-02-09 Tim-Philipp Müller <tim at centricular dot net>
* gst/playback/gstdecodebin.c: (add_fakesink), * gst/playback/gstdecodebin.c: (add_fakesink),

View file

@ -547,6 +547,42 @@ subrip_unescape_formatting (gchar * txt)
} }
} }
/* Removes the escaped markup tag occupying [start, stop) from the string
 * in place by shifting the remainder of the string (including its
 * terminating NUL) down onto start.
 *
 * start: points at the "&lt;" that opens the candidate tag
 * stop:  points just past the "&gt;" that closes it
 *
 * A candidate only counts as a tag if the character following "&lt;" (and
 * an optional '/') is an ASCII letter, so text such as "&lt;5" is left alone.
 *
 * Returns TRUE if the tag was removed, FALSE if the text was left untouched.
 */
static gboolean
subrip_remove_unhandled_tag (gchar * start, gchar * stop)
{
  const gchar *name;
  gchar saved;

  /* skip the escaped '<' and, for closing tags, the slash */
  name = start + strlen ("&lt;");
  if (*name == '/')
    ++name;

  /* tag names must begin with an ASCII letter */
  if (!g_ascii_isalpha (*name))
    return FALSE;

  /* temporarily terminate the string after the tag so that only the tag
   * itself gets logged, then restore the overwritten character */
  saved = *stop;
  *stop = '\0';
  GST_LOG ("removing unhandled tag '%s'", start);
  *stop = saved;

  /* source and destination overlap, hence memmove; the +1 copies the
   * terminating NUL as well */
  memmove (start, stop, strlen (stop) + 1);
  return TRUE;
}
/* Remove tags we haven't explicitly allowed earlier on, like font tags
 * for example.
 *
 * txt: NUL-terminated string that is modified in place; may be NULL, in
 *      which case nothing is done.
 *
 * Scans for an escaped "&lt;" followed somewhere later by an escaped "&gt;"
 * and hands that span to subrip_remove_unhandled_tag(), which decides
 * whether it really is a tag and, if so, cuts it out of the string.
 */
static void
subrip_remove_unhandled_tags (gchar * txt)
{
  gchar *pos, *gt;

  if (txt == NULL)
    return;

  pos = txt;
  while (*pos != '\0') {
    if (strncmp (pos, "&lt;", 4) == 0
        && (gt = strstr (pos + 4, "&gt;")) != NULL
        && subrip_remove_unhandled_tag (pos, gt + strlen ("&gt;"))) {
      /* the rest of the string has been shifted down onto pos, so look at
       * the same position again rather than stepping back and forth (which
       * would form a pointer before the buffer when pos == txt) */
      continue;
    }
    ++pos;
  }
}
/* we only allow <i>, <u> and <b>, so let's take a simple approach. This code /* we only allow <i>, <u> and <b>, so let's take a simple approach. This code
* assumes the input has been escaped and subrip_unescape_formatting() has then * assumes the input has been escaped and subrip_unescape_formatting() has then
* been run over the input! This function adds missing closing markup tags and * been run over the input! This function adds missing closing markup tags and
@ -670,6 +706,7 @@ parse_subrip (ParserState * state, const gchar * line)
g_string_truncate (state->buf, 0); g_string_truncate (state->buf, 0);
state->state = 0; state->state = 0;
subrip_unescape_formatting (ret); subrip_unescape_formatting (ret);
subrip_remove_unhandled_tags (ret);
strip_trailing_newlines (ret); strip_trailing_newlines (ret);
subrip_fix_up_markup (&ret); subrip_fix_up_markup (&ret);
return ret; return ret;

View file

@ -100,7 +100,10 @@ static SubParseInputChunk srt_input[] = {
"26\n00:04:00,000 --> 00:05:00,000\n<i>Rock & Roll</i>\n\n", "26\n00:04:00,000 --> 00:05:00,000\n<i>Rock & Roll</i>\n\n",
240 * GST_SECOND, 300 * GST_SECOND, "<i>Rock &amp; Roll</i>"}, { 240 * GST_SECOND, 300 * GST_SECOND, "<i>Rock &amp; Roll</i>"}, {
"27\n00:06:00,000 --> 00:08:00,000\nRock & Roll\n\n", "27\n00:06:00,000 --> 00:08:00,000\nRock & Roll\n\n",
360 * GST_SECOND, 480 * GST_SECOND, "Rock &amp; Roll"} 360 * GST_SECOND, 480 * GST_SECOND, "Rock &amp; Roll"}, {
"28\n00:10:00,000 --> 00:11:00,000\n"
"<font \"#0000FF\"><joj>This is </xxx>in blue but <5</font>\n\n",
600 * GST_SECOND, 660 * GST_SECOND, "This is in blue but &lt;5"}
}; };
static void static void