typefinding: detect stand-alone SSA/ASS subtitle files

https://bugzilla.gnome.org/show_bug.cgi?id=625113
This commit is contained in:
Tim-Philipp Müller 2012-12-16 12:05:02 +00:00
parent 3d5a78e67a
commit 2f177a7616

View file

@ -4699,6 +4699,93 @@ dvdiso_type_find (GstTypeFind * tf, gpointer private)
"application/octet-stream", NULL);
}
/* SSA/ASS subtitles
 *
 * http://en.wikipedia.org/wiki/SubStation_Alpha
 * http://matroska.org/technical/specs/subtitles/ssa.html
 *
 * Typefinder for stand-alone SubStation Alpha (SSA) and Advanced SubStation
 * Alpha (ASS) script files.
 *
 * The stream must carry the "[Script Info]" section header within its first
 * few bytes, optionally preceded by a UTF-8 byte order mark. Up to the first
 * SSA_MAX_PEEK_SIZE bytes are then inspected to tell the two variants apart:
 * a "ScriptType:" line is preferred, and the name of the styles section
 * ("[V4 Styles]" vs. "[V4+ Styles]") is used as a fallback.
 *
 * @tf: typefind context the data is peeked from and the suggestion is made on
 * @private: unused
 *
 * On success suggests "application/x-ass" or "application/x-ssa" with
 * GST_TYPE_FIND_MAXIMUM probability; otherwise makes no suggestion.
 */

/* number of bytes peeked for the initial "[Script Info]" check */
#define SSA_HEADER_PEEK_SIZE 32
/* upper bound on how much of the file is scanned for the variant markers */
#define SSA_MAX_PEEK_SIZE 8192

static void
ssa_type_find (GstTypeFind * tf, gpointer private)
{
  const gchar *start, *end, *ver_str, *media_type = NULL;
  const guint8 *data;
  gchar *str, *script_type, *p = NULL;
  gint64 len;

  data = gst_type_find_peek (tf, 0, SSA_HEADER_PEEK_SIZE);

  if (data == NULL)
    return;

  /* there might be a BOM at the beginning */
  if (memcmp (data, "[Script Info]", 13) != 0 &&
      memcmp (data + 2, "[Script Info]", 13) != 0 &&
      memcmp (data + 3, "[Script Info]", 13) != 0 &&
      memcmp (data + 4, "[Script Info]", 13) != 0) {
    return;
  }

  /* now check if we have SSA or ASS */
  len = gst_type_find_get_length (tf);
  if (len > SSA_MAX_PEEK_SIZE)
    len = SSA_MAX_PEEK_SIZE;

  /* gst_type_find_get_length() is documented to return 0 when the length is
   * not available. Bail out explicitly instead of relying on a zero-sized
   * peek failing: a too-small length would become zero or negative once the
   * BOM is subtracted below, and g_utf8_validate() interprets a negative
   * length as "NUL-terminated input". */
  if (len < SSA_HEADER_PEEK_SIZE)
    return;

  data = gst_type_find_peek (tf, 0, len);
  if (data == NULL)
    return;

  /* skip BOM; the header check above found '[' at offset 0, 2, 3 or 4, but
   * this depends on stream contents, so fail gracefully rather than assert */
  start = (const gchar *) memchr (data, '[', 5);
  if (start == NULL)
    return;

  len -= (start - (const gchar *) data);

  /* ignore anything non-UTF8 for now, in future we might at least allow
   * other UTF variants that are clearly prefixed with the appropriate BOM.
   * Invalid data within the last 6 bytes is tolerated, since it may just be
   * a multi-byte character cut off at the end of the peeked window. */
  if (!g_utf8_validate (start, len, &end) && (len - (end - start)) > 6) {
    GST_FIXME ("non-UTF8 SSA/ASS file");
    return;
  }

  /* something at start, but not a UTF-8 BOM? */
  if (data[0] != '[' && (data[0] != 0xEF || data[1] != 0xBB || data[2] != 0xBF))
    return;

  /* ignore any partial UTF-8 characters at the end */
  len = end - start;

  /* create a NUL-terminated string so it's easier to process it safely.
   * g_strndup() allocates room for the terminator itself, so copy all len
   * valid bytes; copying len - 1 would drop the last character (e.g. the
   * '+' of a trailing "v4.00+"). */
  str = g_strndup (start, len);

  script_type = strstr (str, "ScriptType:");
  if (script_type != NULL) {
    gdouble version;

    /* skip the key, then any spaces and the 'v'/'V' version prefix */
    ver_str = script_type + 11;
    while (*ver_str == ' ' || *ver_str == 'v' || *ver_str == 'V')
      ++ver_str;

    /* "4.00+" denotes ASS, plain versions from 1.0 up to 4.0 denote SSA */
    version = g_ascii_strtod (ver_str, &p);
    if (version == 4.0 && p != NULL && *p == '+')
      media_type = "application/x-ass";
    else if (version >= 1.0 && version <= 4.0)
      media_type = "application/x-ssa";
  }

  /* no usable ScriptType line: fall back to the styles section header */
  if (media_type == NULL) {
    if (strstr (str, "[v4+ Styles]") || strstr (str, "[V4+ Styles]"))
      media_type = "application/x-ass";
    else if (strstr (str, "[v4 Styles]") || strstr (str, "[V4 Styles]"))
      media_type = "application/x-ssa";
  }

  if (media_type != NULL) {
    gst_type_find_suggest_simple (tf, GST_TYPE_FIND_MAXIMUM,
        media_type, "parsed", G_TYPE_BOOLEAN, FALSE, NULL);
  } else {
    GST_WARNING ("could not detect SSA/ASS variant");
  }

  g_free (str);
}
/*** generic typefind for streams that have some data at a specific position***/
typedef struct
{
@ -5073,6 +5160,9 @@ plugin_init (GstPlugin * plugin)
TYPE_FIND_REGISTER (plugin, "application/octet-stream", GST_RANK_MARGINAL,
dvdiso_type_find, NULL, NULL, NULL, NULL);
TYPE_FIND_REGISTER (plugin, "application/x-ssa", GST_RANK_SECONDARY,
ssa_type_find, "ssa,ass", NULL, NULL, NULL);
return TRUE;
}