/* GStreamer SSA subtitle parser * Copyright (c) 2006 Tim-Philipp Müller * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Library General Public License for more details. * * You should have received a copy of the GNU Library General Public * License along with this library; if not, write to the * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, * Boston, MA 02110-1301, USA. */ /* Super-primitive SSA parser - we just want the text and ignore * everything else like styles and timing codes etc. for now */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include /* isspace() */ #include /* sscanf() */ #include /* atoi() */ #include #include "gstssaparse.h" #include "gstsubparseelements.h" GST_DEBUG_CATEGORY_STATIC (ssa_parse_debug); #undef GST_CAT_DEFAULT #define GST_CAT_DEFAULT ssa_parse_debug static GstStaticPadTemplate sink_templ = GST_STATIC_PAD_TEMPLATE ("sink", GST_PAD_SINK, GST_PAD_ALWAYS, GST_STATIC_CAPS ("application/x-ssa; application/x-ass") ); static GstStaticPadTemplate src_templ = GST_STATIC_PAD_TEMPLATE ("src", GST_PAD_SRC, GST_PAD_ALWAYS, GST_STATIC_CAPS ("text/x-raw, format=pango-markup") ); #define gst_ssa_parse_parent_class parent_class G_DEFINE_TYPE (GstSsaParse, gst_ssa_parse, GST_TYPE_ELEMENT); GST_ELEMENT_REGISTER_DEFINE_WITH_CODE (ssaparse, "ssaparse", GST_RANK_PRIMARY, GST_TYPE_SSA_PARSE, sub_parse_element_init (plugin)); static GstStateChangeReturn gst_ssa_parse_change_state (GstElement * element, GstStateChange transition); static gboolean gst_ssa_parse_setcaps (GstPad * sinkpad, GstCaps * caps); static gboolean gst_ssa_parse_src_event (GstPad * pad, GstObject * parent, GstEvent * event); static gboolean gst_ssa_parse_sink_event (GstPad * pad, GstObject * parent, GstEvent * event); static GstFlowReturn gst_ssa_parse_chain (GstPad * sinkpad, GstObject * parent, GstBuffer * buf); static void gst_ssa_parse_dispose (GObject * object) { GstSsaParse *parse = GST_SSA_PARSE (object); g_free (parse->ini); parse->ini = NULL; GST_CALL_PARENT (G_OBJECT_CLASS, dispose, (object)); } static void gst_ssa_parse_init (GstSsaParse * parse) { parse->sinkpad = gst_pad_new_from_static_template (&sink_templ, "sink"); gst_pad_set_chain_function (parse->sinkpad, GST_DEBUG_FUNCPTR (gst_ssa_parse_chain)); gst_pad_set_event_function (parse->sinkpad, GST_DEBUG_FUNCPTR (gst_ssa_parse_sink_event)); gst_element_add_pad (GST_ELEMENT (parse), parse->sinkpad); parse->srcpad = gst_pad_new_from_static_template (&src_templ, "src"); gst_pad_set_event_function (parse->srcpad, GST_DEBUG_FUNCPTR (gst_ssa_parse_src_event)); gst_element_add_pad (GST_ELEMENT (parse), parse->srcpad); gst_pad_use_fixed_caps (parse->srcpad); parse->ini = NULL; parse->framed = FALSE; parse->send_tags = FALSE; } static void gst_ssa_parse_class_init (GstSsaParseClass * klass) { GObjectClass *object_class = G_OBJECT_CLASS (klass); GstElementClass *element_class = GST_ELEMENT_CLASS (klass); object_class->dispose = gst_ssa_parse_dispose; gst_element_class_add_static_pad_template (element_class, &sink_templ); gst_element_class_add_static_pad_template (element_class, &src_templ); gst_element_class_set_static_metadata (element_class, "SSA Subtitle Parser", "Codec/Decoder/Subtitle", "Parses SSA subtitle streams", "Tim-Philipp Müller "); GST_DEBUG_CATEGORY_INIT (ssa_parse_debug, "ssaparse", 0, "SSA subtitle parser"); element_class->change_state = GST_DEBUG_FUNCPTR (gst_ssa_parse_change_state); } static gboolean gst_ssa_parse_src_event (GstPad * pad, GstObject * parent, GstEvent * event) { return gst_pad_event_default (pad, parent, event); } static gboolean gst_ssa_parse_sink_event (GstPad * pad, GstObject * parent, GstEvent * event) { gboolean res; switch (GST_EVENT_TYPE (event)) { case GST_EVENT_CAPS: { GstCaps *caps; gst_event_parse_caps (event, &caps); res = gst_ssa_parse_setcaps (pad, caps); gst_event_unref (event); break; } default: res = gst_pad_event_default (pad, parent, event); break; } return res; } #ifndef HAVE_MEMMEM // memmem() is a GNU extension so if it's not available we'll need // our own implementation here. Thanks C. static void * my_memmem (const void *haystack, size_t haystacklen, const void *needle, size_t needlelen) { const guint8 *cur, *end; if (needlelen > haystacklen) return NULL; if (needlelen == 0) return (void *) haystack; cur = haystack; end = cur + haystacklen - needlelen; for (; cur <= end; cur++) { if (memcmp (cur, needle, needlelen) == 0) return (void *) cur; } return NULL; } #else #define my_memmem memmem #endif static gboolean gst_ssa_parse_setcaps (GstPad * sinkpad, GstCaps * caps) { GstSsaParse *parse = GST_SSA_PARSE (GST_PAD_PARENT (sinkpad)); GstCaps *outcaps; const GValue *val; GstStructure *s; const guchar bom_utf8[] = { 0xEF, 0xBB, 0xBF }; const guint8 header[] = "[Script Info]"; const gchar *end; GstBuffer *priv; GstMapInfo map; gchar *ptr; gsize left, bad_offset; gboolean ret; s = gst_caps_get_structure (caps, 0); val = gst_structure_get_value (s, "codec_data"); if (val == NULL) { parse->framed = FALSE; GST_ERROR ("Only SSA subtitles embedded in containers are supported"); return FALSE; } parse->framed = TRUE; parse->send_tags = TRUE; priv = (GstBuffer *) g_value_get_boxed (val); g_return_val_if_fail (priv != NULL, FALSE); gst_buffer_ref (priv); if (!gst_buffer_map (priv, &map, GST_MAP_READ)) { gst_buffer_unref (priv); return FALSE; } GST_MEMDUMP_OBJECT (parse, "init section", map.data, map.size); ptr = (gchar *) map.data; left = map.size; /* skip UTF-8 BOM */ if (left >= 3 && memcmp (ptr, bom_utf8, 3) == 0) { ptr += 3; left -= 3; } if (!my_memmem (ptr, left, header, sizeof (header) - 1)) goto invalid_init; if (!g_utf8_validate (ptr, left, &end)) { bad_offset = (gsize) (end - ptr); GST_WARNING_OBJECT (parse, "Init section is not valid UTF-8. Problem at " "byte offset %" G_GSIZE_FORMAT, bad_offset); /* continue with valid UTF-8 data */ left = bad_offset; } /* FIXME: parse initial section */ if (parse->ini) g_free (parse->ini); parse->ini = g_strndup (ptr, left); GST_LOG_OBJECT (parse, "Init section:\n%s", parse->ini); gst_buffer_unmap (priv, &map); gst_buffer_unref (priv); outcaps = gst_caps_new_simple ("text/x-raw", "format", G_TYPE_STRING, "pango-markup", NULL); ret = gst_pad_set_caps (parse->srcpad, outcaps); gst_caps_unref (outcaps); return ret; /* ERRORS */ invalid_init: { GST_WARNING_OBJECT (parse, "Invalid Init section - no Script Info header"); gst_buffer_unmap (priv, &map); gst_buffer_unref (priv); return FALSE; } } #ifdef my_memmem #undef my_memmem #endif static gboolean gst_ssa_parse_remove_override_codes (GstSsaParse * parse, gchar * txt) { gchar *t, *end; gboolean removed_any = FALSE; while ((t = strchr (txt, '{'))) { end = strchr (t, '}'); if (end == NULL) { GST_WARNING_OBJECT (parse, "Missing { for style override code"); return removed_any; } /* move terminating NUL character forward as well */ memmove (t, end + 1, strlen (end + 1) + 1); removed_any = TRUE; } /* these may occur outside of curly brackets. We don't handle the different * wrapping modes yet, so just remove these markers from the text for now */ while ((t = strstr (txt, "\\n"))) { t[0] = ' '; t[1] = '\n'; } while ((t = strstr (txt, "\\N"))) { t[0] = ' '; t[1] = '\n'; } while ((t = strstr (txt, "\\h"))) { t[0] = ' '; t[1] = ' '; } return removed_any; } /** * gst_ssa_parse_push_line: * @parse: caller element * @txt: text to push * @start: timestamp for the buffer * @duration: duration for the buffer * * Parse the text in a buffer with the given properties and * push it to the srcpad of the @parse element * * Returns: result of the push of the created buffer */ static GstFlowReturn gst_ssa_parse_push_line (GstSsaParse * parse, gchar * txt, GstClockTime start, GstClockTime duration) { GstFlowReturn ret; GstBuffer *buf; gchar *t, *escaped; gint num, i, len; num = atoi (txt); GST_LOG_OBJECT (parse, "Parsing line #%d at %" GST_TIME_FORMAT, num, GST_TIME_ARGS (start)); /* skip all non-text fields before the actual text */ t = txt; for (i = 0; i < 8; ++i) { t = strchr (t, ','); if (t == NULL) return GST_FLOW_ERROR; ++t; } GST_LOG_OBJECT (parse, "Text : %s", t); if (gst_ssa_parse_remove_override_codes (parse, t)) { GST_LOG_OBJECT (parse, "Clean: %s", t); } /* we claim to output pango markup, so we must escape the * text even if we don't actually use any pango markup yet */ escaped = g_markup_printf_escaped ("%s", t); len = strlen (escaped); /* allocate enough for a terminating NUL, but don't include it in buf size */ buf = gst_buffer_new_and_alloc (len + 1); gst_buffer_fill (buf, 0, escaped, len + 1); gst_buffer_set_size (buf, len); g_free (escaped); GST_BUFFER_TIMESTAMP (buf) = start; GST_BUFFER_DURATION (buf) = duration; GST_LOG_OBJECT (parse, "Pushing buffer with timestamp %" GST_TIME_FORMAT " and duration %" GST_TIME_FORMAT, GST_TIME_ARGS (start), GST_TIME_ARGS (duration)); ret = gst_pad_push (parse->srcpad, buf); if (ret != GST_FLOW_OK) { GST_DEBUG_OBJECT (parse, "Push of text '%s' returned flow %s", txt, gst_flow_get_name (ret)); } return ret; } static GstFlowReturn gst_ssa_parse_chain (GstPad * sinkpad, GstObject * parent, GstBuffer * buf) { GstFlowReturn ret; GstSsaParse *parse = GST_SSA_PARSE (parent); GstClockTime ts; gchar *txt; GstMapInfo map; if (G_UNLIKELY (!parse->framed)) goto not_framed; if (G_UNLIKELY (parse->send_tags)) { GstTagList *tags; tags = gst_tag_list_new_empty (); gst_tag_list_add (tags, GST_TAG_MERGE_APPEND, GST_TAG_SUBTITLE_CODEC, "SubStation Alpha", NULL); gst_pad_push_event (parse->srcpad, gst_event_new_tag (tags)); parse->send_tags = FALSE; } /* make double-sure it's 0-terminated and all */ gst_buffer_map (buf, &map, GST_MAP_READ); txt = g_strndup ((gchar *) map.data, map.size); gst_buffer_unmap (buf, &map); if (txt == NULL) goto empty_text; ts = GST_BUFFER_TIMESTAMP (buf); ret = gst_ssa_parse_push_line (parse, txt, ts, GST_BUFFER_DURATION (buf)); if (ret != GST_FLOW_OK && GST_CLOCK_TIME_IS_VALID (ts)) { GstSegment segment; /* just advance time without sending anything */ gst_segment_init (&segment, GST_FORMAT_TIME); segment.start = ts; segment.time = ts; gst_pad_push_event (parse->srcpad, gst_event_new_segment (&segment)); ret = GST_FLOW_OK; } gst_buffer_unref (buf); g_free (txt); return ret; /* ERRORS */ not_framed: { GST_ELEMENT_ERROR (parse, STREAM, FORMAT, (NULL), ("Only SSA subtitles embedded in containers are supported")); gst_buffer_unref (buf); return GST_FLOW_NOT_NEGOTIATED; } empty_text: { GST_ELEMENT_WARNING (parse, STREAM, FORMAT, (NULL), ("Received empty subtitle")); gst_buffer_unref (buf); return GST_FLOW_OK; } } static GstStateChangeReturn gst_ssa_parse_change_state (GstElement * element, GstStateChange transition) { GstStateChangeReturn ret = GST_STATE_CHANGE_SUCCESS; GstSsaParse *parse = GST_SSA_PARSE (element); switch (transition) { case GST_STATE_CHANGE_READY_TO_PAUSED: break; default: break; } ret = GST_ELEMENT_CLASS (parent_class)->change_state (element, transition); if (ret == GST_STATE_CHANGE_FAILURE) return ret; switch (transition) { case GST_STATE_CHANGE_PAUSED_TO_READY: g_free (parse->ini); parse->ini = NULL; parse->framed = FALSE; break; default: break; } return ret; }