gst/subparse/: Add support for SAMI subtitles (#169936).

Original commit message from CVS:
Patch by: Young-Ho Cha  <ganadist at chollian net>
* gst/subparse/Makefile.am:
* gst/subparse/gstsubparse.c: (gst_sub_parse_dispose),
(parser_state_dispose), (gst_sub_parse_data_format_autodetect),
(gst_sub_parse_format_autodetect), (feed_textbuf),
(gst_subparse_type_find), (plugin_init):
* gst/subparse/gstsubparse.h:
* gst/subparse/samiparse.c:
* gst/subparse/samiparse.h:
Add support for SAMI subtitles (#169936).
This commit is contained in:
Young-Ho Cha 2006-05-15 09:14:35 +00:00 committed by Tim-Philipp Müller
parent c29226beaf
commit e0cbb1036c
6 changed files with 500 additions and 12 deletions

View file

@ -1,3 +1,17 @@
2006-05-15 Tim-Philipp Müller <tim at centricular dot net>
Patch by: Young-Ho Cha <ganadist at chollian net>
* gst/subparse/Makefile.am:
* gst/subparse/gstsubparse.c: (gst_sub_parse_dispose),
(parser_state_dispose), (gst_sub_parse_data_format_autodetect),
(gst_sub_parse_format_autodetect), (feed_textbuf),
(gst_subparse_type_find), (plugin_init):
* gst/subparse/gstsubparse.h:
* gst/subparse/samiparse.c:
* gst/subparse/samiparse.h:
Add support for SAMI subtitles (#169936).
2006-05-14 Michael Smith <msmith@fluendo.com>
* gst/audioconvert/gstchannelmix.c: (gst_channel_mix_fill_others):

View file

@ -2,7 +2,11 @@ plugin_LTLIBRARIES = libgstsubparse.la
libgstsubparse_la_SOURCES = \
gstssaparse.c \
gstsubparse.c
gstssaparse.h \
gstsubparse.c \
gstsubparse.h \
samiparse.c \
samiparse.h
libgstsubparse_la_CFLAGS = $(GST_CFLAGS)
libgstsubparse_la_LDFLAGS = $(GST_PLUGIN_LDFLAGS)
@ -10,4 +14,5 @@ libgstsubparse_la_LIBADD = $(GST_LIBS)
noinst_HEADERS = \
gstssaparse.h \
gstsubparse.h
gstsubparse.h \
samiparse.h

View file

@ -29,6 +29,7 @@
#include "gstsubparse.h"
#include "gstssaparse.h"
#include "samiparse.h"
GST_DEBUG_CATEGORY_STATIC (sub_parse_debug);
#define GST_CAT_DEFAULT sub_parse_debug
@ -40,11 +41,19 @@ GST_ELEMENT_DETAILS ("Subtitle parser",
"Gustavo J. A. M. Carneiro <gjc@inescporto.pt>\n"
"Ronald S. Bultje <rbultje@ronald.bitfreak.net>");
#ifndef GST_DISABLE_LOADSAVE_REGISTRY
static GstStaticPadTemplate sink_templ = GST_STATIC_PAD_TEMPLATE ("sink",
GST_PAD_SINK,
GST_PAD_ALWAYS,
GST_STATIC_CAPS ("application/x-subtitle; application/x-subtitle-sami")
);
#else
static GstStaticPadTemplate sink_templ = GST_STATIC_PAD_TEMPLATE ("sink",
GST_PAD_SINK,
GST_PAD_ALWAYS,
GST_STATIC_CAPS ("application/x-subtitle")
);
#endif
static GstStaticPadTemplate src_templ = GST_STATIC_PAD_TEMPLATE ("src",
GST_PAD_SRC,
@ -114,6 +123,7 @@ gst_sub_parse_dispose (GObject * object)
gst_segment_free (subparse->segment);
subparse->segment = NULL;
}
sami_context_deinit (&subparse->state);
GST_CALL_PARENT (G_OBJECT_CLASS, dispose, (object));
}
@ -569,6 +579,9 @@ parser_state_dispose (ParserState * state)
g_string_free (state->buf, TRUE);
state->buf = NULL;
}
if (state->user_data) {
sami_context_reset (state);
}
}
/*
@ -602,19 +615,23 @@ gst_sub_parse_data_format_autodetect (gchar * match_str)
}
if (regexec (&mdvd_rx, match_str, 0, NULL, 0) == 0) {
GST_LOG ("subparse: MicroDVD (frame based) format detected");
GST_LOG ("MicroDVD (frame based) format detected");
return GST_SUB_PARSE_FORMAT_MDVDSUB;
}
if (regexec (&subrip_rx, match_str, 0, NULL, 0) == 0) {
GST_LOG ("subparse: SubRip (time based) format detected");
GST_LOG ("SubRip (time based) format detected");
return GST_SUB_PARSE_FORMAT_SUBRIP;
}
if (!strncmp (match_str, "FORMAT=TIME", 11)) {
GST_LOG ("subparse: MPSub (time based) format detected");
GST_LOG ("MPSub (time based) format detected");
return GST_SUB_PARSE_FORMAT_MPSUB;
}
if (!g_ascii_strncasecmp (match_str, "<SAMI>", 6)) {
GST_LOG ("SAMI (time based) format detected");
return GST_SUB_PARSE_FORMAT_SAMI;
}
GST_WARNING ("subparse: subtitle format autodetection failed!");
GST_DEBUG ("no subtitle format detected");
return GST_SUB_PARSE_FORMAT_UNKNOWN;
}
@ -646,6 +663,10 @@ gst_sub_parse_format_autodetect (GstSubParse * self)
case GST_SUB_PARSE_FORMAT_MPSUB:
self->parse_line = parse_mpsub;
return gst_caps_new_simple ("text/plain", NULL);
case GST_SUB_PARSE_FORMAT_SAMI:
self->parse_line = parse_sami;
sami_context_init (&self->state);
return gst_caps_new_simple ("text/x-pango-markup", NULL);
case GST_SUB_PARSE_FORMAT_UNKNOWN:
default:
GST_DEBUG ("no subtitle format detected");
@ -662,6 +683,7 @@ feed_textbuf (GstSubParse * self, GstBuffer * buf)
/* flush the parser state */
parser_state_init (&self->state);
g_string_truncate (self->textbuf, 0);
sami_context_reset (&self->state);
}
self->textbuf = g_string_append_len (self->textbuf,
@ -866,15 +888,18 @@ gst_sub_parse_change_state (GstElement * element, GstStateChange transition)
* Typefind support.
*/
static GstStaticCaps smi_caps = GST_STATIC_CAPS ("application/x-subtitle-sami");
static GstStaticCaps sub_caps = GST_STATIC_CAPS ("application/x-subtitle");
#define SUB_CAPS (gst_static_caps_get (&sub_caps))
#define SAMI_CAPS (gst_static_caps_get (&smi_caps))
static void
gst_subparse_type_find (GstTypeFind * tf, gpointer private)
{
const guint8 *data;
GstSubParseFormat format;
const guint8 *data;
GstCaps *caps;
gchar *str;
if (!(data = gst_type_find_peek (tf, 0, 36)))
@ -888,26 +913,34 @@ gst_subparse_type_find (GstTypeFind * tf, gpointer private)
switch (format) {
case GST_SUB_PARSE_FORMAT_MDVDSUB:
GST_DEBUG ("MicroDVD format detected");
caps = SUB_CAPS;
break;
case GST_SUB_PARSE_FORMAT_SUBRIP:
GST_DEBUG ("SubRip format detected");
caps = SUB_CAPS;
break;
case GST_SUB_PARSE_FORMAT_MPSUB:
GST_DEBUG ("MPSub format detected");
caps = SUB_CAPS;
break;
case GST_SUB_PARSE_FORMAT_SAMI:
GST_DEBUG ("SAMI (time-based) format detected");
caps = SAMI_CAPS;
break;
default:
case GST_SUB_PARSE_FORMAT_UNKNOWN:
GST_DEBUG ("no subtitle format detected");
return;
}
/* if we're here, it's ok */
gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, SUB_CAPS);
gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, caps);
}
static gboolean
plugin_init (GstPlugin * plugin)
{
static gchar *sub_exts[] = { "srt", "sub", "mpsub", "mdvd", NULL };
static gchar *sub_exts[] = { "srt", "sub", "mpsub", "mdvd", "smi", NULL };
GST_DEBUG_CATEGORY_INIT (sub_parse_debug, "subparse", 0, ".sub parser");

View file

@ -30,10 +30,10 @@ G_BEGIN_DECLS
#define GST_SUBPARSE(obj) \
(G_TYPE_CHECK_INSTANCE_CAST ((obj), GST_TYPE_SUBPARSE, GstSubParse))
#define GST_SUBPARSE_CLASS(klass) \
(G_TYPE_CHECK_CLASS_CAST ((klass), GST_TYPE_SUBPARSE, GstSubParse))
(G_TYPE_CHECK_CLASS_CAST ((klass), GST_TYPE_SUBPARSE, GstSubParseClass))
#define GST_IS_SUBPARSE(obj) \
(G_TYPE_CHECK_INSTANCE_TYPE ((obj), GST_TYPE_SUBPARSE))
#define GST_IS_SUBPARSE_CLASS(obj) \
#define GST_IS_SUBPARSE_CLASS(klass) \
(G_TYPE_CHECK_CLASS_TYPE ((klass), GST_TYPE_SUBPARSE))
typedef struct _GstSubParse GstSubParse;
@ -45,7 +45,8 @@ typedef enum
GST_SUB_PARSE_FORMAT_UNKNOWN = 0,
GST_SUB_PARSE_FORMAT_MDVDSUB = 1,
GST_SUB_PARSE_FORMAT_SUBRIP = 2,
GST_SUB_PARSE_FORMAT_MPSUB = 3
GST_SUB_PARSE_FORMAT_MPSUB = 3,
GST_SUB_PARSE_FORMAT_SAMI = 4
} GstSubParseFormat;
typedef struct {
@ -54,6 +55,7 @@ typedef struct {
guint64 start_time;
guint64 duration;
GstSegment *segment;
gpointer user_data;
} ParserState;
typedef gchar* (*Parser) (ParserState *state, const gchar *line);

396
gst/subparse/samiparse.c Normal file
View file

@ -0,0 +1,396 @@
/* GStreamer SAMI subtitle parser
* Copyright (c) 2006 Young-Ho Cha <ganadist at chollian net>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 02111-1307, USA.
*/
#include "samiparse.h"
/* FIXME: use Makefile stuff */
#ifndef GST_DISABLE_LOADSAVE_REGISTRY
#include <libxml/HTMLparser.h>
#include <string.h>
/* One-character flags stored in GstSamiContext::state, one per tag that is
 * currently open (see sami_context_push_state() / sami_context_pop_state()). */
#define ITALIC_TAG 'i' /* pushed when an <i> element is opened */
#define COLOR_TAG 'c' /* pushed when a <font color=...> span is opened */
#define RUBY_TAG 'r' /* pushed when a <ruby> element is opened */
#define RT_TAG 't' /* pushed when an <rt> element is opened */
#define CLEAR_TAG '0' /* never pushed; passed to sami_context_pop_state() by
                       * the sync handler to close every open tag */
/* Parser state for one SAMI stream. sami_context_init() allocates it and
 * stores it in ParserState::user_data. */
typedef struct _GstSamiContext GstSamiContext;
struct _GstSamiContext
{
GString *buf; /* markup collected for the subtitle currently being read */
GString *rubybuf; /* markup collected for <rt> (ruby) content */
GString *resultbuf; /* finished markup: when a 'sync' tag with a start
* attribute is seen, the contents of 'buf' are
* moved here so that following content does not
* get appended to the finished subtitle */
GString *state; /* in many sami files there are tags that
* are not closed, so for each open tag the
* parser will append a tag flag here so
* that tags can be closed properly on
* 'sync' tags. See sami_context_push_state()
* and sami_context_pop_state(). */
htmlParserCtxtPtr htmlctxt; /* libxml2 push-mode html parser context */
gboolean has_result; /* set when resultbuf is ready to be pushed out */
gboolean in_title; /* set while inside <title>, so that the title
* text is not appended to buf */
guint64 time1; /* previous start attribute in sync tag */
guint64 time2; /* current start attribute in sync tag */
};
/* Look up a tag flag in a state string.
 *
 * Returns a pointer to the last occurrence of @tag in @str, or NULL if the
 * flag is not present. */
static gchar *
has_tag (GString * str, const gchar tag)
{
  gchar *last_hit;

  last_hit = strrchr (str->str, tag);
  return last_hit;
}
/* Record that a tag identified by the flag @state has just been opened by
 * appending the flag to the context's open-tag string. */
static void
sami_context_push_state (GstSamiContext * sctx, char state)
{
  GString *open_tags = sctx->state;

  g_string_append_c (open_tags, state);
}
/* Close open tags, innermost first, up to and including the most recent tag
 * flagged @state.
 *
 * The open-tag string is walked from its end. Closing markup for <i> and
 * colour spans is accumulated; closing markup for <rt> goes straight into
 * the ruby buffer. If @state is found, the accumulated markup is appended to
 * the content buffer and the open-tag string is cut at that position. If it
 * is not found, nothing is appended to the content buffer unless @state is
 * CLEAR_TAG, in which case every open tag is closed and the open-tag string
 * is emptied. */
static void
sami_context_pop_state (GstSamiContext * sctx, char state)
{
  GString *open_tags = sctx->state;
  GString *closers = g_string_new ("");
  gboolean matched = FALSE;
  int pos;

  for (pos = open_tags->len - 1; pos >= 0; pos--) {
    const char flag = open_tags->str[pos];

    if (flag == ITALIC_TAG) {
      /* <i> */
      g_string_append (closers, "</i>");
    } else if (flag == COLOR_TAG) {
      /* <span foreground= > */
      g_string_append (closers, "</span>");
    } else if (flag == RT_TAG) {
      /* FIXME: support for furigana/ruby once implemented in pango */
      g_string_append (sctx->rubybuf, "</span>");
      if (has_tag (open_tags, ITALIC_TAG))
        g_string_append (sctx->rubybuf, "</i>");
    }
    /* RUBY_TAG and unknown flags produce no closing markup */

    if (flag == state) {
      g_string_truncate (open_tags, pos);
      matched = TRUE;
      break;
    }
  }

  if (matched) {
    g_string_append (sctx->buf, closers->str);
  } else if (state == CLEAR_TAG) {
    g_string_append (sctx->buf, closers->str);
    g_string_truncate (open_tags, 0);
  }

  g_string_free (closers, TRUE);
}
/* Handle an opening 'sync' tag.
 *
 * All tags still open are closed first. For every attribute whose name
 * begins with "start" and which has a value, the previous start time is
 * shifted into time1, the new one (milliseconds, converted with atoi) is
 * stored in time2, the collected content is moved from buf to resultbuf and
 * the context is flagged as having a result. */
static void
handle_start_sync (GstSamiContext * sctx, const xmlChar ** atts)
{
  const xmlChar **pair;

  sami_context_pop_state (sctx, CLEAR_TAG);

  if (atts == NULL)
    return;

  /* atts is a NULL-terminated list of name/value pairs */
  for (pair = atts; pair[0] != NULL; pair += 2) {
    const xmlChar *attr_name = pair[0];
    const xmlChar *attr_value = pair[1];

    if (attr_value == NULL)
      continue;
    if (xmlStrncmp ((const xmlChar *) "start", attr_name, 5) != 0)
      continue;

    sctx->time1 = sctx->time2;
    sctx->time2 = atoi ((const char *) attr_value) * GST_MSECOND;
    sctx->has_result = TRUE;
    g_string_append (sctx->resultbuf, sctx->buf->str);
    g_string_truncate (sctx->buf, 0);
  }
}
/* Handle an opening 'font' tag.
 *
 * Any colour span that is still open is closed first. For every attribute
 * whose name begins with "color" and which has a value, a
 * <span foreground="..."> is appended to the content buffer and COLOR_TAG is
 * pushed onto the open-tag string.
 *
 * The attribute value comes from the subtitle file, so it is markup-escaped
 * before being placed inside the attribute; otherwise a quote, '<' or '&' in
 * the value would produce invalid Pango markup. */
static void
handle_start_font (GstSamiContext * sctx, const xmlChar ** atts)
{
  int i;

  sami_context_pop_state (sctx, COLOR_TAG);

  if (atts == NULL)
    return;

  for (i = 0; atts[i] != NULL; i += 2) {
    const xmlChar *key = atts[i];
    const xmlChar *value = atts[i + 1];

    if (!value)
      continue;

    if (!xmlStrncmp ((const xmlChar *) "color", key, 5)) {
      /*
       * Many sami files contain invalid colour values. Repair the common
       * case of a six-digit hex value written without the leading '#'.
       */
      const gchar *sharp = "";
      int len = xmlStrlen (value);
      gchar *escaped;

      if (!(*value == '#' && len == 7)) {
        gchar *r;

        /* only the end pointer matters: it tells us whether all six
         * characters are hex digits */
        (void) strtol ((const char *) value, &r, 16);
        if ((xmlChar *) r == (value + 6) && len == 6) {
          sharp = "#";
        }
      }

      escaped = g_markup_escape_text ((const gchar *) value, -1);
      g_string_append_printf (sctx->buf, "<span foreground=\"%s%s\">", sharp,
          escaped);
      g_free (escaped);

      sami_context_push_state (sctx, COLOR_TAG);
    }
  }
}
/* SAX startElement callback: translate an opening SAMI/HTML tag into Pango
 * markup and open-tag bookkeeping.
 *
 * @ctx is the GstSamiContext registered with the parser, @name the element
 * name and @atts its NULL-terminated name/value attribute list (may be NULL).
 *
 * Element names are compared for equality. A length-limited prefix compare
 * would treat e.g. <img> or <input> as <i> and <rtc> as <rt>. Elements not
 * listed here (including <p>) are ignored. */
static void
start_sami_element (void *ctx, const xmlChar * name, const xmlChar ** atts)
{
  GstSamiContext *sctx = (GstSamiContext *) ctx;

  if (!xmlStrcmp ((const xmlChar *) "title", name)) {
    sctx->in_title = TRUE;
  } else if (!xmlStrcmp ((const xmlChar *) "sync", name)) {
    handle_start_sync (sctx, atts);
  } else if (!xmlStrcmp ((const xmlChar *) "font", name)) {
    handle_start_font (sctx, atts);
  } else if (!xmlStrcmp ((const xmlChar *) "ruby", name)) {
    sami_context_push_state (sctx, RUBY_TAG);
  } else if (!xmlStrcmp ((const xmlChar *) "br", name)) {
    g_string_append_c (sctx->buf, '\n');
  } else if (!xmlStrcmp ((const xmlChar *) "rt", name)) {
    /* FIXME: support for furigana/ruby once implemented in pango */
    if (has_tag (sctx->state, ITALIC_TAG)) {
      g_string_append (sctx->rubybuf, "<i>");
    }
    g_string_append (sctx->rubybuf, "<span size='xx-small' rise='-100'>");
    sami_context_push_state (sctx, RT_TAG);
  } else if (!xmlStrcmp ((const xmlChar *) "i", name)) {
    g_string_append (sctx->buf, "<i>");
    sami_context_push_state (sctx, ITALIC_TAG);
  }
}
/* SAX endElement callback: undo the bookkeeping done for the matching
 * opening tag.
 *
 * Closing 'title' clears the in_title flag; closing 'font', 'ruby' or 'i'
 * pops the corresponding flag (and everything opened after it) from the
 * open-tag string. Other elements are ignored.
 *
 * Element names are compared for equality. A length-limited prefix compare
 * would let e.g. </img> or </ins> pop an open <i>. */
static void
end_sami_element (void *ctx, const xmlChar * name)
{
  GstSamiContext *sctx = (GstSamiContext *) ctx;

  if (!xmlStrcmp ((const xmlChar *) "title", name)) {
    sctx->in_title = FALSE;
  } else if (!xmlStrcmp ((const xmlChar *) "font", name)) {
    sami_context_pop_state (sctx, COLOR_TAG);
  } else if (!xmlStrcmp ((const xmlChar *) "ruby", name)) {
    sami_context_pop_state (sctx, RUBY_TAG);
  } else if (!xmlStrcmp ((const xmlChar *) "i", name)) {
    sami_context_pop_state (sctx, ITALIC_TAG);
  }
}
static void
characters_sami (void *ctx, const xmlChar * ch, int len)
{
GstSamiContext *sctx = (GstSamiContext *) ctx;
/* skip title */
if (sctx->in_title)
return;
if (has_tag (sctx->state, RT_TAG)) {
g_string_append_c (sctx->rubybuf, ' ');
g_string_append_len (sctx->rubybuf, (const gchar *) ch, len);
g_string_append_c (sctx->rubybuf, ' ');
} else {
g_string_append_len (sctx->buf, (const gchar *) ch, len);
}
}
/* SAX callback table handed to the libxml2 HTML push parser in
 * sami_context_init(). Only startElement, endElement and characters are
 * hooked up; every other callback slot is NULL. The initializer is
 * positional, so the slot comments below must follow the field order of
 * xmlSAXHandler. */
static xmlSAXHandler samiSAXHandlerStruct = {
NULL, /* internalSubset */
NULL, /* isStandalone */
NULL, /* hasInternalSubset */
NULL, /* hasExternalSubset */
NULL, /* resolveEntity */
NULL, /* getEntity */
NULL, /* entityDecl */
NULL, /* notationDecl */
NULL, /* attributeDecl */
NULL, /* elementDecl */
NULL, /* unparsedEntityDecl */
NULL, /* setDocumentLocator */
NULL, /* startDocument */
NULL, /* endDocument */
start_sami_element, /* startElement */
end_sami_element, /* endElement */
NULL, /* reference */
characters_sami, /* characters */
NULL, /* ignorableWhitespace */
NULL, /* processingInstruction */
NULL, /* comment */
NULL, /* warning (xmlParserWarning) */
NULL, /* error (xmlParserError) */
NULL, /* fatalError -- NOTE(review): this slot was previously also labelled
       * "xmlParserError"; confirm against the xmlSAXHandler declaration */
NULL, /* getParameterEntity */
NULL, /* cdataBlock */
NULL, /* externalSubset */
1, /* initialized -- NOTE(review): presumably marks the table as a plain
    * SAX1 handler (no SAX2 magic); confirm against libxml2 */
NULL, /* private */
NULL, /* startElementNsSAX2Func */
NULL, /* endElementNsSAX2Func */
NULL /* xmlStructuredErrorFunc */
};
/* Pointer form of the table above, as passed to htmlCreatePushParserCtxt(). */
static xmlSAXHandlerPtr samiSAXHandler = &samiSAXHandlerStruct;
/* Allocate a fresh SAMI context and attach it to @state->user_data.
 *
 * Asserts that no context is attached yet. Creates the libxml2 HTML push
 * parser (UTF-8, using the SAMI SAX callbacks with the context as user data)
 * and the four empty string buffers. */
void
sami_context_init (ParserState * state)
{
  GstSamiContext *context;

  g_assert (state->user_data == NULL);

  context = g_new0 (GstSamiContext, 1);
  context->htmlctxt = htmlCreatePushParserCtxt (samiSAXHandler, context,
      "", 0, NULL, XML_CHAR_ENCODING_UTF8);
  context->buf = g_string_new ("");
  context->rubybuf = g_string_new ("");
  context->resultbuf = g_string_new ("");
  context->state = g_string_new ("");

  state->user_data = (gpointer) context;
}
/* Destroy the SAMI context attached to @state->user_data, if any, and reset
 * the pointer to NULL.
 *
 * The HTML parser is told that the input has ended (terminate = 1) before it
 * is freed, and any document it built is freed as well. The parser context
 * is checked for NULL because htmlCreatePushParserCtxt() can fail, in which
 * case there is nothing to terminate or free. */
void
sami_context_deinit (ParserState * state)
{
  GstSamiContext *context = (GstSamiContext *) state->user_data;

  if (context == NULL)
    return;

  if (context->htmlctxt != NULL) {
    htmlParserCtxtPtr htmlctxt = context->htmlctxt;
    htmlDocPtr doc;

    /* destroy sax context */
    htmlParseChunk (htmlctxt, "", 0, 1);
    doc = htmlctxt->myDoc;
    htmlFreeParserCtxt (htmlctxt);
    context->htmlctxt = NULL;
    if (doc)
      xmlFreeDoc (doc);
  }

  g_string_free (context->buf, TRUE);
  g_string_free (context->rubybuf, TRUE);
  g_string_free (context->resultbuf, TRUE);
  g_string_free (context->state, TRUE);
  g_free (context);
  state->user_data = NULL;
}
/* Return the SAMI context attached to @state (if any) to its empty state:
 * all four string buffers are emptied, both flags are cleared and both
 * timestamps are set to zero. The HTML parser context is left untouched. */
void
sami_context_reset (ParserState * state)
{
  GstSamiContext *context = (GstSamiContext *) state->user_data;

  if (context == NULL)
    return;

  context->time1 = 0;
  context->time2 = 0;
  context->has_result = FALSE;
  context->in_title = FALSE;

  g_string_truncate (context->buf, 0);
  g_string_truncate (context->rubybuf, 0);
  g_string_truncate (context->resultbuf, 0);
  g_string_truncate (context->state, 0);
}
/* Feed one line of SAMI input to the HTML push parser.
 *
 * Returns NULL while no subtitle has been completed. Once the SAX callbacks
 * have flagged a result, the finished markup is detached from resultbuf and
 * returned (any pending ruby text is put in front of it, followed by a
 * newline), and @state->start_time / @state->duration are set from the
 * previous and current sync start times. */
gchar *
parse_sami (ParserState * state, const gchar * line)
{
  GstSamiContext *context = (GstSamiContext *) state->user_data;
  gchar *markup;

  htmlParseChunk (context->htmlctxt, line, strlen (line), 0);

  if (!context->has_result)
    return NULL;

  if (context->rubybuf->len) {
    g_string_append_c (context->rubybuf, '\n');
    g_string_prepend (context->resultbuf, context->rubybuf->str);
    g_string_truncate (context->rubybuf, 0);
  }

  /* hand the character data over to the caller and start a new buffer */
  markup = g_string_free (context->resultbuf, FALSE);
  context->resultbuf = g_string_new ("");

  state->start_time = context->time1;
  state->duration = context->time2 - context->time1;
  context->has_result = FALSE;

  return markup;
}
#else /* GST_DISABLE_LOADSAVE_REGISTRY */
/* Stub used when libxml2 support is compiled out
 * (GST_DISABLE_LOADSAVE_REGISTRY). It must never be called; see the comment
 * in the body.
 *
 * Returns NULL so that the function still has a defined result when
 * assertions are compiled out and g_assert_not_reached() expands to
 * nothing. */
gchar *
parse_sami (ParserState * state, const gchar * line)
{
  /* our template caps should not include sami in this case */
  g_assert_not_reached ();
  return NULL;
}
/* Stub used when libxml2 support is compiled out: does nothing. */
void
sami_context_init (ParserState * state)
{
}
/* Stub used when libxml2 support is compiled out: does nothing. */
void
sami_context_deinit (ParserState * state)
{
}
/* Stub used when libxml2 support is compiled out: does nothing. */
void
sami_context_reset (ParserState * state)
{
}
#endif /* GST_DISABLE_LOADSAVE_REGISTRY */

38
gst/subparse/samiparse.h Normal file
View file

@ -0,0 +1,38 @@
/* GStreamer SAMI subtitle parser
* Copyright (c) 2006 Young-Ho Cha <ganadist at chollian net>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 02111-1307, USA.
*/
#ifndef _SAMI_PARSE_H_
#define _SAMI_PARSE_H_
#include "gstsubparse.h"
G_BEGIN_DECLS
/* Feed one line of SAMI input to the parser. Returns NULL until a subtitle
 * has been completed; then returns its markup and sets state->start_time and
 * state->duration. The returned string is detached from the parser's
 * internal buffer -- presumably the caller releases it with g_free();
 * confirm against the caller. */
gchar * parse_sami (ParserState * state, const gchar * line);
/* Create the SAMI parser context and attach it to state->user_data, which
 * must be NULL on entry. */
void sami_context_init (ParserState * state);
/* Free the SAMI parser context attached to state->user_data (if any) and set
 * the pointer back to NULL. */
void sami_context_deinit (ParserState * state);
/* Empty the buffers and clear the flags and timestamps of the SAMI parser
 * context attached to state->user_data (if any), keeping the context. */
void sami_context_reset (ParserState * state);
G_END_DECLS
#endif /* _SAMI_PARSE_H_ */