mirror of
https://gitlab.freedesktop.org/gstreamer/gstreamer.git
synced 2024-12-18 06:16:36 +00:00
subparse: remove libxml dependency for sami parser and re-enable sami parser
To celebrate 2013.gnome.asia, update the SAMI parser for GStreamer 1.x. :D Remove the conditional blocks that checked for libxml usage and implement a simple HTML markup parser for the SAMI parser. https://bugzilla.gnome.org/show_bug.cgi?id=693056
This commit is contained in:
parent
b0eb99baaa
commit
f597efe24b
3 changed files with 587 additions and 192 deletions
|
@ -1,10 +1,6 @@
|
|||
plugin_LTLIBRARIES = libgstsubparse.la
|
||||
|
||||
if USE_XML
|
||||
SAMIPARSE_SOURCES = samiparse.c samiparse.h
|
||||
else
|
||||
SAMIPARSE_SOURCES =
|
||||
endif
|
||||
|
||||
libgstsubparse_la_SOURCES = \
|
||||
gstssaparse.c \
|
||||
|
|
|
@ -55,7 +55,6 @@ gst_sub_parse_get_property (GObject * object, guint prop_id,
|
|||
GValue * value, GParamSpec * pspec);
|
||||
|
||||
|
||||
#ifndef GST_DISABLE_XML
|
||||
static GstStaticPadTemplate sink_templ = GST_STATIC_PAD_TEMPLATE ("sink",
|
||||
GST_PAD_SINK,
|
||||
GST_PAD_ALWAYS,
|
||||
|
@ -63,15 +62,6 @@ static GstStaticPadTemplate sink_templ = GST_STATIC_PAD_TEMPLATE ("sink",
|
|||
"application/x-subtitle-tmplayer; application/x-subtitle-mpl2; "
|
||||
"application/x-subtitle-dks; application/x-subtitle-qttext")
|
||||
);
|
||||
#else
|
||||
static GstStaticPadTemplate sink_templ = GST_STATIC_PAD_TEMPLATE ("sink",
|
||||
GST_PAD_SINK,
|
||||
GST_PAD_ALWAYS,
|
||||
GST_STATIC_CAPS ("application/x-subtitle; application/x-subtitle-dks; "
|
||||
"application/x-subtitle-tmplayer; application/x-subtitle-mpl2; "
|
||||
"application/x-subtitle-qttext")
|
||||
);
|
||||
#endif
|
||||
|
||||
static GstStaticPadTemplate src_templ = GST_STATIC_PAD_TEMPLATE ("src",
|
||||
GST_PAD_SRC,
|
||||
|
@ -107,11 +97,9 @@ gst_sub_parse_dispose (GObject * object)
|
|||
case GST_SUB_PARSE_FORMAT_QTTEXT:
|
||||
qttext_context_deinit (&subparse->state);
|
||||
break;
|
||||
#ifndef GST_DISABLE_XML
|
||||
case GST_SUB_PARSE_FORMAT_SAMI:
|
||||
sami_context_deinit (&subparse->state);
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -1173,11 +1161,9 @@ parser_state_dispose (GstSubParse * self, ParserState * state)
|
|||
}
|
||||
if (state->user_data) {
|
||||
switch (self->parser_type) {
|
||||
#ifndef GST_DISABLE_XML
|
||||
case GST_SUB_PARSE_FORMAT_SAMI:
|
||||
sami_context_reset (state);
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -1283,13 +1269,11 @@ gst_sub_parse_data_format_autodetect (gchar * match_str)
|
|||
GST_LOG ("MPSub (time based) format detected");
|
||||
return GST_SUB_PARSE_FORMAT_MPSUB;
|
||||
}
|
||||
#ifndef GST_DISABLE_XML
|
||||
if (strstr (match_str, "<SAMI>") != NULL ||
|
||||
strstr (match_str, "<sami>") != NULL) {
|
||||
GST_LOG ("SAMI (time based) format detected");
|
||||
return GST_SUB_PARSE_FORMAT_SAMI;
|
||||
}
|
||||
#endif
|
||||
/* we're boldly assuming the first subtitle appears within the first hour */
|
||||
if (sscanf (match_str, "0:%02u:%02u:", &n1, &n2) == 2 ||
|
||||
sscanf (match_str, "0:%02u:%02u=", &n1, &n2) == 2 ||
|
||||
|
@ -1348,13 +1332,11 @@ gst_sub_parse_format_autodetect (GstSubParse * self)
|
|||
self->parse_line = parse_mpsub;
|
||||
return gst_caps_new_simple ("text/x-raw",
|
||||
"format", G_TYPE_STRING, "utf8", NULL);
|
||||
#ifndef GST_DISABLE_XML
|
||||
case GST_SUB_PARSE_FORMAT_SAMI:
|
||||
self->parse_line = parse_sami;
|
||||
sami_context_init (&self->state);
|
||||
return gst_caps_new_simple ("text/x-raw",
|
||||
"format", G_TYPE_STRING, "pango-markup", NULL);
|
||||
#endif
|
||||
case GST_SUB_PARSE_FORMAT_TMPLAYER:
|
||||
self->parse_line = parse_tmplayer;
|
||||
self->state.max_duration = 5 * GST_SECOND;
|
||||
|
@ -1409,10 +1391,8 @@ feed_textbuf (GstSubParse * self, GstBuffer * buf)
|
|||
parser_state_init (&self->state);
|
||||
g_string_truncate (self->textbuf, 0);
|
||||
gst_adapter_clear (self->adapter);
|
||||
#ifndef GST_DISABLE_XML
|
||||
if (self->parser_type == GST_SUB_PARSE_FORMAT_SAMI)
|
||||
sami_context_reset (&self->state);
|
||||
#endif
|
||||
/* we could set a flag to make sure that the next buffer we push out also
|
||||
* has the DISCONT flag set, but there's no point really given that it's
|
||||
* subtitles which are discontinuous by nature. */
|
||||
|
@ -1690,10 +1670,8 @@ GST_STATIC_CAPS ("application/x-subtitle-tmplayer");
|
|||
static GstStaticCaps sub_caps = GST_STATIC_CAPS ("application/x-subtitle");
|
||||
#define MPL2_CAPS (gst_static_caps_get (&mpl2_caps))
|
||||
|
||||
#ifndef GST_DISABLE_XML
|
||||
static GstStaticCaps smi_caps = GST_STATIC_CAPS ("application/x-subtitle-sami");
|
||||
#define SAMI_CAPS (gst_static_caps_get (&smi_caps))
|
||||
#endif
|
||||
|
||||
static GstStaticCaps dks_caps = GST_STATIC_CAPS ("application/x-subtitle-dks");
|
||||
#define DKS_CAPS (gst_static_caps_get (&dks_caps))
|
||||
|
@ -1774,12 +1752,10 @@ gst_subparse_type_find (GstTypeFind * tf, gpointer private)
|
|||
GST_DEBUG ("MPSub format detected");
|
||||
caps = SUB_CAPS;
|
||||
break;
|
||||
#ifndef GST_DISABLE_XML
|
||||
case GST_SUB_PARSE_FORMAT_SAMI:
|
||||
GST_DEBUG ("SAMI (time-based) format detected");
|
||||
caps = SAMI_CAPS;
|
||||
break;
|
||||
#endif
|
||||
case GST_SUB_PARSE_FORMAT_TMPLAYER:
|
||||
GST_DEBUG ("TMPlayer (time based) format detected");
|
||||
caps = TMP_CAPS;
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* GStreamer SAMI subtitle parser
|
||||
* Copyright (c) 2006 Young-Ho Cha <ganadist at chollian net>
|
||||
* Copyright (c) 2006, 2013 Young-Ho Cha <ganadist at gmail com>
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Library General Public
|
||||
|
@ -19,8 +19,9 @@
|
|||
|
||||
#include "samiparse.h"
|
||||
|
||||
#include <libxml/HTMLparser.h>
|
||||
#include <glib.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#define ITALIC_TAG 'i'
|
||||
#define SPAN_TAG 's'
|
||||
|
@ -28,6 +29,8 @@
|
|||
#define RT_TAG 't'
|
||||
#define CLEAR_TAG '0'
|
||||
|
||||
typedef struct _HtmlParser HtmlParser;
|
||||
typedef struct _HtmlContext HtmlContext;
|
||||
typedef struct _GstSamiContext GstSamiContext;
|
||||
|
||||
struct _GstSamiContext
|
||||
|
@ -43,7 +46,7 @@ struct _GstSamiContext
|
|||
* that tags can be closed properly on
|
||||
* 'sync' tags. See _context_push_state()
|
||||
* and _context_pop_state(). */
|
||||
htmlParserCtxtPtr htmlctxt; /* html parser context */
|
||||
HtmlContext *htmlctxt; /* html parser context */
|
||||
gboolean has_result; /* set when ready to push out result */
|
||||
gboolean in_sync; /* flag to avoid appending anything except the
|
||||
* content of the sync elements to buf */
|
||||
|
@ -51,6 +54,525 @@ struct _GstSamiContext
|
|||
guint64 time2; /* current start attribute in sync tag */
|
||||
};
|
||||
|
||||
struct _HtmlParser
|
||||
{
|
||||
void (*start_element) (HtmlContext * ctx,
|
||||
const gchar * name, const gchar ** attr, gpointer user_data);
|
||||
void (*end_element) (HtmlContext * ctx,
|
||||
const gchar * name, gpointer user_data);
|
||||
void (*text) (HtmlContext * ctx,
|
||||
const gchar * text, gsize text_len, gpointer user_data);
|
||||
};
|
||||
|
||||
struct _HtmlContext
|
||||
{
|
||||
const HtmlParser *parser;
|
||||
gpointer user_data;
|
||||
GString *buf;
|
||||
};
|
||||
|
||||
static HtmlContext *
|
||||
html_context_new (HtmlParser * parser, gpointer user_data)
|
||||
{
|
||||
HtmlContext *ctxt = (HtmlContext *) g_new0 (HtmlContext, 1);
|
||||
ctxt->parser = parser;
|
||||
ctxt->user_data = user_data;
|
||||
ctxt->buf = g_string_new (NULL);
|
||||
return ctxt;
|
||||
}
|
||||
|
||||
static void
|
||||
html_context_free (HtmlContext * ctxt)
|
||||
{
|
||||
g_string_free (ctxt->buf, TRUE);
|
||||
g_free (ctxt);
|
||||
}
|
||||
|
||||
struct EntityMap
|
||||
{
|
||||
const gunichar unescaped;
|
||||
const gchar *escaped;
|
||||
};
|
||||
|
||||
struct EntityMap XmlEntities[] = {
|
||||
{34, "quot;"},
|
||||
{38, "amp;"},
|
||||
{39, "apos;"},
|
||||
{60, "lt;"},
|
||||
{62, "gt;"},
|
||||
{0, NULL},
|
||||
};
|
||||
|
||||
struct EntityMap HtmlEntities[] = {
|
||||
/* nbsp is not looked up in this table; the unescaping code handles it separately
|
||||
{ 160, "nbsp;" }, */
|
||||
{161, "iexcl;"},
|
||||
{162, "cent;"},
|
||||
{163, "pound;"},
|
||||
{164, "curren;"},
|
||||
{165, "yen;"},
|
||||
{166, "brvbar;"},
|
||||
{167, "sect;"},
|
||||
{168, "uml;"},
|
||||
{169, "copy;"},
|
||||
{170, "ordf;"},
|
||||
{171, "laquo;"},
|
||||
{172, "not;"},
|
||||
{173, "shy;"},
|
||||
{174, "reg;"},
|
||||
{175, "macr;"},
|
||||
{176, "deg;"},
|
||||
{177, "plusmn;"},
|
||||
{178, "sup2;"},
|
||||
{179, "sup3;"},
|
||||
{180, "acute;"},
|
||||
{181, "micro;"},
|
||||
{182, "para;"},
|
||||
{183, "middot;"},
|
||||
{184, "cedil;"},
|
||||
{185, "sup1;"},
|
||||
{186, "ordm;"},
|
||||
{187, "raquo;"},
|
||||
{188, "frac14;"},
|
||||
{189, "frac12;"},
|
||||
{190, "frac34;"},
|
||||
{191, "iquest;"},
|
||||
{192, "Agrave;"},
|
||||
{193, "Aacute;"},
|
||||
{194, "Acirc;"},
|
||||
{195, "Atilde;"},
|
||||
{196, "Auml;"},
|
||||
{197, "Aring;"},
|
||||
{198, "AElig;"},
|
||||
{199, "Ccedil;"},
|
||||
{200, "Egrave;"},
|
||||
{201, "Eacute;"},
|
||||
{202, "Ecirc;"},
|
||||
{203, "Euml;"},
|
||||
{204, "Igrave;"},
|
||||
{205, "Iacute;"},
|
||||
{206, "Icirc;"},
|
||||
{207, "Iuml;"},
|
||||
{208, "ETH;"},
|
||||
{209, "Ntilde;"},
|
||||
{210, "Ograve;"},
|
||||
{211, "Oacute;"},
|
||||
{212, "Ocirc;"},
|
||||
{213, "Otilde;"},
|
||||
{214, "Ouml;"},
|
||||
{215, "times;"},
|
||||
{216, "Oslash;"},
|
||||
{217, "Ugrave;"},
|
||||
{218, "Uacute;"},
|
||||
{219, "Ucirc;"},
|
||||
{220, "Uuml;"},
|
||||
{221, "Yacute;"},
|
||||
{222, "THORN;"},
|
||||
{223, "szlig;"},
|
||||
{224, "agrave;"},
|
||||
{225, "aacute;"},
|
||||
{226, "acirc;"},
|
||||
{227, "atilde;"},
|
||||
{228, "auml;"},
|
||||
{229, "aring;"},
|
||||
{230, "aelig;"},
|
||||
{231, "ccedil;"},
|
||||
{232, "egrave;"},
|
||||
{233, "eacute;"},
|
||||
{234, "ecirc;"},
|
||||
{235, "euml;"},
|
||||
{236, "igrave;"},
|
||||
{237, "iacute;"},
|
||||
{238, "icirc;"},
|
||||
{239, "iuml;"},
|
||||
{240, "eth;"},
|
||||
{241, "ntilde;"},
|
||||
{242, "ograve;"},
|
||||
{243, "oacute;"},
|
||||
{244, "ocirc;"},
|
||||
{245, "otilde;"},
|
||||
{246, "ouml;"},
|
||||
{247, "divide;"},
|
||||
{248, "oslash;"},
|
||||
{249, "ugrave;"},
|
||||
{250, "uacute;"},
|
||||
{251, "ucirc;"},
|
||||
{252, "uuml;"},
|
||||
{253, "yacute;"},
|
||||
{254, "thorn;"},
|
||||
{255, "yuml;"},
|
||||
{338, "OElig;"},
|
||||
{339, "oelig;"},
|
||||
{352, "Scaron;"},
|
||||
{353, "scaron;"},
|
||||
{376, "Yuml;"},
|
||||
{402, "fnof;"},
|
||||
{710, "circ;"},
|
||||
{732, "tilde;"},
|
||||
{913, "Alpha;"},
|
||||
{914, "Beta;"},
|
||||
{915, "Gamma;"},
|
||||
{916, "Delta;"},
|
||||
{917, "Epsilon;"},
|
||||
{918, "Zeta;"},
|
||||
{919, "Eta;"},
|
||||
{920, "Theta;"},
|
||||
{921, "Iota;"},
|
||||
{922, "Kappa;"},
|
||||
{923, "Lambda;"},
|
||||
{924, "Mu;"},
|
||||
{925, "Nu;"},
|
||||
{926, "Xi;"},
|
||||
{927, "Omicron;"},
|
||||
{928, "Pi;"},
|
||||
{929, "Rho;"},
|
||||
{931, "Sigma;"},
|
||||
{932, "Tau;"},
|
||||
{933, "Upsilon;"},
|
||||
{934, "Phi;"},
|
||||
{935, "Chi;"},
|
||||
{936, "Psi;"},
|
||||
{937, "Omega;"},
|
||||
{945, "alpha;"},
|
||||
{946, "beta;"},
|
||||
{947, "gamma;"},
|
||||
{948, "delta;"},
|
||||
{949, "epsilon;"},
|
||||
{950, "zeta;"},
|
||||
{951, "eta;"},
|
||||
{952, "theta;"},
|
||||
{953, "iota;"},
|
||||
{954, "kappa;"},
|
||||
{955, "lambda;"},
|
||||
{956, "mu;"},
|
||||
{957, "nu;"},
|
||||
{958, "xi;"},
|
||||
{959, "omicron;"},
|
||||
{960, "pi;"},
|
||||
{961, "rho;"},
|
||||
{962, "sigmaf;"},
|
||||
{963, "sigma;"},
|
||||
{964, "tau;"},
|
||||
{965, "upsilon;"},
|
||||
{966, "phi;"},
|
||||
{967, "chi;"},
|
||||
{968, "psi;"},
|
||||
{969, "omega;"},
|
||||
{977, "thetasym;"},
|
||||
{978, "upsih;"},
|
||||
{982, "piv;"},
|
||||
{8194, "ensp;"},
|
||||
{8195, "emsp;"},
|
||||
{8201, "thinsp;"},
|
||||
{8204, "zwnj;"},
|
||||
{8205, "zwj;"},
|
||||
{8206, "lrm;"},
|
||||
{8207, "rlm;"},
|
||||
{8211, "ndash;"},
|
||||
{8212, "mdash;"},
|
||||
{8216, "lsquo;"},
|
||||
{8217, "rsquo;"},
|
||||
{8218, "sbquo;"},
|
||||
{8220, "ldquo;"},
|
||||
{8221, "rdquo;"},
|
||||
{8222, "bdquo;"},
|
||||
{8224, "dagger;"},
|
||||
{8225, "Dagger;"},
|
||||
{8226, "bull;"},
|
||||
{8230, "hellip;"},
|
||||
{8240, "permil;"},
|
||||
{8242, "prime;"},
|
||||
{8243, "Prime;"},
|
||||
{8249, "lsaquo;"},
|
||||
{8250, "rsaquo;"},
|
||||
{8254, "oline;"},
|
||||
{8260, "frasl;"},
|
||||
{8364, "euro;"},
|
||||
{8465, "image;"},
|
||||
{8472, "weierp;"},
|
||||
{8476, "real;"},
|
||||
{8482, "trade;"},
|
||||
{8501, "alefsym;"},
|
||||
{8592, "larr;"},
|
||||
{8593, "uarr;"},
|
||||
{8594, "rarr;"},
|
||||
{8595, "darr;"},
|
||||
{8596, "harr;"},
|
||||
{8629, "crarr;"},
|
||||
{8656, "lArr;"},
|
||||
{8657, "uArr;"},
|
||||
{8658, "rArr;"},
|
||||
{8659, "dArr;"},
|
||||
{8660, "hArr;"},
|
||||
{8704, "forall;"},
|
||||
{8706, "part;"},
|
||||
{8707, "exist;"},
|
||||
{8709, "empty;"},
|
||||
{8711, "nabla;"},
|
||||
{8712, "isin;"},
|
||||
{8713, "notin;"},
|
||||
{8715, "ni;"},
|
||||
{8719, "prod;"},
|
||||
{8721, "sum;"},
|
||||
{8722, "minus;"},
|
||||
{8727, "lowast;"},
|
||||
{8730, "radic;"},
|
||||
{8733, "prop;"},
|
||||
{8734, "infin;"},
|
||||
{8736, "ang;"},
|
||||
{8743, "and;"},
|
||||
{8744, "or;"},
|
||||
{8745, "cap;"},
|
||||
{8746, "cup;"},
|
||||
{8747, "int;"},
|
||||
{8756, "there4;"},
|
||||
{8764, "sim;"},
|
||||
{8773, "cong;"},
|
||||
{8776, "asymp;"},
|
||||
{8800, "ne;"},
|
||||
{8801, "equiv;"},
|
||||
{8804, "le;"},
|
||||
{8805, "ge;"},
|
||||
{8834, "sub;"},
|
||||
{8835, "sup;"},
|
||||
{8836, "nsub;"},
|
||||
{8838, "sube;"},
|
||||
{8839, "supe;"},
|
||||
{8853, "oplus;"},
|
||||
{8855, "otimes;"},
|
||||
{8869, "perp;"},
|
||||
{8901, "sdot;"},
|
||||
{8968, "lceil;"},
|
||||
{8969, "rceil;"},
|
||||
{8970, "lfloor;"},
|
||||
{8971, "rfloor;"},
|
||||
{9001, "lang;"},
|
||||
{9002, "rang;"},
|
||||
{9674, "loz;"},
|
||||
{9824, "spades;"},
|
||||
{9827, "clubs;"},
|
||||
{9829, "hearts;"},
|
||||
{9830, "diams;"},
|
||||
{0, NULL},
|
||||
};
|
||||
|
||||
static gchar *
|
||||
unescape_string (const gchar * text)
|
||||
{
|
||||
gint i;
|
||||
GString *unescaped = g_string_new (NULL);
|
||||
|
||||
while (*text) {
|
||||
if (*text == '&') {
|
||||
text++;
|
||||
|
||||
/* unescape   and */
|
||||
if (!g_ascii_strncasecmp (text, "nbsp", 4)) {
|
||||
unescaped = g_string_append_unichar (unescaped, 160);
|
||||
text += 4;
|
||||
if (*text == ';') {
|
||||
text++;
|
||||
}
|
||||
goto next;
|
||||
}
|
||||
|
||||
/* pass xml entities. these will be processed as pango markup */
|
||||
for (i = 0; XmlEntities[i].escaped; i++) {
|
||||
gssize len = strlen (XmlEntities[i].escaped);
|
||||
if (!g_ascii_strncasecmp (text, XmlEntities[i].escaped, len)) {
|
||||
unescaped = g_string_append_c (unescaped, '&');
|
||||
unescaped =
|
||||
g_string_append_len (unescaped, XmlEntities[i].escaped, len);
|
||||
text += len;
|
||||
goto next;
|
||||
}
|
||||
}
|
||||
|
||||
/* convert html entities */
|
||||
for (i = 0; HtmlEntities[i].escaped; i++) {
|
||||
gssize len = strlen (HtmlEntities[i].escaped);
|
||||
if (!strncmp (text, HtmlEntities[i].escaped, len)) {
|
||||
unescaped =
|
||||
g_string_append_unichar (unescaped, HtmlEntities[i].unescaped);
|
||||
text += len;
|
||||
goto next;
|
||||
}
|
||||
}
|
||||
|
||||
if (*text == '#') {
|
||||
gboolean is_hex = FALSE;
|
||||
gunichar l;
|
||||
gchar *end = NULL;
|
||||
|
||||
text++;
|
||||
if (*text == 'x') {
|
||||
is_hex = TRUE;
|
||||
text++;
|
||||
}
|
||||
errno = 0;
|
||||
if (is_hex) {
|
||||
l = strtoul (text, &end, 16);
|
||||
} else {
|
||||
l = strtoul (text, &end, 10);
|
||||
}
|
||||
|
||||
if (text == end || errno != 0) {
|
||||
/* error occured. pass it */
|
||||
goto next;
|
||||
}
|
||||
unescaped = g_string_append_unichar (unescaped, l);
|
||||
text = end;
|
||||
|
||||
if (*text == ';') {
|
||||
text++;
|
||||
}
|
||||
goto next;
|
||||
}
|
||||
|
||||
/* escape & */
|
||||
unescaped = g_string_append (unescaped, "&");
|
||||
|
||||
next:
|
||||
continue;
|
||||
|
||||
} else if (g_ascii_isspace (*text)) {
|
||||
unescaped = g_string_append_c (unescaped, ' ');
|
||||
/* strip whitespace */
|
||||
do {
|
||||
text++;
|
||||
} while ((*text) && g_ascii_isspace (*text));
|
||||
} else {
|
||||
unescaped = g_string_append_c (unescaped, *text);
|
||||
text++;
|
||||
}
|
||||
}
|
||||
|
||||
return g_string_free (unescaped, FALSE);
|
||||
}
|
||||
|
||||
static const gchar *
|
||||
string_token (const gchar * string, const gchar * delimiter, gchar ** first)
|
||||
{
|
||||
gchar *next = strstr (string, delimiter);
|
||||
if (next) {
|
||||
*first = strndup (string, next - string);
|
||||
} else {
|
||||
*first = strdup (string);
|
||||
}
|
||||
return next;
|
||||
}
|
||||
|
||||
static void
|
||||
html_context_handle_element (HtmlContext * ctxt,
|
||||
const gchar * string, gboolean must_close)
|
||||
{
|
||||
gchar *name = NULL;
|
||||
gint count = 0, i;
|
||||
gchar **attrs;
|
||||
const gchar *found, *next;
|
||||
|
||||
/* split element name and attributes */
|
||||
next = string_token (string, " ", &name);
|
||||
|
||||
if (next) {
|
||||
/* count attributes */
|
||||
found = next + 1;
|
||||
while (TRUE) {
|
||||
found = strchr (found, '=');
|
||||
if (!found)
|
||||
break;
|
||||
found++;
|
||||
count++;
|
||||
}
|
||||
} else {
|
||||
count = 0;
|
||||
}
|
||||
|
||||
attrs = g_new0 (gchar *, (count + 1) * 2);
|
||||
|
||||
for (i = 0; i < count; i += 2) {
|
||||
gchar *attr_name = NULL, *attr_value = NULL;
|
||||
gsize length;
|
||||
next = string_token (next + 1, "=", &attr_name);
|
||||
next = string_token (next + 1, " ", &attr_value);
|
||||
|
||||
/* strip " or ' from attribute value */
|
||||
if (attr_value[0] == '"' || attr_value[0] == '\'') {
|
||||
gchar *tmp = strdup (attr_value + 1);
|
||||
g_free (attr_value);
|
||||
attr_value = tmp;
|
||||
}
|
||||
|
||||
length = strlen (attr_value);
|
||||
if (attr_value[length - 1] == '"' || attr_value[length - 1] == '\'') {
|
||||
attr_value[length - 1] = '\0';
|
||||
}
|
||||
|
||||
attrs[i] = attr_name;
|
||||
attrs[i + 1] = attr_value;
|
||||
}
|
||||
|
||||
ctxt->parser->start_element (ctxt, name,
|
||||
(const gchar **) attrs, ctxt->user_data);
|
||||
if (must_close) {
|
||||
ctxt->parser->end_element (ctxt, name, ctxt->user_data);
|
||||
}
|
||||
g_strfreev (attrs);
|
||||
g_free (name);
|
||||
}
|
||||
|
||||
static void
|
||||
html_context_parse (HtmlContext * ctxt, gchar * text, gsize text_len)
|
||||
{
|
||||
const gchar *next = NULL;
|
||||
ctxt->buf = g_string_append_len (ctxt->buf, text, text_len);
|
||||
next = ctxt->buf->str;
|
||||
while (TRUE) {
|
||||
if (next[0] == '<') {
|
||||
gchar *element = NULL;
|
||||
/* find <blahblah> */
|
||||
if (!strchr (next, '>')) {
|
||||
/* no tag end point. buffer will be process in next time */
|
||||
return;
|
||||
}
|
||||
|
||||
next = string_token (next, ">", &element);
|
||||
next++;
|
||||
if (g_str_has_suffix (next, "/")) {
|
||||
/* handle <blah/> */
|
||||
element[strlen (element) - 1] = '\0';
|
||||
html_context_handle_element (ctxt, element + 1, TRUE);
|
||||
} else if (element[1] == '/') {
|
||||
/* handle </blah> */
|
||||
ctxt->parser->end_element (ctxt, element + 2, ctxt->user_data);
|
||||
} else {
|
||||
/* handle <blah> */
|
||||
html_context_handle_element (ctxt, element + 1, FALSE);
|
||||
}
|
||||
g_free (element);
|
||||
} else if (strchr (next, '<')) {
|
||||
gchar *text = NULL;
|
||||
gsize length;
|
||||
next = string_token (next, "<", &text);
|
||||
text = g_strstrip (text);
|
||||
length = strlen (text);
|
||||
ctxt->parser->text (ctxt, text, length, ctxt->user_data);
|
||||
g_free (text);
|
||||
|
||||
} else {
|
||||
gchar *text = (gchar *) next;
|
||||
gsize length;
|
||||
text = g_strstrip (text);
|
||||
length = strlen (text);
|
||||
ctxt->parser->text (ctxt, text, length, ctxt->user_data);
|
||||
ctxt->buf = g_string_assign (ctxt->buf, "");
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
ctxt->buf = g_string_assign (ctxt->buf, next);
|
||||
}
|
||||
|
||||
static gchar *
|
||||
has_tag (GString * str, const gchar tag)
|
||||
{
|
||||
|
@ -116,26 +638,27 @@ sami_context_pop_state (GstSamiContext * sctx, char state)
|
|||
}
|
||||
|
||||
static void
|
||||
handle_start_sync (GstSamiContext * sctx, const xmlChar ** atts)
|
||||
handle_start_sync (GstSamiContext * sctx, const gchar ** atts)
|
||||
{
|
||||
int i;
|
||||
|
||||
sami_context_pop_state (sctx, CLEAR_TAG);
|
||||
if (atts != NULL) {
|
||||
for (i = 0; (atts[i] != NULL); i += 2) {
|
||||
const xmlChar *key, *value;
|
||||
const gchar *key, *value;
|
||||
|
||||
key = atts[i];
|
||||
value = atts[i + 1];
|
||||
|
||||
if (!value)
|
||||
continue;
|
||||
if (!xmlStrncmp ((const xmlChar *) "start", key, 5)) {
|
||||
if (!g_ascii_strcasecmp ("start", key)) {
|
||||
/* Only set a new start time if we don't have text pending */
|
||||
if (sctx->resultbuf->len == 0)
|
||||
sctx->time1 = sctx->time2;
|
||||
|
||||
sctx->time2 = atoi ((const char *) value) * GST_MSECOND;
|
||||
sctx->time2 = MAX (sctx->time2, sctx->time1);
|
||||
g_string_append (sctx->resultbuf, sctx->buf->str);
|
||||
sctx->has_result = (sctx->resultbuf->len != 0) ? TRUE : FALSE;
|
||||
g_string_truncate (sctx->buf, 0);
|
||||
|
@ -145,7 +668,7 @@ handle_start_sync (GstSamiContext * sctx, const xmlChar ** atts)
|
|||
}
|
||||
|
||||
static void
|
||||
handle_start_font (GstSamiContext * sctx, const xmlChar ** atts)
|
||||
handle_start_font (GstSamiContext * sctx, const gchar ** atts)
|
||||
{
|
||||
int i;
|
||||
|
||||
|
@ -153,53 +676,53 @@ handle_start_font (GstSamiContext * sctx, const xmlChar ** atts)
|
|||
if (atts != NULL) {
|
||||
g_string_append (sctx->buf, "<span");
|
||||
for (i = 0; (atts[i] != NULL); i += 2) {
|
||||
const xmlChar *key, *value;
|
||||
const gchar *key, *value;
|
||||
|
||||
key = atts[i];
|
||||
value = atts[i + 1];
|
||||
|
||||
if (!value)
|
||||
continue;
|
||||
if (!xmlStrncmp ((const xmlChar *) "color", key, 5)) {
|
||||
if (!g_ascii_strcasecmp ("color", key)) {
|
||||
/*
|
||||
* There are invalid color value in many
|
||||
* sami files.
|
||||
* It will fix hex color value that start without '#'
|
||||
*/
|
||||
const gchar *sharp = "";
|
||||
int len = xmlStrlen (value);
|
||||
int len = strlen (value);
|
||||
|
||||
if (!(*value == '#' && len == 7)) {
|
||||
gchar *r;
|
||||
|
||||
/* check if it looks like hex */
|
||||
if (strtol ((const char *) value, &r, 16) >= 0 &&
|
||||
((xmlChar *) r == (value + 6) && len == 6)) {
|
||||
((gchar *) r == (value + 6) && len == 6)) {
|
||||
sharp = "#";
|
||||
}
|
||||
}
|
||||
/* some colours can be found in many sami files, but X RGB database
|
||||
* doesn't contain a colour by this name, so map explicitly */
|
||||
if (!xmlStrncasecmp (value, (const xmlChar *) "aqua", len)) {
|
||||
value = (const xmlChar *) "#00ffff";
|
||||
} else if (!xmlStrncasecmp (value, (const xmlChar *) "crimson", len)) {
|
||||
value = (const xmlChar *) "#dc143c";
|
||||
} else if (!xmlStrncasecmp (value, (const xmlChar *) "fuchsia", len)) {
|
||||
value = (const xmlChar *) "#ff00ff";
|
||||
} else if (!xmlStrncasecmp (value, (const xmlChar *) "indigo", len)) {
|
||||
value = (const xmlChar *) "#4b0082";
|
||||
} else if (!xmlStrncasecmp (value, (const xmlChar *) "lime", len)) {
|
||||
value = (const xmlChar *) "#00ff00";
|
||||
} else if (!xmlStrncasecmp (value, (const xmlChar *) "olive", len)) {
|
||||
value = (const xmlChar *) "#808000";
|
||||
} else if (!xmlStrncasecmp (value, (const xmlChar *) "silver", len)) {
|
||||
value = (const xmlChar *) "#c0c0c0";
|
||||
} else if (!xmlStrncasecmp (value, (const xmlChar *) "teal", len)) {
|
||||
value = (const xmlChar *) "#008080";
|
||||
if (!g_ascii_strcasecmp ("aqua", value)) {
|
||||
value = "#00ffff";
|
||||
} else if (!g_ascii_strcasecmp ("crimson", value)) {
|
||||
value = "#dc143c";
|
||||
} else if (!g_ascii_strcasecmp ("fuchsia", value)) {
|
||||
value = "#ff00ff";
|
||||
} else if (!g_ascii_strcasecmp ("indigo", value)) {
|
||||
value = "#4b0082";
|
||||
} else if (!g_ascii_strcasecmp ("lime", value)) {
|
||||
value = "#00ff00";
|
||||
} else if (!g_ascii_strcasecmp ("olive", value)) {
|
||||
value = "#808000";
|
||||
} else if (!g_ascii_strcasecmp ("silver", value)) {
|
||||
value = "#c0c0c0";
|
||||
} else if (!g_ascii_strcasecmp ("teal", value)) {
|
||||
value = "#008080";
|
||||
}
|
||||
g_string_append_printf (sctx->buf, " foreground=\"%s%s\"", sharp,
|
||||
value);
|
||||
} else if (!xmlStrncasecmp ((const xmlChar *) "face", key, 4)) {
|
||||
} else if (!g_ascii_strcasecmp ("face", key)) {
|
||||
g_string_append_printf (sctx->buf, " font_family=\"%s\"", value);
|
||||
}
|
||||
}
|
||||
|
@ -209,46 +732,47 @@ handle_start_font (GstSamiContext * sctx, const xmlChar ** atts)
|
|||
}
|
||||
|
||||
static void
|
||||
start_sami_element (void *ctx, const xmlChar * name, const xmlChar ** atts)
|
||||
handle_start_element (HtmlContext * ctx, const gchar * name,
|
||||
const char **atts, gpointer user_data)
|
||||
{
|
||||
GstSamiContext *sctx = (GstSamiContext *) ctx;
|
||||
GstSamiContext *sctx = (GstSamiContext *) user_data;
|
||||
|
||||
GST_LOG ("name:%s", name);
|
||||
|
||||
if (!xmlStrncmp ((const xmlChar *) "sync", name, 4)) {
|
||||
if (!g_ascii_strcasecmp ("sync", name)) {
|
||||
handle_start_sync (sctx, atts);
|
||||
sctx->in_sync = TRUE;
|
||||
} else if (!xmlStrncmp ((const xmlChar *) "font", name, 4)) {
|
||||
} else if (!g_ascii_strcasecmp ("font", name)) {
|
||||
handle_start_font (sctx, atts);
|
||||
} else if (!xmlStrncmp ((const xmlChar *) "ruby", name, 4)) {
|
||||
} else if (!g_ascii_strcasecmp ("ruby", name)) {
|
||||
sami_context_push_state (sctx, RUBY_TAG);
|
||||
} else if (!xmlStrncmp ((const xmlChar *) "br", name, 2)) {
|
||||
} else if (!g_ascii_strcasecmp ("br", name)) {
|
||||
g_string_append_c (sctx->buf, '\n');
|
||||
/* FIXME: support for furigana/ruby once implemented in pango */
|
||||
} else if (!xmlStrncmp ((const xmlChar *) "rt", name, 2)) {
|
||||
} else if (!g_ascii_strcasecmp ("rt", name)) {
|
||||
if (has_tag (sctx->state, ITALIC_TAG)) {
|
||||
g_string_append (sctx->rubybuf, "<i>");
|
||||
}
|
||||
g_string_append (sctx->rubybuf, "<span size='xx-small' rise='-100'>");
|
||||
sami_context_push_state (sctx, RT_TAG);
|
||||
} else if (!xmlStrncmp ((const xmlChar *) "p", name, 1)) {
|
||||
} else if (!xmlStrncmp ((const xmlChar *) "i", name, 1)) {
|
||||
} else if (!g_ascii_strcasecmp ("i", name)) {
|
||||
g_string_append (sctx->buf, "<i>");
|
||||
sami_context_push_state (sctx, ITALIC_TAG);
|
||||
} else if (!g_ascii_strcasecmp ("p", name)) {
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
end_sami_element (void *ctx, const xmlChar * name)
|
||||
handle_end_element (HtmlContext * ctx, const char *name, gpointer user_data)
|
||||
{
|
||||
GstSamiContext *sctx = (GstSamiContext *) ctx;
|
||||
GstSamiContext *sctx = (GstSamiContext *) user_data;
|
||||
|
||||
GST_LOG ("name:%s", name);
|
||||
|
||||
if (!xmlStrncmp ((const xmlChar *) "sync", name, 4)) {
|
||||
if (!g_ascii_strcasecmp ("sync", name)) {
|
||||
sctx->in_sync = FALSE;
|
||||
} else if ((!xmlStrncmp ((const xmlChar *) "body", name, 4)) ||
|
||||
(!xmlStrncmp ((const xmlChar *) "sami", name, 4))) {
|
||||
} else if ((!g_ascii_strcasecmp ("body", name)) ||
|
||||
(!g_ascii_strcasecmp ("sami", name))) {
|
||||
/* We will usually have one buffer left when the body is closed
|
||||
* as we need the next sync to actually send it */
|
||||
if (sctx->buf->len != 0) {
|
||||
|
@ -261,90 +785,40 @@ end_sami_element (void *ctx, const xmlChar * name)
|
|||
sctx->has_result = (sctx->resultbuf->len != 0) ? TRUE : FALSE;
|
||||
g_string_truncate (sctx->buf, 0);
|
||||
}
|
||||
} else if (!xmlStrncmp ((const xmlChar *) "font", name, 4)) {
|
||||
} else if (!g_ascii_strcasecmp ("font", name)) {
|
||||
sami_context_pop_state (sctx, SPAN_TAG);
|
||||
} else if (!xmlStrncmp ((const xmlChar *) "ruby", name, 4)) {
|
||||
} else if (!g_ascii_strcasecmp ("ruby", name)) {
|
||||
sami_context_pop_state (sctx, RUBY_TAG);
|
||||
} else if (!xmlStrncmp ((const xmlChar *) "i", name, 1)) {
|
||||
} else if (!g_ascii_strcasecmp ("i", name)) {
|
||||
sami_context_pop_state (sctx, ITALIC_TAG);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
characters_sami (void *ctx, const xmlChar * ch, int len)
|
||||
handle_text (HtmlContext * ctx, const gchar * text, gsize text_len,
|
||||
gpointer user_data)
|
||||
{
|
||||
GstSamiContext *sctx = (GstSamiContext *) ctx;
|
||||
gchar *escaped;
|
||||
gchar *tmp;
|
||||
gint i;
|
||||
GstSamiContext *sctx = (GstSamiContext *) user_data;
|
||||
|
||||
/* Skip everything except content of the sync elements */
|
||||
if (!sctx->in_sync)
|
||||
return;
|
||||
|
||||
escaped = g_markup_escape_text ((const gchar *) ch, len);
|
||||
g_strstrip (escaped);
|
||||
|
||||
/* Remove double spaces forom the string as those are
|
||||
* usually added by newlines and indention */
|
||||
tmp = escaped;
|
||||
for (i = 0; i <= strlen (escaped); i++) {
|
||||
escaped[i] = *tmp;
|
||||
if (*tmp != ' ') {
|
||||
tmp++;
|
||||
continue;
|
||||
}
|
||||
while (*tmp == ' ')
|
||||
tmp++;
|
||||
}
|
||||
|
||||
if (has_tag (sctx->state, RT_TAG)) {
|
||||
g_string_append_c (sctx->rubybuf, ' ');
|
||||
g_string_append (sctx->rubybuf, escaped);
|
||||
g_string_append (sctx->rubybuf, text);
|
||||
g_string_append_c (sctx->rubybuf, ' ');
|
||||
} else {
|
||||
g_string_append (sctx->buf, escaped);
|
||||
g_string_append (sctx->buf, text);
|
||||
}
|
||||
g_free (escaped);
|
||||
}
|
||||
|
||||
static xmlSAXHandler samiSAXHandlerStruct = {
|
||||
NULL, /* internalSubset */
|
||||
NULL, /* isStandalone */
|
||||
NULL, /* hasInternalSubset */
|
||||
NULL, /* hasExternalSubset */
|
||||
NULL, /* resolveEntity */
|
||||
NULL, /* getEntity */
|
||||
NULL, /* entityDecl */
|
||||
NULL, /* notationDecl */
|
||||
NULL, /* attributeDecl */
|
||||
NULL, /* elementDecl */
|
||||
NULL, /* unparsedEntityDecl */
|
||||
NULL, /* setDocumentLocator */
|
||||
NULL, /* startDocument */
|
||||
NULL, /* endDocument */
|
||||
start_sami_element, /* startElement */
|
||||
end_sami_element, /* endElement */
|
||||
NULL, /* reference */
|
||||
characters_sami, /* characters */
|
||||
NULL, /* ignorableWhitespace */
|
||||
NULL, /* processingInstruction */
|
||||
NULL, /* comment */
|
||||
NULL, /* xmlParserWarning */
|
||||
NULL, /* xmlParserError */
|
||||
NULL, /* xmlParserError */
|
||||
NULL, /* getParameterEntity */
|
||||
NULL, /* cdataBlock */
|
||||
NULL, /* externalSubset */
|
||||
1, /* initialized */
|
||||
NULL, /* private */
|
||||
NULL, /* startElementNsSAX2Func */
|
||||
NULL, /* endElementNsSAX2Func */
|
||||
NULL /* xmlStructuredErrorFunc */
|
||||
static HtmlParser samiParser = {
|
||||
handle_start_element, /* start_element */
|
||||
handle_end_element, /* end_element */
|
||||
handle_text, /* text */
|
||||
};
|
||||
|
||||
static xmlSAXHandlerPtr samiSAXHandler = &samiSAXHandlerStruct;
|
||||
|
||||
void
|
||||
sami_context_init (ParserState * state)
|
||||
{
|
||||
|
@ -354,8 +828,7 @@ sami_context_init (ParserState * state)
|
|||
state->user_data = (gpointer) g_new0 (GstSamiContext, 1);
|
||||
context = (GstSamiContext *) state->user_data;
|
||||
|
||||
context->htmlctxt = htmlCreatePushParserCtxt (samiSAXHandler, context,
|
||||
"", 0, NULL, XML_CHAR_ENCODING_UTF8);
|
||||
context->htmlctxt = html_context_new (&samiParser, context);
|
||||
context->buf = g_string_new ("");
|
||||
context->rubybuf = g_string_new ("");
|
||||
context->resultbuf = g_string_new ("");
|
||||
|
@ -368,17 +841,8 @@ sami_context_deinit (ParserState * state)
|
|||
GstSamiContext *context = (GstSamiContext *) state->user_data;
|
||||
|
||||
if (context) {
|
||||
htmlParserCtxtPtr htmlctxt = context->htmlctxt;
|
||||
|
||||
/* destroy sax context */
|
||||
htmlDocPtr doc;
|
||||
|
||||
htmlParseChunk (htmlctxt, "", 0, 1);
|
||||
doc = htmlctxt->myDoc;
|
||||
htmlFreeParserCtxt (htmlctxt);
|
||||
html_context_free (context->htmlctxt);
|
||||
context->htmlctxt = NULL;
|
||||
if (doc)
|
||||
xmlFreeDoc (doc);
|
||||
g_string_free (context->buf, TRUE);
|
||||
g_string_free (context->rubybuf, TRUE);
|
||||
g_string_free (context->resultbuf, TRUE);
|
||||
|
@ -405,70 +869,29 @@ sami_context_reset (ParserState * state)
|
|||
}
|
||||
}
|
||||
|
||||
static gchar *
|
||||
fix_invalid_entities (const gchar * line)
|
||||
{
|
||||
const gchar *cp, *pp; /* current pointer, previous pointer */
|
||||
gssize size;
|
||||
GString *ret = g_string_new (NULL);
|
||||
|
||||
pp = line;
|
||||
cp = strchr (line, '&');
|
||||
while (cp) {
|
||||
size = cp - pp;
|
||||
ret = g_string_append_len (ret, pp, size);
|
||||
cp++;
|
||||
if (g_ascii_strncasecmp (cp, "nbsp;", 5)
|
||||
&& (!g_ascii_strncasecmp (cp, "nbsp", 4))) {
|
||||
/* translate " " to " " */
|
||||
ret = g_string_append_len (ret, " ", 6);
|
||||
cp += 4;
|
||||
} else if (g_ascii_strncasecmp (cp, "quot;", 5)
|
||||
&& g_ascii_strncasecmp (cp, "amp;", 4)
|
||||
&& g_ascii_strncasecmp (cp, "apos;", 5)
|
||||
&& g_ascii_strncasecmp (cp, "lt;", 3)
|
||||
&& g_ascii_strncasecmp (cp, "gt;", 3)
|
||||
&& g_ascii_strncasecmp (cp, "nbsp;", 5)
|
||||
&& cp[0] != '#') {
|
||||
/* translate "&" to "&" */
|
||||
ret = g_string_append_len (ret, "&", 5);
|
||||
} else {
|
||||
/* do not translate */
|
||||
ret = g_string_append_c (ret, '&');
|
||||
}
|
||||
|
||||
pp = cp;
|
||||
cp = strchr (pp, '&');
|
||||
}
|
||||
ret = g_string_append (ret, pp);
|
||||
return g_string_free (ret, FALSE);
|
||||
}
|
||||
|
||||
gchar *
|
||||
parse_sami (ParserState * state, const gchar * line)
|
||||
{
|
||||
gchar *fixed_line;
|
||||
gchar *ret = NULL;
|
||||
GstSamiContext *context = (GstSamiContext *) state->user_data;
|
||||
|
||||
fixed_line = fix_invalid_entities (line);
|
||||
htmlParseChunk (context->htmlctxt, fixed_line, strlen (fixed_line), 0);
|
||||
g_free (fixed_line);
|
||||
gchar *unescaped = unescape_string (line);
|
||||
html_context_parse (context->htmlctxt, (gchar *) unescaped,
|
||||
strlen (unescaped));
|
||||
g_free (unescaped);
|
||||
|
||||
if (context->has_result) {
|
||||
gchar *r;
|
||||
|
||||
if (context->rubybuf->len) {
|
||||
context->rubybuf = g_string_append_c (context->rubybuf, '\n');
|
||||
g_string_prepend (context->resultbuf, context->rubybuf->str);
|
||||
context->rubybuf = g_string_truncate (context->rubybuf, 0);
|
||||
}
|
||||
|
||||
r = g_string_free (context->resultbuf, FALSE);
|
||||
ret = g_string_free (context->resultbuf, FALSE);
|
||||
context->resultbuf = g_string_new ("");
|
||||
state->start_time = context->time1;
|
||||
state->duration = context->time2 - context->time1;
|
||||
context->has_result = FALSE;
|
||||
return r;
|
||||
}
|
||||
return NULL;
|
||||
return ret;
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue