gstreamer/ext/ttml/ttmlparse.c
Chris Bass b4d9d9d9f8 ttmlparse: don't filter out untimed whitespace
Significant whitespace in elements that don't have begin/end values
should inherit timing from its parent, or if no its parents have no
timing, from the document's Root Temporal Extent. Currently, such
whitespace is removed, which is not spec-compliant. Fix this by
retaining whitespace in content nodes, and assigning a Root Temporal
Extent of 24 hours to any significant whitespace whose parents have no
associated timing.

https://bugzilla.gnome.org/show_bug.cgi?id=781027
2017-04-09 10:42:16 +03:00

1850 lines
54 KiB
C

/* GStreamer TTML subtitle parser
* Copyright (C) <2015> British Broadcasting Corporation
* Authors:
* Chris Bass <dash@rd.bbc.co.uk>
* Peter Taylour <dash@rd.bbc.co.uk>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
/*
* Parses subtitle files encoded using the EBU-TT-D profile of TTML, as defined
* in https://tech.ebu.ch/files/live/sites/tech/files/shared/tech/tech3380.pdf
* and http://www.w3.org/TR/ttaf1-dfxp/, respectively.
*/
#include <glib.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <libxml/xmlmemory.h>
#include <libxml/parser.h>
#include "ttmlparse.h"
#include "subtitle.h"
#include "subtitlemeta.h"
#define DEFAULT_CELLRES_X 32
#define DEFAULT_CELLRES_Y 15
#define MAX_FONT_FAMILY_NAME_LENGTH 128
#define NSECONDS_IN_DAY 24 * 3600 * GST_SECOND
GST_DEBUG_CATEGORY_EXTERN (ttmlparse_debug);
#define GST_CAT_DEFAULT ttmlparse_debug
static gchar *ttml_get_xml_property (const xmlNode * node, const char *name);
typedef struct _TtmlStyleSet TtmlStyleSet;
typedef struct _TtmlElement TtmlElement;
typedef struct _TtmlScene TtmlScene;
typedef enum
{
TTML_ELEMENT_TYPE_STYLE,
TTML_ELEMENT_TYPE_REGION,
TTML_ELEMENT_TYPE_BODY,
TTML_ELEMENT_TYPE_DIV,
TTML_ELEMENT_TYPE_P,
TTML_ELEMENT_TYPE_SPAN,
TTML_ELEMENT_TYPE_ANON_SPAN,
TTML_ELEMENT_TYPE_BR
} TtmlElementType;
typedef enum
{
TTML_WHITESPACE_MODE_NONE,
TTML_WHITESPACE_MODE_DEFAULT,
TTML_WHITESPACE_MODE_PRESERVE,
} TtmlWhitespaceMode;
struct _TtmlElement
{
TtmlElementType type;
gchar *id;
TtmlWhitespaceMode whitespace_mode;
gchar **styles;
gchar *region;
GstClockTime begin;
GstClockTime end;
TtmlStyleSet *style_set;
gchar *text;
guint text_index;
};
/* Represents a static scene consisting of one or more trees of elements that
* should be visible over a specific period of time. */
struct _TtmlScene
{
GstClockTime begin;
GstClockTime end;
GList *trees;
GstBuffer *buf;
};
struct _TtmlStyleSet
{
GHashTable *table;
};
static TtmlStyleSet *
ttml_style_set_new (void)
{
TtmlStyleSet *ret = g_slice_new0 (TtmlStyleSet);
ret->table = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, g_free);
return ret;
}
static void
ttml_style_set_delete (TtmlStyleSet * style_set)
{
if (style_set) {
g_hash_table_unref (style_set->table);
g_slice_free (TtmlStyleSet, style_set);
}
}
/* If attribute with name @attr_name already exists in @style_set, its value
* will be replaced by @attr_value. */
static gboolean
ttml_style_set_add_attr (TtmlStyleSet * style_set, const gchar * attr_name,
const gchar * attr_value)
{
return g_hash_table_insert (style_set->table, g_strdup (attr_name),
g_strdup (attr_value));
}
static gboolean
ttml_style_set_contains_attr (TtmlStyleSet * style_set, const gchar * attr_name)
{
return g_hash_table_contains (style_set->table, attr_name);
}
static const gchar *
ttml_style_set_get_attr (TtmlStyleSet * style_set, const gchar * attr_name)
{
return g_hash_table_lookup (style_set->table, attr_name);
}
static guint8
ttml_hex_pair_to_byte (const gchar * hex_pair)
{
gint hi_digit, lo_digit;
hi_digit = g_ascii_xdigit_value (*hex_pair);
lo_digit = g_ascii_xdigit_value (*(hex_pair + 1));
return (hi_digit << 4) + lo_digit;
}
/* Color strings in EBU-TT-D can have the form "#RRBBGG" or "#RRBBGGAA". */
static GstSubtitleColor
ttml_parse_colorstring (const gchar * color)
{
guint length;
const gchar *c = NULL;
GstSubtitleColor ret = { 0, 0, 0, 0 };
if (!color)
return ret;
length = strlen (color);
if (((length == 7) || (length == 9)) && *color == '#') {
c = color + 1;
ret.r = ttml_hex_pair_to_byte (c);
ret.g = ttml_hex_pair_to_byte (c + 2);
ret.b = ttml_hex_pair_to_byte (c + 4);
if (length == 7)
ret.a = G_MAXUINT8;
else
ret.a = ttml_hex_pair_to_byte (c + 6);
GST_CAT_LOG (ttmlparse_debug, "Returning color - r:%u b:%u g:%u a:%u",
ret.r, ret.b, ret.g, ret.a);
} else {
GST_CAT_ERROR (ttmlparse_debug, "Invalid color string: %s", color);
}
return ret;
}
static void
ttml_style_set_print (TtmlStyleSet * style_set)
{
GHashTableIter iter;
gpointer attr_name, attr_value;
if (!style_set) {
GST_CAT_LOG (ttmlparse_debug, "\t\t[NULL]");
return;
}
g_hash_table_iter_init (&iter, style_set->table);
while (g_hash_table_iter_next (&iter, &attr_name, &attr_value)) {
GST_CAT_LOG (ttmlparse_debug, "\t\t%s: %s", (const gchar *) attr_name,
(const gchar *) attr_value);
}
}
static TtmlStyleSet *
ttml_parse_style_set (const xmlNode * node)
{
TtmlStyleSet *s;
gchar *value = NULL;
xmlAttrPtr attr;
value = ttml_get_xml_property (node, "id");
if (!value) {
GST_CAT_ERROR (ttmlparse_debug, "styles must have an ID.");
return NULL;
}
g_free (value);
s = ttml_style_set_new ();
for (attr = node->properties; attr != NULL; attr = attr->next) {
if (attr->ns && ((g_strcmp0 ((const gchar *) attr->ns->prefix, "tts") == 0)
|| (g_strcmp0 ((const gchar *) attr->ns->prefix, "ebutts") == 0))) {
ttml_style_set_add_attr (s, (const gchar *) attr->name,
(const gchar *) attr->children->content);
}
}
return s;
}
static void
ttml_delete_element (TtmlElement * element)
{
g_free ((gpointer) element->id);
if (element->styles)
g_strfreev (element->styles);
g_free ((gpointer) element->region);
ttml_style_set_delete (element->style_set);
g_free ((gpointer) element->text);
g_slice_free (TtmlElement, element);
}
static gchar *
ttml_get_xml_property (const xmlNode * node, const char *name)
{
xmlChar *xml_string = NULL;
gchar *gst_string = NULL;
g_return_val_if_fail (strlen (name) < 128, NULL);
xml_string = xmlGetProp (node, (xmlChar *) name);
if (!xml_string)
return NULL;
gst_string = g_strdup ((gchar *) xml_string);
xmlFree (xml_string);
return gst_string;
}
/* EBU-TT-D timecodes have format hours:minutes:seconds[.fraction] */
static GstClockTime
ttml_parse_timecode (const gchar * timestring)
{
gchar **strings;
guint64 hours = 0, minutes = 0, seconds = 0, milliseconds = 0;
GstClockTime time = GST_CLOCK_TIME_NONE;
GST_CAT_LOG (ttmlparse_debug, "time string: %s", timestring);
strings = g_strsplit (timestring, ":", 3);
if (g_strv_length (strings) != 3U) {
GST_CAT_ERROR (ttmlparse_debug, "badly formatted time string: %s",
timestring);
return time;
}
hours = g_ascii_strtoull (strings[0], NULL, 10U);
minutes = g_ascii_strtoull (strings[1], NULL, 10U);
if (g_strstr_len (strings[2], -1, ".")) {
guint n_digits;
gchar **substrings = g_strsplit (strings[2], ".", 2);
seconds = g_ascii_strtoull (substrings[0], NULL, 10U);
n_digits = strlen (substrings[1]);
milliseconds = g_ascii_strtoull (substrings[1], NULL, 10U);
milliseconds =
(guint64) (milliseconds * pow (10.0, (3 - (double) n_digits)));
g_strfreev (substrings);
} else {
seconds = g_ascii_strtoull (strings[2], NULL, 10U);
}
if (minutes > 59 || seconds > 60) {
GST_CAT_ERROR (ttmlparse_debug, "invalid time string "
"(minutes or seconds out-of-bounds): %s\n", timestring);
}
g_strfreev (strings);
GST_CAT_LOG (ttmlparse_debug,
"hours: %" G_GUINT64_FORMAT " minutes: %" G_GUINT64_FORMAT
" seconds: %" G_GUINT64_FORMAT " milliseconds: %" G_GUINT64_FORMAT "",
hours, minutes, seconds, milliseconds);
time = hours * GST_SECOND * 3600
+ minutes * GST_SECOND * 60
+ seconds * GST_SECOND + milliseconds * GST_MSECOND;
return time;
}
static TtmlElement *
ttml_parse_element (const xmlNode * node)
{
TtmlElement *element;
TtmlElementType type;
gchar *value;
GST_CAT_DEBUG (ttmlparse_debug, "Element name: %s",
(const char *) node->name);
if ((g_strcmp0 ((const char *) node->name, "style") == 0)) {
type = TTML_ELEMENT_TYPE_STYLE;
} else if ((g_strcmp0 ((const char *) node->name, "region") == 0)) {
type = TTML_ELEMENT_TYPE_REGION;
} else if ((g_strcmp0 ((const char *) node->name, "body") == 0)) {
type = TTML_ELEMENT_TYPE_BODY;
} else if ((g_strcmp0 ((const char *) node->name, "div") == 0)) {
type = TTML_ELEMENT_TYPE_DIV;
} else if ((g_strcmp0 ((const char *) node->name, "p") == 0)) {
type = TTML_ELEMENT_TYPE_P;
} else if ((g_strcmp0 ((const char *) node->name, "span") == 0)) {
type = TTML_ELEMENT_TYPE_SPAN;
} else if ((g_strcmp0 ((const char *) node->name, "text") == 0)) {
type = TTML_ELEMENT_TYPE_ANON_SPAN;
} else if ((g_strcmp0 ((const char *) node->name, "br") == 0)) {
type = TTML_ELEMENT_TYPE_BR;
} else {
return NULL;
}
element = g_slice_new0 (TtmlElement);
element->type = type;
if ((value = ttml_get_xml_property (node, "id"))) {
element->id = g_strdup (value);
g_free (value);
}
if ((value = ttml_get_xml_property (node, "style"))) {
element->styles = g_strsplit (value, " ", 0);
GST_CAT_DEBUG (ttmlparse_debug, "%u style(s) referenced in element.",
g_strv_length (element->styles));
g_free (value);
}
if (element->type == TTML_ELEMENT_TYPE_STYLE
|| element->type == TTML_ELEMENT_TYPE_REGION) {
TtmlStyleSet *ss;
ss = ttml_parse_style_set (node);
if (ss)
element->style_set = ss;
else
GST_CAT_WARNING (ttmlparse_debug,
"Style or Region contains no styling attributes.");
}
if ((value = ttml_get_xml_property (node, "region"))) {
element->region = g_strdup (value);
g_free (value);
}
if ((value = ttml_get_xml_property (node, "begin"))) {
element->begin = ttml_parse_timecode (value);
g_free (value);
} else {
element->begin = GST_CLOCK_TIME_NONE;
}
if ((value = ttml_get_xml_property (node, "end"))) {
element->end = ttml_parse_timecode (value);
g_free (value);
} else {
element->end = GST_CLOCK_TIME_NONE;
}
if (node->content) {
GST_CAT_LOG (ttmlparse_debug, "Node content: %s", node->content);
element->text = g_strdup ((const gchar *) node->content);
}
if ((value = ttml_get_xml_property (node, "space"))) {
if (g_strcmp0 (value, "preserve") == 0)
element->whitespace_mode = TTML_WHITESPACE_MODE_PRESERVE;
else if (g_strcmp0 (value, "default") == 0)
element->whitespace_mode = TTML_WHITESPACE_MODE_DEFAULT;
g_free (value);
}
return element;
}
static GNode *
ttml_parse_body (const xmlNode * node)
{
GNode *ret;
TtmlElement *element;
GST_CAT_LOG (ttmlparse_debug, "parsing node %s", node->name);
element = ttml_parse_element (node);
if (element)
ret = g_node_new (element);
else
return NULL;
for (node = node->children; node != NULL; node = node->next) {
GNode *descendants = NULL;
if ((descendants = ttml_parse_body (node)))
g_node_append (ret, descendants);
}
return ret;
}
/* Update the fields of a GstSubtitleStyleSet, @style_set, according to the
* values defined in a TtmlStyleSet, @tss, and a given cell resolution. */
static void
ttml_update_style_set (GstSubtitleStyleSet * style_set, TtmlStyleSet * tss,
guint cellres_x, guint cellres_y)
{
const gchar *attr;
if ((attr = ttml_style_set_get_attr (tss, "textDirection"))) {
if (g_strcmp0 (attr, "rtl") == 0)
style_set->text_direction = GST_SUBTITLE_TEXT_DIRECTION_RTL;
else
style_set->text_direction = GST_SUBTITLE_TEXT_DIRECTION_LTR;
}
if ((attr = ttml_style_set_get_attr (tss, "fontFamily"))) {
if (strlen (attr) <= MAX_FONT_FAMILY_NAME_LENGTH) {
g_free (style_set->font_family);
style_set->font_family = g_strdup (attr);
} else {
GST_CAT_WARNING (ttmlparse_debug,
"Ignoring font family name as it's overly long.");
}
}
if ((attr = ttml_style_set_get_attr (tss, "fontSize"))) {
style_set->font_size = g_ascii_strtod (attr, NULL) / 100.0;
}
style_set->font_size *= (1.0 / cellres_y);
if ((attr = ttml_style_set_get_attr (tss, "lineHeight"))) {
if (g_strcmp0 (attr, "normal") == 0)
style_set->line_height = -1;
else
style_set->line_height = g_ascii_strtod (attr, NULL) / 100.0;
}
if ((attr = ttml_style_set_get_attr (tss, "textAlign"))) {
if (g_strcmp0 (attr, "left") == 0)
style_set->text_align = GST_SUBTITLE_TEXT_ALIGN_LEFT;
else if (g_strcmp0 (attr, "center") == 0)
style_set->text_align = GST_SUBTITLE_TEXT_ALIGN_CENTER;
else if (g_strcmp0 (attr, "right") == 0)
style_set->text_align = GST_SUBTITLE_TEXT_ALIGN_RIGHT;
else if (g_strcmp0 (attr, "end") == 0)
style_set->text_align = GST_SUBTITLE_TEXT_ALIGN_END;
else
style_set->text_align = GST_SUBTITLE_TEXT_ALIGN_START;
}
if ((attr = ttml_style_set_get_attr (tss, "color"))) {
style_set->color = ttml_parse_colorstring (attr);
}
if ((attr = ttml_style_set_get_attr (tss, "backgroundColor"))) {
style_set->background_color = ttml_parse_colorstring (attr);
}
if ((attr = ttml_style_set_get_attr (tss, "fontStyle"))) {
if (g_strcmp0 (attr, "italic") == 0)
style_set->font_style = GST_SUBTITLE_FONT_STYLE_ITALIC;
else
style_set->font_style = GST_SUBTITLE_FONT_STYLE_NORMAL;
}
if ((attr = ttml_style_set_get_attr (tss, "fontWeight"))) {
if (g_strcmp0 (attr, "bold") == 0)
style_set->font_weight = GST_SUBTITLE_FONT_WEIGHT_BOLD;
else
style_set->font_weight = GST_SUBTITLE_FONT_WEIGHT_NORMAL;
}
if ((attr = ttml_style_set_get_attr (tss, "textDecoration"))) {
if (g_strcmp0 (attr, "underline") == 0)
style_set->text_decoration = GST_SUBTITLE_TEXT_DECORATION_UNDERLINE;
else
style_set->text_decoration = GST_SUBTITLE_TEXT_DECORATION_NONE;
}
if ((attr = ttml_style_set_get_attr (tss, "unicodeBidi"))) {
if (g_strcmp0 (attr, "embed") == 0)
style_set->unicode_bidi = GST_SUBTITLE_UNICODE_BIDI_EMBED;
else if (g_strcmp0 (attr, "bidiOverride") == 0)
style_set->unicode_bidi = GST_SUBTITLE_UNICODE_BIDI_OVERRIDE;
else
style_set->unicode_bidi = GST_SUBTITLE_UNICODE_BIDI_NORMAL;
}
if ((attr = ttml_style_set_get_attr (tss, "wrapOption"))) {
if (g_strcmp0 (attr, "noWrap") == 0)
style_set->wrap_option = GST_SUBTITLE_WRAPPING_OFF;
else
style_set->wrap_option = GST_SUBTITLE_WRAPPING_ON;
}
if ((attr = ttml_style_set_get_attr (tss, "multiRowAlign"))) {
if (g_strcmp0 (attr, "start") == 0)
style_set->multi_row_align = GST_SUBTITLE_MULTI_ROW_ALIGN_START;
else if (g_strcmp0 (attr, "center") == 0)
style_set->multi_row_align = GST_SUBTITLE_MULTI_ROW_ALIGN_CENTER;
else if (g_strcmp0 (attr, "end") == 0)
style_set->multi_row_align = GST_SUBTITLE_MULTI_ROW_ALIGN_END;
else
style_set->multi_row_align = GST_SUBTITLE_MULTI_ROW_ALIGN_AUTO;
}
if ((attr = ttml_style_set_get_attr (tss, "linePadding"))) {
style_set->line_padding = g_ascii_strtod (attr, NULL);
style_set->line_padding *= (1.0 / cellres_x);
}
if ((attr = ttml_style_set_get_attr (tss, "origin"))) {
gchar *c;
style_set->origin_x = g_ascii_strtod (attr, &c) / 100.0;
while (!g_ascii_isdigit (*c) && *c != '+' && *c != '-')
++c;
style_set->origin_y = g_ascii_strtod (c, NULL) / 100.0;
}
if ((attr = ttml_style_set_get_attr (tss, "extent"))) {
gchar *c;
style_set->extent_w = g_ascii_strtod (attr, &c) / 100.0;
if ((style_set->origin_x + style_set->extent_w) > 1.0) {
style_set->extent_w = 1.0 - style_set->origin_x;
}
while (!g_ascii_isdigit (*c) && *c != '+' && *c != '-')
++c;
style_set->extent_h = g_ascii_strtod (c, NULL) / 100.0;
if ((style_set->origin_y + style_set->extent_h) > 1.0) {
style_set->extent_h = 1.0 - style_set->origin_y;
}
}
if ((attr = ttml_style_set_get_attr (tss, "displayAlign"))) {
if (g_strcmp0 (attr, "center") == 0)
style_set->display_align = GST_SUBTITLE_DISPLAY_ALIGN_CENTER;
else if (g_strcmp0 (attr, "after") == 0)
style_set->display_align = GST_SUBTITLE_DISPLAY_ALIGN_AFTER;
else
style_set->display_align = GST_SUBTITLE_DISPLAY_ALIGN_BEFORE;
}
if ((attr = ttml_style_set_get_attr (tss, "padding"))) {
gchar **decimals;
guint n_decimals;
guint i;
decimals = g_strsplit (attr, "%", 0);
n_decimals = g_strv_length (decimals) - 1;
for (i = 0; i < n_decimals; ++i)
g_strstrip (decimals[i]);
switch (n_decimals) {
case 1:
style_set->padding_start = style_set->padding_end =
style_set->padding_before = style_set->padding_after =
g_ascii_strtod (decimals[0], NULL) / 100.0;
break;
case 2:
style_set->padding_before = style_set->padding_after =
g_ascii_strtod (decimals[0], NULL) / 100.0;
style_set->padding_start = style_set->padding_end =
g_ascii_strtod (decimals[1], NULL) / 100.0;
break;
case 3:
style_set->padding_before = g_ascii_strtod (decimals[0], NULL) / 100.0;
style_set->padding_start = style_set->padding_end =
g_ascii_strtod (decimals[1], NULL) / 100.0;
style_set->padding_after = g_ascii_strtod (decimals[2], NULL) / 100.0;
break;
case 4:
style_set->padding_before = g_ascii_strtod (decimals[0], NULL) / 100.0;
style_set->padding_end = g_ascii_strtod (decimals[1], NULL) / 100.0;
style_set->padding_after = g_ascii_strtod (decimals[2], NULL) / 100.0;
style_set->padding_start = g_ascii_strtod (decimals[3], NULL) / 100.0;
break;
}
g_strfreev (decimals);
/* Padding values in TTML files are relative to the region width & height;
* make them relative to the overall display width & height like all other
* dimensions. */
style_set->padding_before *= style_set->extent_h;
style_set->padding_after *= style_set->extent_h;
style_set->padding_end *= style_set->extent_w;
style_set->padding_start *= style_set->extent_w;
}
if ((attr = ttml_style_set_get_attr (tss, "writingMode"))) {
if (g_str_has_prefix (attr, "rl"))
style_set->writing_mode = GST_SUBTITLE_WRITING_MODE_RLTB;
else if ((g_strcmp0 (attr, "tbrl") == 0)
|| (g_strcmp0 (attr, "tb") == 0))
style_set->writing_mode = GST_SUBTITLE_WRITING_MODE_TBRL;
else if (g_strcmp0 (attr, "tblr") == 0)
style_set->writing_mode = GST_SUBTITLE_WRITING_MODE_TBLR;
else
style_set->writing_mode = GST_SUBTITLE_WRITING_MODE_LRTB;
}
if ((attr = ttml_style_set_get_attr (tss, "showBackground"))) {
if (g_strcmp0 (attr, "whenActive") == 0)
style_set->show_background = GST_SUBTITLE_BACKGROUND_MODE_WHEN_ACTIVE;
else
style_set->show_background = GST_SUBTITLE_BACKGROUND_MODE_ALWAYS;
}
if ((attr = ttml_style_set_get_attr (tss, "overflow"))) {
if (g_strcmp0 (attr, "visible") == 0)
style_set->overflow = GST_SUBTITLE_OVERFLOW_MODE_VISIBLE;
else
style_set->overflow = GST_SUBTITLE_OVERFLOW_MODE_HIDDEN;
}
}
static TtmlStyleSet *
ttml_style_set_copy (TtmlStyleSet * style_set)
{
GHashTableIter iter;
gpointer attr_name, attr_value;
TtmlStyleSet *ret = ttml_style_set_new ();
g_hash_table_iter_init (&iter, style_set->table);
while (g_hash_table_iter_next (&iter, &attr_name, &attr_value)) {
ttml_style_set_add_attr (ret, (const gchar *) attr_name,
(const gchar *) attr_value);
}
return ret;
}
/* set2 overrides set1. Unlike style inheritance, merging will result in all
* values from set1 being merged into set2. */
static TtmlStyleSet *
ttml_style_set_merge (TtmlStyleSet * set1, TtmlStyleSet * set2)
{
TtmlStyleSet *ret = NULL;
if (set1) {
ret = ttml_style_set_copy (set1);
if (set2) {
GHashTableIter iter;
gpointer attr_name, attr_value;
g_hash_table_iter_init (&iter, set2->table);
while (g_hash_table_iter_next (&iter, &attr_name, &attr_value)) {
ttml_style_set_add_attr (ret, (const gchar *) attr_name,
(const gchar *) attr_value);
}
}
} else if (set2) {
ret = ttml_style_set_copy (set2);
}
return ret;
}
static gchar *
ttml_get_relative_font_size (const gchar * parent_size,
const gchar * child_size)
{
guint psize = (guint) g_ascii_strtoull (parent_size, NULL, 10U);
guint csize = (guint) g_ascii_strtoull (child_size, NULL, 10U);
csize = (csize * psize) / 100U;
return g_strdup_printf ("%u%%", csize);
}
static TtmlStyleSet *
ttml_style_set_inherit (TtmlStyleSet * parent, TtmlStyleSet * child)
{
TtmlStyleSet *ret = NULL;
GHashTableIter iter;
gpointer attr_name, attr_value;
if (child) {
ret = ttml_style_set_copy (child);
} else {
ret = ttml_style_set_new ();
}
if (!parent)
return ret;
g_hash_table_iter_init (&iter, parent->table);
while (g_hash_table_iter_next (&iter, &attr_name, &attr_value)) {
/* In TTML, if an element which has a defined fontSize is the child of an
* element that also has a defined fontSize, the child's font size is
* relative to that of its parent. If its parent doesn't have a defined
* fontSize, then the child's fontSize is relative to the document's cell
* size. Therefore, if the former is true, we calculate the value of
* fontSize based on the parent's fontSize; otherwise, we simply keep
* the value defined in the child's style set. */
if (g_strcmp0 ((const gchar *) attr_name, "fontSize") == 0
&& ttml_style_set_contains_attr (ret, (const gchar *) attr_name)) {
const gchar *original_child_font_size =
ttml_style_set_get_attr (child, "fontSize");
gchar *scaled_child_font_size =
ttml_get_relative_font_size ((const gchar *) attr_value,
original_child_font_size);
GST_CAT_LOG (ttmlparse_debug, "Calculated font size: %s",
scaled_child_font_size);
ttml_style_set_add_attr (ret, (const gchar *) attr_name,
scaled_child_font_size);
g_free (scaled_child_font_size);
}
/* Not all styling attributes are inherited in TTML. */
if (g_strcmp0 ((const gchar *) attr_name, "backgroundColor") != 0
&& g_strcmp0 ((const gchar *) attr_name, "origin") != 0
&& g_strcmp0 ((const gchar *) attr_name, "extent") != 0
&& g_strcmp0 ((const gchar *) attr_name, "displayAlign") != 0
&& g_strcmp0 ((const gchar *) attr_name, "overflow") != 0
&& g_strcmp0 ((const gchar *) attr_name, "padding") != 0
&& g_strcmp0 ((const gchar *) attr_name, "writingMode") != 0
&& g_strcmp0 ((const gchar *) attr_name, "showBackground") != 0
&& g_strcmp0 ((const gchar *) attr_name, "unicodeBidi") != 0) {
if (!ttml_style_set_contains_attr (ret, (const gchar *) attr_name)) {
ttml_style_set_add_attr (ret, (const gchar *) attr_name,
(const gchar *) attr_value);
}
}
}
return ret;
}
static gchar *
ttml_get_element_type_string (TtmlElement * element)
{
switch (element->type) {
case TTML_ELEMENT_TYPE_STYLE:
return g_strdup ("<style>");
break;
case TTML_ELEMENT_TYPE_REGION:
return g_strdup ("<region>");
break;
case TTML_ELEMENT_TYPE_BODY:
return g_strdup ("<body>");
break;
case TTML_ELEMENT_TYPE_DIV:
return g_strdup ("<div>");
break;
case TTML_ELEMENT_TYPE_P:
return g_strdup ("<p>");
break;
case TTML_ELEMENT_TYPE_SPAN:
return g_strdup ("<span>");
break;
case TTML_ELEMENT_TYPE_ANON_SPAN:
return g_strdup ("<anon-span>");
break;
case TTML_ELEMENT_TYPE_BR:
return g_strdup ("<br>");
break;
default:
return g_strdup ("Unknown");
break;
}
}
/* Merge styles referenced by an element. */
static gboolean
ttml_resolve_styles (GNode * node, gpointer data)
{
TtmlStyleSet *tmp = NULL;
TtmlElement *element, *style;
GHashTable *styles_table;
gchar *type_string;
guint i;
styles_table = (GHashTable *) data;
element = node->data;
type_string = ttml_get_element_type_string (element);
GST_CAT_LOG (ttmlparse_debug, "Element type: %s", type_string);
g_free (type_string);
if (!element->styles)
return FALSE;
for (i = 0; i < g_strv_length (element->styles); ++i) {
tmp = element->style_set;
style = g_hash_table_lookup (styles_table, element->styles[i]);
if (style) {
GST_CAT_LOG (ttmlparse_debug, "Merging style %s...", element->styles[i]);
element->style_set = ttml_style_set_merge (element->style_set,
style->style_set);
ttml_style_set_delete (tmp);
} else {
GST_CAT_WARNING (ttmlparse_debug,
"Element references an unknown style (%s)", element->styles[i]);
}
}
GST_CAT_LOG (ttmlparse_debug, "Style set after merging:");
ttml_style_set_print (element->style_set);
return FALSE;
}
static void
ttml_resolve_referenced_styles (GList * trees, GHashTable * styles_table)
{
GList *tree;
for (tree = g_list_first (trees); tree; tree = tree->next) {
GNode *root = (GNode *) tree->data;
g_node_traverse (root, G_PRE_ORDER, G_TRAVERSE_ALL, -1, ttml_resolve_styles,
styles_table);
}
}
/* Inherit styling attributes from parent. */
static gboolean
ttml_inherit_styles (GNode * node, gpointer data)
{
TtmlStyleSet *tmp = NULL;
TtmlElement *element, *parent;
gchar *type_string;
element = node->data;
type_string = ttml_get_element_type_string (element);
GST_CAT_LOG (ttmlparse_debug, "Element type: %s", type_string);
g_free (type_string);
if (node->parent) {
parent = node->parent->data;
if (parent->style_set) {
tmp = element->style_set;
if (element->type == TTML_ELEMENT_TYPE_ANON_SPAN) {
/* Anon spans should merge all style attributes from their parent. */
element->style_set = ttml_style_set_merge (parent->style_set,
element->style_set);
} else {
element->style_set = ttml_style_set_inherit (parent->style_set,
element->style_set);
}
ttml_style_set_delete (tmp);
}
}
GST_CAT_LOG (ttmlparse_debug, "Style set after inheriting:");
ttml_style_set_print (element->style_set);
return FALSE;
}
static void
ttml_inherit_element_styles (GList * trees)
{
GList *tree;
for (tree = g_list_first (trees); tree; tree = tree->next) {
GNode *root = (GNode *) tree->data;
g_node_traverse (root, G_PRE_ORDER, G_TRAVERSE_ALL, -1, ttml_inherit_styles,
NULL);
}
}
/* If whitespace_mode isn't explicitly set for this element, inherit from its
* parent. If this element is the root of the tree, set whitespace_mode to
* that of the overall document. */
static gboolean
ttml_inherit_element_whitespace_mode (GNode * node, gpointer data)
{
TtmlWhitespaceMode *doc_mode = (TtmlWhitespaceMode *) data;
TtmlElement *element = node->data;
TtmlElement *parent;
if (element->whitespace_mode != TTML_WHITESPACE_MODE_NONE)
return FALSE;
if (G_NODE_IS_ROOT (node)) {
element->whitespace_mode = *doc_mode;
return FALSE;
}
parent = node->parent->data;
element->whitespace_mode = parent->whitespace_mode;
return FALSE;
}
static void
ttml_inherit_whitespace_mode (GNode * tree, TtmlWhitespaceMode doc_mode)
{
g_node_traverse (tree, G_PRE_ORDER, G_TRAVERSE_ALL, -1,
ttml_inherit_element_whitespace_mode, &doc_mode);
}
static gboolean
ttml_free_node_data (GNode * node, gpointer data)
{
TtmlElement *element = node->data;
ttml_delete_element (element);
return FALSE;
}
static void
ttml_delete_tree (GNode * tree)
{
g_node_traverse (tree, G_PRE_ORDER, G_TRAVERSE_ALL, -1, ttml_free_node_data,
NULL);
g_node_destroy (tree);
}
typedef struct
{
GstClockTime begin;
GstClockTime end;
} ClipWindow;
static gboolean
ttml_clip_element_period (GNode * node, gpointer data)
{
TtmlElement *element = node->data;
ClipWindow *window = data;
if (!GST_CLOCK_TIME_IS_VALID (element->begin)) {
return FALSE;
}
if (element->begin > window->end || element->end < window->begin) {
ttml_delete_tree (node);
node = NULL;
return FALSE;
}
element->begin = MAX (element->begin, window->begin);
element->end = MIN (element->end, window->end);
return FALSE;
}
static void
ttml_apply_time_window (GNode * tree, GstClockTime window_begin,
GstClockTime window_end)
{
ClipWindow window;
window.begin = window_begin;
window.end = window_end;
g_node_traverse (tree, G_PRE_ORDER, G_TRAVERSE_ALL, -1,
ttml_clip_element_period, &window);
}
static gboolean
ttml_resolve_element_timings (GNode * node, gpointer data)
{
TtmlElement *element, *leaf;
leaf = element = node->data;
if (GST_CLOCK_TIME_IS_VALID (leaf->begin)
&& GST_CLOCK_TIME_IS_VALID (leaf->end)) {
GST_CAT_LOG (ttmlparse_debug, "Leaf node already has timing.");
return FALSE;
}
/* Inherit timings from ancestor. */
while (node->parent && !GST_CLOCK_TIME_IS_VALID (element->begin)) {
node = node->parent;
element = node->data;
}
if (!GST_CLOCK_TIME_IS_VALID (element->begin)) {
GST_CAT_WARNING (ttmlparse_debug,
"No timing found for element; setting to Root Temporal Extent.");
leaf->begin = 0;
leaf->end = NSECONDS_IN_DAY;
} else {
leaf->begin = element->begin;
leaf->end = element->end;
GST_CAT_LOG (ttmlparse_debug, "Leaf begin: %" GST_TIME_FORMAT,
GST_TIME_ARGS (leaf->begin));
GST_CAT_LOG (ttmlparse_debug, "Leaf end: %" GST_TIME_FORMAT,
GST_TIME_ARGS (leaf->end));
}
return FALSE;
}
static void
ttml_resolve_timings (GNode * tree)
{
g_node_traverse (tree, G_PRE_ORDER, G_TRAVERSE_LEAVES, -1,
ttml_resolve_element_timings, NULL);
}
static gboolean
ttml_resolve_leaf_region (GNode * node, gpointer data)
{
TtmlElement *element, *leaf;
leaf = element = node->data;
while (node->parent && !element->region) {
node = node->parent;
element = node->data;
}
if (element->region) {
leaf->region = g_strdup (element->region);
GST_CAT_LOG (ttmlparse_debug, "Leaf region: %s", leaf->region);
} else {
GST_CAT_WARNING (ttmlparse_debug, "No region found above leaf element.");
}
return FALSE;
}
static void
ttml_resolve_regions (GNode * tree)
{
g_node_traverse (tree, G_PRE_ORDER, G_TRAVERSE_LEAVES, -1,
ttml_resolve_leaf_region, NULL);
}
typedef struct
{
GstClockTime start_time;
GstClockTime next_transition_time;
} TrState;
static gboolean
ttml_update_transition_time (GNode * node, gpointer data)
{
TtmlElement *element = node->data;
TrState *state = (TrState *) data;
if ((element->begin < state->next_transition_time)
&& (!GST_CLOCK_TIME_IS_VALID (state->start_time)
|| (element->begin > state->start_time))) {
state->next_transition_time = element->begin;
GST_CAT_LOG (ttmlparse_debug,
"Updating next transition time to element begin time (%"
GST_TIME_FORMAT ")", GST_TIME_ARGS (state->next_transition_time));
return FALSE;
}
if ((element->end < state->next_transition_time)
&& (element->end > state->start_time)) {
state->next_transition_time = element->end;
GST_CAT_LOG (ttmlparse_debug,
"Updating next transition time to element end time (%"
GST_TIME_FORMAT ")", GST_TIME_ARGS (state->next_transition_time));
}
return FALSE;
}
/* Return details about the next transition after @time. */
static GstClockTime
ttml_find_next_transition (GList * trees, GstClockTime time)
{
TrState state;
state.start_time = time;
state.next_transition_time = GST_CLOCK_TIME_NONE;
for (trees = g_list_first (trees); trees; trees = trees->next) {
GNode *tree = (GNode *) trees->data;
g_node_traverse (tree, G_PRE_ORDER, G_TRAVERSE_ALL, -1,
ttml_update_transition_time, &state);
}
GST_CAT_LOG (ttmlparse_debug, "Next transition is at %" GST_TIME_FORMAT,
GST_TIME_ARGS (state.next_transition_time));
return state.next_transition_time;
}
/* Remove nodes from tree that are not visible at @time. */
static GNode *
ttml_remove_nodes_by_time (GNode * node, GstClockTime time)
{
GNode *child, *next_child;
TtmlElement *element;
element = node->data;
child = node->children;
next_child = child ? child->next : NULL;
while (child) {
ttml_remove_nodes_by_time (child, time);
child = next_child;
next_child = child ? child->next : NULL;
}
if (!node->children && ((element->begin > time) || (element->end <= time))) {
g_node_destroy (node);
node = NULL;
}
return node;
}
/* Return a list of trees containing the elements and their ancestors that are
* visible at @time. */
static GList *
ttml_get_active_trees (GList * element_trees, GstClockTime time)
{
GList *tree;
GList *ret = NULL;
for (tree = g_list_first (element_trees); tree; tree = tree->next) {
GNode *root = g_node_copy ((GNode *) tree->data);
GST_CAT_LOG (ttmlparse_debug, "There are %u nodes in tree.",
g_node_n_nodes (root, G_TRAVERSE_ALL));
root = ttml_remove_nodes_by_time (root, time);
if (root) {
GST_CAT_LOG (ttmlparse_debug,
"After filtering there are %u nodes in tree.", g_node_n_nodes (root,
G_TRAVERSE_ALL));
ret = g_list_append (ret, root);
} else {
GST_CAT_LOG (ttmlparse_debug,
"All elements have been filtered from tree.");
}
}
GST_CAT_DEBUG (ttmlparse_debug, "There are %u trees in returned list.",
g_list_length (ret));
return ret;
}
static GList *
ttml_create_scenes (GList * region_trees)
{
TtmlScene *cur_scene = NULL;
GList *output_scenes = NULL;
GList *active_trees = NULL;
GstClockTime timestamp = GST_CLOCK_TIME_NONE;
while ((timestamp = ttml_find_next_transition (region_trees, timestamp))
!= GST_CLOCK_TIME_NONE) {
GST_CAT_LOG (ttmlparse_debug,
"Next transition found at time %" GST_TIME_FORMAT,
GST_TIME_ARGS (timestamp));
if (cur_scene) {
cur_scene->end = timestamp;
output_scenes = g_list_append (output_scenes, cur_scene);
}
active_trees = ttml_get_active_trees (region_trees, timestamp);
GST_CAT_LOG (ttmlparse_debug, "There will be %u active regions after "
"transition", g_list_length (active_trees));
if (active_trees) {
cur_scene = g_slice_new0 (TtmlScene);
cur_scene->begin = timestamp;
cur_scene->trees = active_trees;
} else {
cur_scene = NULL;
}
}
return output_scenes;
}
/* Handle element whitespace in accordance with section 7.2.3 of the TTML
* specification. Specifically, this function implements the
* white-space-collapse="true" and linefeed-treatment="treat-as-space"
* behaviours. Note that stripping of whitespace at the start and end of line
* areas (suppress-at-line-break="auto" and
* white-space-treatment="ignore-if-surrounding-linefeed" behaviours) can only
* be done by the renderer once the text from multiple elements has been laid
* out in line areas. */
static gboolean
ttml_handle_element_whitespace (GNode * node, gpointer data)
{
TtmlElement *element = node->data;
guint space_count = 0;
guint textlen;
gchar *c;
if (!element->text
|| (element->whitespace_mode == TTML_WHITESPACE_MODE_PRESERVE)) {
return FALSE;
}
textlen = strlen (element->text);
for (c = element->text; TRUE; c = g_utf8_next_char (c)) {
gchar buf[6] = { 0 };
gunichar u = g_utf8_get_char (c);
gint nbytes = g_unichar_to_utf8 (u, buf);
if (nbytes == 1 && buf[0] == 0xA) {
*c = ' ';
buf[0] = 0x20;
}
if (nbytes == 1 && (buf[0] == 0x20 || buf[0] == 0x9 || buf[0] == 0xD)) {
++space_count;
} else {
if (space_count > 1) {
gchar *new_head = c - space_count + 1;
g_strlcpy (new_head, c, textlen);
c = new_head;
}
space_count = 0;
if (nbytes == 1 && buf[0] == 0x0) /* Reached end of string. */
break;
}
}
return FALSE;
}
static void
ttml_handle_whitespace (GNode * tree)
{
g_node_traverse (tree, G_PRE_ORDER, G_TRAVERSE_LEAVES, -1,
ttml_handle_element_whitespace, NULL);
}
static GNode *
ttml_filter_content_nodes (GNode * node)
{
GNode *child, *next_child;
TtmlElement *element = node->data;
TtmlElement *parent = node->parent ? node->parent->data : NULL;
child = node->children;
next_child = child ? child->next : NULL;
while (child) {
ttml_filter_content_nodes (child);
child = next_child;
next_child = child ? child->next : NULL;
}
/* Only text content in <p>s and <span>s is significant. */
if (element->type == TTML_ELEMENT_TYPE_ANON_SPAN
&& parent->type != TTML_ELEMENT_TYPE_P
&& parent->type != TTML_ELEMENT_TYPE_SPAN) {
ttml_delete_element (element);
g_node_destroy (node);
node = NULL;
}
return node;
}
/* Store child elements of @node with name @element_name in @table, as long as
* @table doesn't already contain an element with the same ID. */
static void
ttml_store_unique_children (xmlNodePtr node, const gchar * element_name,
GHashTable * table)
{
xmlNodePtr ptr;
for (ptr = node->children; ptr; ptr = ptr->next) {
if (xmlStrcmp (ptr->name, (const xmlChar *) element_name) == 0) {
TtmlElement *element = ttml_parse_element (ptr);
if (element)
if (!g_hash_table_contains (table, element->id))
g_hash_table_insert (table, (gpointer) (element->id),
(gpointer) element);
}
}
}
/* Parse style and region elements from @head and store in their respective
* hash tables for future reference. */
static void
ttml_parse_head (xmlNodePtr head, GHashTable * styles_table,
GHashTable * regions_table)
{
xmlNodePtr node;
for (node = head->children; node; node = node->next) {
if (xmlStrcmp (node->name, (const xmlChar *) "styling") == 0)
ttml_store_unique_children (node, "style", styles_table);
if (xmlStrcmp (node->name, (const xmlChar *) "layout") == 0)
ttml_store_unique_children (node, "region", regions_table);
}
}
/* Remove nodes that do not belong to @region, or are not an ancestor of a node
* belonging to @region. */
static GNode *
ttml_remove_nodes_by_region (GNode * node, const gchar * region)
{
GNode *child, *next_child;
TtmlElement *element;
element = node->data;
child = node->children;
next_child = child ? child->next : NULL;
while (child) {
ttml_remove_nodes_by_region (child, region);
child = next_child;
next_child = child ? child->next : NULL;
}
if ((element->type == TTML_ELEMENT_TYPE_ANON_SPAN
|| element->type != TTML_ELEMENT_TYPE_BR)
&& element->region && (g_strcmp0 (element->region, region) != 0)) {
ttml_delete_element (element);
g_node_destroy (node);
return NULL;
}
if (element->type != TTML_ELEMENT_TYPE_ANON_SPAN
&& element->type != TTML_ELEMENT_TYPE_BR && !node->children) {
ttml_delete_element (element);
g_node_destroy (node);
return NULL;
}
return node;
}
static TtmlElement *
ttml_copy_element (const TtmlElement * element)
{
TtmlElement *ret = g_slice_new0 (TtmlElement);
ret->type = element->type;
if (element->id)
ret->id = g_strdup (element->id);
ret->whitespace_mode = element->whitespace_mode;
if (element->styles)
ret->styles = g_strdupv (element->styles);
if (element->region)
ret->region = g_strdup (element->region);
ret->begin = element->begin;
ret->end = element->end;
if (element->style_set)
ret->style_set = ttml_style_set_copy (element->style_set);
if (element->text)
ret->text = g_strdup (element->text);
ret->text_index = element->text_index;
return ret;
}
static gpointer
ttml_copy_tree_element (gconstpointer src, gpointer data)
{
return ttml_copy_element ((TtmlElement *) src);
}
/* Split the body tree into a set of trees, each containing only the elements
* belonging to a single region. Returns a list of trees, one per region, each
* with the corresponding region element at its root. */
static GList *
ttml_split_body_by_region (GNode * body, GHashTable * regions)
{
GHashTableIter iter;
gpointer key, value;
GList *ret = NULL;
g_hash_table_iter_init (&iter, regions);
while (g_hash_table_iter_next (&iter, &key, &value)) {
gchar *region_name = (gchar *) key;
TtmlElement *region = (TtmlElement *) value;
GNode *region_node = g_node_new (ttml_copy_element (region));
GNode *body_copy = g_node_copy_deep (body, ttml_copy_tree_element, NULL);
GST_CAT_DEBUG (ttmlparse_debug, "Creating tree for region %s", region_name);
GST_CAT_LOG (ttmlparse_debug, "Copy of body has %u nodes.",
g_node_n_nodes (body_copy, G_TRAVERSE_ALL));
body_copy = ttml_remove_nodes_by_region (body_copy, region_name);
if (body_copy) {
GST_CAT_LOG (ttmlparse_debug, "Copy of body now has %u nodes.",
g_node_n_nodes (body_copy, G_TRAVERSE_ALL));
/* Reparent tree to region node. */
g_node_prepend (region_node, body_copy);
}
GST_CAT_LOG (ttmlparse_debug, "Final tree has %u nodes.",
g_node_n_nodes (region_node, G_TRAVERSE_ALL));
ret = g_list_append (ret, region_node);
}
GST_CAT_DEBUG (ttmlparse_debug, "Returning %u trees.", g_list_length (ret));
return ret;
}
static guint
ttml_add_text_to_buffer (GstBuffer * buf, const gchar * text)
{
GstMemory *mem;
GstMapInfo map;
guint ret;
mem = gst_allocator_alloc (NULL, strlen (text) + 1, NULL);
if (!gst_memory_map (mem, &map, GST_MAP_WRITE))
GST_CAT_ERROR (ttmlparse_debug, "Failed to map memory.");
g_strlcpy ((gchar *) map.data, text, map.size);
GST_CAT_DEBUG (ttmlparse_debug, "Inserted following text into buffer: %s",
(gchar *) map.data);
gst_memory_unmap (mem, &map);
ret = gst_buffer_n_memory (buf);
gst_buffer_insert_memory (buf, -1, mem);
return ret;
}
/* Create a GstSubtitleElement from @element, add it to @block, and insert its
* associated text in @buf. */
static void
ttml_add_element (GstSubtitleBlock * block, TtmlElement * element,
GstBuffer * buf, guint cellres_x, guint cellres_y)
{
GstSubtitleStyleSet *element_style = NULL;
guint buffer_index;
GstSubtitleElement *sub_element = NULL;
element_style = gst_subtitle_style_set_new ();
ttml_update_style_set (element_style, element->style_set,
cellres_x, cellres_y);
GST_CAT_DEBUG (ttmlparse_debug, "Creating element with text index %u",
element->text_index);
if (element->type != TTML_ELEMENT_TYPE_BR)
buffer_index = ttml_add_text_to_buffer (buf, element->text);
else
buffer_index = ttml_add_text_to_buffer (buf, "\n");
GST_CAT_DEBUG (ttmlparse_debug, "Inserted text at index %u in GstBuffer.",
buffer_index);
sub_element = gst_subtitle_element_new (element_style, buffer_index,
(element->whitespace_mode != TTML_WHITESPACE_MODE_PRESERVE));
gst_subtitle_block_add_element (block, sub_element);
GST_CAT_DEBUG (ttmlparse_debug, "Added element to block; there are now %u"
" elements in the block.", gst_subtitle_block_get_element_count (block));
}
/* Return TRUE if @color is totally transparent. */
static gboolean
ttml_color_is_transparent (const GstSubtitleColor * color)
{
if (!color)
return FALSE;
else
return (color->a == 0);
}
/* Blend @color2 over @color1 and return the resulting color. This is currently
* a dummy implementation that simply returns color2 as long as it's
* not fully transparent. */
/* TODO: Implement actual blending of colors. */
static GstSubtitleColor
ttml_blend_colors (GstSubtitleColor color1, GstSubtitleColor color2)
{
if (ttml_color_is_transparent (&color2))
return color1;
else
return color2;
}
/* Create the subtitle region and its child blocks and elements for @tree,
* inserting element text in @buf. Ownership of created region is transferred
* to caller. */
static GstSubtitleRegion *
ttml_create_subtitle_region (GNode * tree, GstBuffer * buf, guint cellres_x,
guint cellres_y)
{
GstSubtitleRegion *region = NULL;
GstSubtitleStyleSet *region_style;
GstSubtitleColor block_color;
TtmlElement *element;
GNode *node;
element = tree->data; /* Region element */
region_style = gst_subtitle_style_set_new ();
ttml_update_style_set (region_style, element->style_set, cellres_x,
cellres_y);
region = gst_subtitle_region_new (region_style);
node = tree->children;
if (!node)
return region;
element = node->data; /* Body element */
block_color =
ttml_parse_colorstring (ttml_style_set_get_attr (element->style_set,
"backgroundColor"));
for (node = node->children; node; node = node->next) {
GNode *p_node;
GstSubtitleColor div_color;
element = node->data;
if (element->type != TTML_ELEMENT_TYPE_DIV) {
GST_CAT_WARNING (ttmlparse_debug,
"Ignoring %s child of body element: only a div is allowed here.",
ttml_get_element_type_string (element));
continue;
}
div_color =
ttml_parse_colorstring (ttml_style_set_get_attr (element->style_set,
"backgroundColor"));
block_color = ttml_blend_colors (block_color, div_color);
for (p_node = node->children; p_node; p_node = p_node->next) {
GstSubtitleBlock *block = NULL;
GstSubtitleStyleSet *block_style;
GNode *content_node;
GstSubtitleColor p_color;
element = p_node->data;
if (element->type != TTML_ELEMENT_TYPE_P) {
GST_CAT_WARNING (ttmlparse_debug,
"Ignoring %s child of div element: only a p is allowed here.",
ttml_get_element_type_string (element));
continue;
}
p_color =
ttml_parse_colorstring (ttml_style_set_get_attr (element->style_set,
"backgroundColor"));
block_color = ttml_blend_colors (block_color, p_color);
block_style = gst_subtitle_style_set_new ();
ttml_update_style_set (block_style, element->style_set, cellres_x,
cellres_y);
block_style->background_color = block_color;
block = gst_subtitle_block_new (block_style);
for (content_node = p_node->children; content_node;
content_node = content_node->next) {
GNode *anon_node;
element = content_node->data;
if (element->type == TTML_ELEMENT_TYPE_BR
|| element->type == TTML_ELEMENT_TYPE_ANON_SPAN) {
ttml_add_element (block, element, buf, cellres_x, cellres_y);
} else if (element->type == TTML_ELEMENT_TYPE_SPAN) {
/* Loop through anon-span children of this span. */
for (anon_node = content_node->children; anon_node;
anon_node = anon_node->next) {
element = anon_node->data;
if (element->type == TTML_ELEMENT_TYPE_BR
|| element->type == TTML_ELEMENT_TYPE_ANON_SPAN) {
ttml_add_element (block, element, buf, cellres_x, cellres_y);
} else {
GST_CAT_WARNING (ttmlparse_debug,
"Ignoring illegally positioned %s element.",
ttml_get_element_type_string (element));
}
}
} else {
GST_CAT_WARNING (ttmlparse_debug,
"Ignoring illegally positioned %s element.",
ttml_get_element_type_string (element));
}
}
gst_subtitle_region_add_block (region, block);
GST_CAT_DEBUG (ttmlparse_debug,
"Added block to region; there are now %u blocks in the region.",
gst_subtitle_region_get_block_count (region));
}
}
return region;
}
/* For each scene, create data objects to describe the layout and styling of
* that scene and attach it as metadata to the GstBuffer that will be used to
* carry that scene's text. */
static void
ttml_attach_scene_metadata (GList * scenes, guint cellres_x, guint cellres_y)
{
GList *scene_entry;
for (scene_entry = g_list_first (scenes); scene_entry;
scene_entry = scene_entry->next) {
TtmlScene *scene = scene_entry->data;
GList *region_tree;
GPtrArray *regions = g_ptr_array_new_with_free_func (
(GDestroyNotify) gst_subtitle_region_unref);
scene->buf = gst_buffer_new ();
GST_BUFFER_PTS (scene->buf) = scene->begin;
GST_BUFFER_DURATION (scene->buf) = (scene->end - scene->begin);
for (region_tree = g_list_first (scene->trees); region_tree;
region_tree = region_tree->next) {
GNode *tree = (GNode *) region_tree->data;
GstSubtitleRegion *region;
region = ttml_create_subtitle_region (tree, scene->buf, cellres_x,
cellres_y);
if (region)
g_ptr_array_add (regions, region);
}
gst_buffer_add_subtitle_meta (scene->buf, regions);
}
}
static GList *
create_buffer_list (GList * scenes)
{
GList *ret = NULL;
while (scenes) {
TtmlScene *scene = scenes->data;
ret = g_list_prepend (ret, gst_buffer_ref (scene->buf));
scenes = scenes->next;
}
return g_list_reverse (ret);
}
static void
ttml_delete_scene (TtmlScene * scene)
{
if (scene->trees)
g_list_free_full (scene->trees, (GDestroyNotify) g_node_destroy);
if (scene->buf)
gst_buffer_unref (scene->buf);
g_slice_free (TtmlScene, scene);
}
static void
ttml_assign_region_times (GList * region_trees, GstClockTime doc_begin,
GstClockTime doc_duration)
{
GList *tree;
for (tree = g_list_first (region_trees); tree; tree = tree->next) {
GNode *region_node = (GNode *) tree->data;
TtmlElement *region = (TtmlElement *) region_node->data;
const gchar *show_background_value =
ttml_style_set_get_attr (region->style_set, "showBackground");
gboolean always_visible =
(g_strcmp0 (show_background_value, "always") == 0);
GstSubtitleColor region_color = { 0, 0, 0, 0 };
if (ttml_style_set_contains_attr (region->style_set, "backgroundColor"))
region_color =
ttml_parse_colorstring (ttml_style_set_get_attr (region->style_set,
"backgroundColor"));
if (always_visible && !ttml_color_is_transparent (&region_color)) {
GST_CAT_DEBUG (ttmlparse_debug, "Assigning times to region.");
/* If the input XML document was not encapsulated in a container that
* provides timing information for the document as a whole (i.e., its
* PTS and duration) and the region background should be always visible,
* set region start time to 0 and end time to 24 hours. This ensures that
* regions with showBackground="always" are visible for the entirety of
* any real-world stream. */
region->begin = (doc_begin != GST_CLOCK_TIME_NONE) ? doc_begin : 0;
region->end = (doc_duration != GST_CLOCK_TIME_NONE) ?
region->begin + doc_duration : NSECONDS_IN_DAY;
}
}
}
static xmlNodePtr
ttml_find_child (xmlNodePtr parent, const gchar * name)
{
xmlNodePtr child = parent->children;
while (child && xmlStrcmp (child->name, (const xmlChar *) name) != 0)
child = child->next;
return child;
}
GList *
ttml_parse (const gchar * input, GstClockTime begin, GstClockTime duration)
{
xmlDocPtr doc;
xmlNodePtr root_node, head_node, body_node;
GHashTable *styles_table, *regions_table;
GList *output_buffers = NULL;
gchar *value;
guint cellres_x, cellres_y;
TtmlWhitespaceMode doc_whitespace_mode = TTML_WHITESPACE_MODE_DEFAULT;
if (!g_utf8_validate (input, -1, NULL)) {
GST_CAT_ERROR (ttmlparse_debug, "Input isn't valid UTF-8.");
return NULL;
}
GST_CAT_LOG (ttmlparse_debug, "Input:\n%s", input);
styles_table = g_hash_table_new_full (g_str_hash, g_str_equal, NULL,
(GDestroyNotify) ttml_delete_element);
regions_table = g_hash_table_new_full (g_str_hash, g_str_equal, NULL,
(GDestroyNotify) ttml_delete_element);
/* Parse input. */
doc = xmlReadMemory (input, strlen (input), "any_doc_name", NULL, 0);
if (!doc) {
GST_CAT_ERROR (ttmlparse_debug, "Failed to parse document.");
return NULL;
}
root_node = xmlDocGetRootElement (doc);
if (xmlStrcmp (root_node->name, (const xmlChar *) "tt") != 0) {
GST_CAT_ERROR (ttmlparse_debug, "Root element of document is not tt:tt.");
xmlFreeDoc (doc);
return NULL;
}
if ((value = ttml_get_xml_property (root_node, "cellResolution"))) {
gchar *ptr = value;
cellres_x = (guint) g_ascii_strtoull (ptr, &ptr, 10U);
cellres_y = (guint) g_ascii_strtoull (ptr, NULL, 10U);
g_free (value);
} else {
cellres_x = DEFAULT_CELLRES_X;
cellres_y = DEFAULT_CELLRES_Y;
}
GST_CAT_DEBUG (ttmlparse_debug, "cellres_x: %u cellres_y: %u", cellres_x,
cellres_y);
if ((value = ttml_get_xml_property (root_node, "space"))) {
if (g_strcmp0 (value, "preserve") == 0) {
GST_CAT_DEBUG (ttmlparse_debug, "Preserving whitespace...");
doc_whitespace_mode = TTML_WHITESPACE_MODE_PRESERVE;
}
g_free (value);
}
if (!(head_node = ttml_find_child (root_node, "head"))) {
GST_CAT_ERROR (ttmlparse_debug, "No <head> element found.");
xmlFreeDoc (doc);
return NULL;
}
ttml_parse_head (head_node, styles_table, regions_table);
if ((body_node = ttml_find_child (root_node, "body"))) {
GNode *body_tree;
GList *region_trees = NULL;
GList *scenes = NULL;
body_tree = ttml_parse_body (body_node);
GST_CAT_LOG (ttmlparse_debug, "body_tree tree contains %u nodes.",
g_node_n_nodes (body_tree, G_TRAVERSE_ALL));
GST_CAT_LOG (ttmlparse_debug, "body_tree tree height is %u",
g_node_max_height (body_tree));
ttml_inherit_whitespace_mode (body_tree, doc_whitespace_mode);
ttml_handle_whitespace (body_tree);
ttml_filter_content_nodes (body_tree);
if (GST_CLOCK_TIME_IS_VALID (begin) && GST_CLOCK_TIME_IS_VALID (duration))
ttml_apply_time_window (body_tree, begin, begin + duration);
ttml_resolve_timings (body_tree);
ttml_resolve_regions (body_tree);
region_trees = ttml_split_body_by_region (body_tree, regions_table);
ttml_resolve_referenced_styles (region_trees, styles_table);
ttml_inherit_element_styles (region_trees);
ttml_assign_region_times (region_trees, begin, duration);
scenes = ttml_create_scenes (region_trees);
GST_CAT_LOG (ttmlparse_debug, "There are %u scenes in all.",
g_list_length (scenes));
ttml_attach_scene_metadata (scenes, cellres_x, cellres_y);
output_buffers = create_buffer_list (scenes);
g_list_free_full (scenes, (GDestroyNotify) ttml_delete_scene);
g_list_free_full (region_trees, (GDestroyNotify) ttml_delete_tree);
ttml_delete_tree (body_tree);
}
xmlFreeDoc (doc);
g_hash_table_destroy (styles_table);
g_hash_table_destroy (regions_table);
return output_buffers;
}