ttmlparse: Consolidate adjacent co-styled inline elements

A common subtitling use case is live-generated subtitles, in which each
new word is contained in its own span, and the spans are displayed
sequentially, with the effect that lines of displayed subtitles are
built up word-by-word.

This can, however, cause problems when the number of words in a block is
greater than the number of allowed GstMemorys in a GstBuffer.

Since in this use case each span will have the same styling as adjacent
spans, we can join adjacent spans (and other inline elements, such as
breaks) into a single element containing the concatenated text of each,
thus avoiding the limit of GstMemorys in a GstBuffer and also reducing
the amount of styling/layout metadata that is attached to each buffer.

https://bugzilla.gnome.org/show_bug.cgi?id=781725
This commit is contained in:
Chris Bass 2017-04-25 15:15:13 +01:00 committed by Sebastian Dröge
parent 22c3830858
commit acfaf3a001

View file

@ -54,6 +54,7 @@ GST_DEBUG_CATEGORY_EXTERN (ttmlparse_debug);
#define GST_CAT_DEFAULT ttmlparse_debug
static gchar *ttml_get_xml_property (const xmlNode * node, const char *name);
static gpointer ttml_copy_tree_element (gconstpointer src, gpointer data);
typedef struct _TtmlStyleSet TtmlStyleSet;
typedef struct _TtmlElement TtmlElement;
@ -776,6 +777,38 @@ ttml_style_set_inherit (TtmlStyleSet * parent, TtmlStyleSet * child)
}
/*
 * Returns TRUE iff @element1 and @element2 reference the same set of styles.
 * If neither @element1 nor @element2 reference any styles, they are considered
 * to have matching styling and, hence, TRUE is returned. FALSE is returned if
 * either element is NULL, or if only one of them references styles.
 *
 * The comparison is order-independent: the style lists match when they have
 * the same length and every entry of @element1's list is also present in
 * @element2's list.
 */
static gboolean
ttml_element_styles_match (TtmlElement * element1, TtmlElement * element2)
{
  const gchar *const *strv;
  guint n_styles;
  guint i;

  if (!element1 || !element2 || (!element1->styles && element2->styles) ||
      (element1->styles && !element2->styles))
    return FALSE;

  if (!element1->styles && !element2->styles)
    return TRUE;

  strv = (const gchar * const *) element2->styles;

  /* Measure the list once: g_strv_length() walks the whole array, so calling
   * it in the loop condition would re-scan it on every iteration. */
  n_styles = g_strv_length (element1->styles);
  if (n_styles != g_strv_length (element2->styles))
    return FALSE;

  for (i = 0; i < n_styles; ++i) {
    if (!g_strv_contains (strv, element1->styles[i]))
      return FALSE;
  }

  return TRUE;
}
static gchar *
ttml_get_element_type_string (TtmlElement * element)
{
@ -1172,7 +1205,8 @@ ttml_get_active_trees (GList * element_trees, GstClockTime time)
GList *ret = NULL;
for (tree = g_list_first (element_trees); tree; tree = tree->next) {
GNode *root = g_node_copy ((GNode *) tree->data);
GNode *root = g_node_copy_deep ((GNode *) tree->data,
ttml_copy_tree_element, NULL);
GST_CAT_LOG (ttmlparse_debug, "There are %u nodes in tree.",
g_node_n_nodes (root, G_TRAVERSE_ALL));
root = ttml_remove_nodes_by_time (root, time);
@ -1481,7 +1515,7 @@ ttml_add_text_to_buffer (GstBuffer * buf, const gchar * text)
GST_CAT_ERROR (ttmlparse_debug, "Failed to map memory.");
g_strlcpy ((gchar *) map.data, text, map.size);
GST_CAT_DEBUG (ttmlparse_debug, "Inserted following text into buffer: %s",
GST_CAT_DEBUG (ttmlparse_debug, "Inserted following text into buffer: \"%s\"",
(gchar *) map.data);
gst_memory_unmap (mem, &map);
@ -1765,6 +1799,118 @@ ttml_assign_region_times (GList * region_trees, GstClockTime doc_begin,
}
/*
 * Promotes @node to the position of its parent, setting the prev, next and
 * parent pointers of @node to that of its original parent. The replaced parent
 * node and the TtmlElement it carried are freed. Should be called only on
 * nodes that are the sole child of their parent, otherwise sibling nodes may
 * be leaked.
 *
 * Does nothing if @node has no parent. If the replaced parent was itself a
 * root (it had no parent), @node simply becomes the new root; any pointer the
 * caller still holds to the old parent is dangling after this call.
 */
static void
ttml_promote_node (GNode * node)
{
  GNode *parent_node = node->parent;
  TtmlElement *parent_element;

  if (!parent_node)
    return;
  parent_element = (TtmlElement *) parent_node->data;

  /* Splice @node into the sibling list in place of its parent. */
  node->prev = parent_node->prev;
  if (node->prev)
    node->prev->next = node;
  else if (parent_node->parent)
    /* Parent was the first child: the grandparent must now point at @node.
     * The grandparent is absent when the parent was a root node. */
    parent_node->parent->children = node;
  node->next = parent_node->next;
  if (node->next)
    node->next->prev = node;
  node->parent = parent_node->parent;

  /* Fully detach the old parent before destroying it, so that
   * g_node_destroy() touches neither @node nor the former siblings. */
  parent_node->prev = parent_node->next = NULL;
  parent_node->parent = parent_node->children = NULL;
  g_node_destroy (parent_node);
  ttml_delete_element (parent_element);
}
/*
 * Tells whether @element is of a type that may be merged with a neighbouring
 * joinable element. Only anonymous spans and BR elements qualify.
 */
static gboolean
ttml_element_is_joinable (TtmlElement * element)
{
  switch (element->type) {
    case TTML_ELEMENT_TYPE_ANON_SPAN:
    case TTML_ELEMENT_TYPE_BR:
      return TRUE;
    default:
      return FALSE;
  }
}
/*
 * Joins adjacent inline elements in @tree that have the same styling.
 *
 * @tree is the first node of a sibling list. The function works in two
 * passes over that list:
 *  1. Each node with children is processed recursively; afterwards, a span
 *     that is left with exactly one child is replaced by that child.
 *  2. Neighbouring siblings that are both joinable and whose styles match are
 *     merged: the text of the second is appended to the first, the first
 *     becomes an anonymous span, and the second is deleted.
 *
 * Does nothing if @tree is NULL.
 */
static void
ttml_join_region_tree_inline_elements (GNode * tree)
{
  GNode *n1, *n2;

  /* An empty sibling list has nothing to join; bail out before tree->next
   * is dereferenced below. */
  if (!tree)
    return;

  for (n1 = tree; n1; n1 = n1->next) {
    if (n1->children) {
      TtmlElement *element = (TtmlElement *) n1->data;

      ttml_join_region_tree_inline_elements (n1->children);
      if (element->type == TTML_ELEMENT_TYPE_SPAN &&
          g_node_n_children (n1) == 1) {
        GNode *child = n1->children;

        /* Keep the list head valid if the head itself is being replaced. */
        if (n1 == tree)
          tree = child;
        ttml_promote_node (child);
        /* The old n1 has been freed; continue iterating from its successor. */
        n1 = child;
      }
    }
  }

  n1 = tree;
  n2 = tree->next;

  while (n1 && n2) {
    TtmlElement *e1 = (TtmlElement *) n1->data;
    TtmlElement *e2 = (TtmlElement *) n2->data;

    if (ttml_element_is_joinable (e1) &&
        ttml_element_is_joinable (e2) && ttml_element_styles_match (e1, e2)) {
      gchar *tmp = e1->text;

      GST_CAT_LOG (ttmlparse_debug,
          "Joining adjacent element text \"%s\" & \"%s\"", e1->text, e2->text);
      e1->text = g_strconcat (e1->text, e2->text, NULL);
      e1->type = TTML_ELEMENT_TYPE_ANON_SPAN;
      g_free (tmp);

      ttml_delete_element (e2);
      g_node_destroy (n2);
      /* Stay on n1 so that a run of several joinable siblings collapses
       * into a single element. */
      n2 = n1->next;
    } else {
      n1 = n2;
      n2 = n2->next;
    }
  }
}
/*
 * Runs the inline-element joining pass over every region tree of every scene
 * in @scenes.
 */
static void
ttml_join_inline_elements (GList * scenes)
{
  GList *scene_link = g_list_first (scenes);

  while (scene_link) {
    TtmlScene *scene = scene_link->data;
    GList *tree_link = g_list_first (scene->trees);

    while (tree_link) {
      ttml_join_region_tree_inline_elements ((GNode *) tree_link->data);
      tree_link = tree_link->next;
    }

    scene_link = scene_link->next;
  }
}
static xmlNodePtr
ttml_find_child (xmlNodePtr parent, const gchar * name)
{
@ -1865,6 +2011,7 @@ ttml_parse (const gchar * input, GstClockTime begin, GstClockTime duration)
scenes = ttml_create_scenes (region_trees);
GST_CAT_LOG (ttmlparse_debug, "There are %u scenes in all.",
g_list_length (scenes));
ttml_join_inline_elements (scenes);
ttml_attach_scene_metadata (scenes, cellres_x, cellres_y);
output_buffers = create_buffer_list (scenes);