ttmlparse: Collect buffers until a complete XML document is detected

A given buffer may be fragmented, so we might need to
collect buffers until the end tag is detected. Conversely, a
single buffer can also contain multiple TTML documents.

Fixes: https://gitlab.freedesktop.org/gstreamer/gst-plugins-bad/issues/494
This commit is contained in:
Seungha Yang 2018-07-20 21:33:24 +09:00
parent 348494fd34
commit ae76f9ab23
3 changed files with 59 additions and 25 deletions

View file

@ -427,11 +427,11 @@ feed_textbuf (GstTtmlParse * self, GstBuffer * buf)
input = convert_encoding (self, (const gchar *) data, avail, &consumed);
if (input && consumed > 0) {
if (self->textbuf) {
g_string_free (self->textbuf, TRUE);
self->textbuf = NULL;
}
self->textbuf = g_string_new (input);
if (!self->textbuf)
self->textbuf = g_string_new (input);
else
self->textbuf = g_string_append (self->textbuf, input);
gst_adapter_unmap (self->adapter);
gst_adapter_flush (self->adapter, consumed);
} else {
@ -446,9 +446,11 @@ handle_buffer (GstTtmlParse * self, GstBuffer * buf)
{
GstFlowReturn ret = GST_FLOW_OK;
GstCaps *caps = NULL;
GList *subtitle_list, *subtitle;
GList *subtitle_list = NULL;
GList *iter;
GstClockTime begin = GST_BUFFER_PTS (buf);
GstClockTime duration = GST_BUFFER_DURATION (buf);
guint consumed;
if (self->first_buffer) {
GstMapInfo map;
@ -474,19 +476,31 @@ handle_buffer (GstTtmlParse * self, GstBuffer * buf)
self->need_segment = FALSE;
}
subtitle_list = ttml_parse (self->textbuf->str, begin, duration);
do {
consumed = ttml_parse (self->textbuf->str, begin, duration, &subtitle_list);
for (subtitle = subtitle_list; subtitle; subtitle = subtitle->next) {
GstBuffer *op_buffer = subtitle->data;
self->segment.position = GST_BUFFER_PTS (op_buffer);
if (!consumed) {
GST_DEBUG_OBJECT (self, "need more data");
return ret;
}
ret = gst_pad_push (self->srcpad, op_buffer);
self->textbuf = g_string_erase (self->textbuf, 0, consumed);
if (ret != GST_FLOW_OK)
GST_DEBUG_OBJECT (self, "flow: %s", gst_flow_get_name (ret));
}
for (iter = subtitle_list; iter; iter = g_list_next (iter)) {
GstBuffer *op_buffer = GST_BUFFER (iter->data);
self->segment.position = GST_BUFFER_PTS (op_buffer);
ret = gst_pad_push (self->srcpad, op_buffer);
if (ret != GST_FLOW_OK) {
GST_DEBUG_OBJECT (self, "flow: %s", gst_flow_get_name (ret));
break;
}
}
g_list_free (subtitle_list);
} while (TRUE);
g_list_free (subtitle_list);
return ret;
}

View file

@ -1926,9 +1926,11 @@ ttml_find_child (xmlNodePtr parent, const gchar * name)
return child;
}
#define TTML_END_TAG "</tt>"
GList *
ttml_parse (const gchar * input, GstClockTime begin, GstClockTime duration)
guint
ttml_parse (const gchar * input, GstClockTime begin, GstClockTime duration,
GList ** parsed)
{
xmlDocPtr doc;
xmlNodePtr root_node, head_node, body_node;
@ -1938,30 +1940,45 @@ ttml_parse (const gchar * input, GstClockTime begin, GstClockTime duration)
gchar *value;
guint cellres_x, cellres_y;
TtmlWhitespaceMode doc_whitespace_mode = TTML_WHITESPACE_MODE_DEFAULT;
guint consumed = 0;
gchar *end_tt;
g_return_val_if_fail (parsed != NULL, 0);
*parsed = NULL;
if (!g_utf8_validate (input, -1, NULL)) {
GST_CAT_ERROR (ttmlparse_debug, "Input isn't valid UTF-8.");
return NULL;
return 0;
}
GST_CAT_LOG (ttmlparse_debug, "Input:\n%s", input);
end_tt = g_strrstr (input, TTML_END_TAG);
if (!end_tt) {
GST_CAT_DEBUG (ttmlparse_debug, "Need more data");
return 0;
}
consumed = end_tt - input + strlen (TTML_END_TAG);
styles_table = g_hash_table_new_full (g_str_hash, g_str_equal, g_free,
(GDestroyNotify) ttml_delete_element);
regions_table = g_hash_table_new_full (g_str_hash, g_str_equal, g_free,
(GDestroyNotify) ttml_delete_element);
/* Parse input. */
doc = xmlReadMemory (input, strlen (input), "any_doc_name", NULL, 0);
doc = xmlReadMemory (input, consumed, "any_doc_name", NULL, 0);
if (!doc) {
GST_CAT_ERROR (ttmlparse_debug, "Failed to parse document.");
return NULL;
return 0;
}
root_node = xmlDocGetRootElement (doc);
if (xmlStrcmp (root_node->name, (const xmlChar *) "tt") != 0) {
GST_CAT_ERROR (ttmlparse_debug, "Root element of document is not tt:tt.");
xmlFreeDoc (doc);
return NULL;
return 0;
}
if ((value = ttml_get_xml_property (root_node, "cellResolution"))) {
@ -1988,8 +2005,9 @@ ttml_parse (const gchar * input, GstClockTime begin, GstClockTime duration)
if (!(head_node = ttml_find_child (root_node, "head"))) {
GST_CAT_ERROR (ttmlparse_debug, "No <head> element found.");
xmlFreeDoc (doc);
return NULL;
return 0;
}
ttml_parse_head (head_node, styles_table, regions_table);
if ((body_node = ttml_find_child (root_node, "body"))) {
@ -2030,5 +2048,7 @@ ttml_parse (const gchar * input, GstClockTime begin, GstClockTime duration)
g_hash_table_destroy (styles_table);
g_hash_table_destroy (regions_table);
return output_buffers;
*parsed = output_buffers;
return consumed;
}

View file

@ -27,8 +27,8 @@
G_BEGIN_DECLS
GList *ttml_parse (const gchar * file, GstClockTime begin,
GstClockTime duration);
guint ttml_parse (const gchar * file, GstClockTime begin,
GstClockTime duration, GList **parsed);
G_END_DECLS
#endif /* _TTML_PARSE_H_ */