ttmlparse: Collect buffers until a complete XML document is detected

A given buffer may be fragmented, so we might need to
collect buffers until the end tag is detected. In addition,
a single buffer can contain multiple TTML documents.

Fixes: https://gitlab.freedesktop.org/gstreamer/gst-plugins-bad/issues/494
This commit is contained in:
Seungha Yang 2018-07-20 21:33:24 +09:00
parent 348494fd34
commit ae76f9ab23
3 changed files with 59 additions and 25 deletions

View file

@ -427,11 +427,11 @@ feed_textbuf (GstTtmlParse * self, GstBuffer * buf)
input = convert_encoding (self, (const gchar *) data, avail, &consumed); input = convert_encoding (self, (const gchar *) data, avail, &consumed);
if (input && consumed > 0) { if (input && consumed > 0) {
if (self->textbuf) { if (!self->textbuf)
g_string_free (self->textbuf, TRUE);
self->textbuf = NULL;
}
self->textbuf = g_string_new (input); self->textbuf = g_string_new (input);
else
self->textbuf = g_string_append (self->textbuf, input);
gst_adapter_unmap (self->adapter); gst_adapter_unmap (self->adapter);
gst_adapter_flush (self->adapter, consumed); gst_adapter_flush (self->adapter, consumed);
} else { } else {
@ -446,9 +446,11 @@ handle_buffer (GstTtmlParse * self, GstBuffer * buf)
{ {
GstFlowReturn ret = GST_FLOW_OK; GstFlowReturn ret = GST_FLOW_OK;
GstCaps *caps = NULL; GstCaps *caps = NULL;
GList *subtitle_list, *subtitle; GList *subtitle_list = NULL;
GList *iter;
GstClockTime begin = GST_BUFFER_PTS (buf); GstClockTime begin = GST_BUFFER_PTS (buf);
GstClockTime duration = GST_BUFFER_DURATION (buf); GstClockTime duration = GST_BUFFER_DURATION (buf);
guint consumed;
if (self->first_buffer) { if (self->first_buffer) {
GstMapInfo map; GstMapInfo map;
@ -474,19 +476,31 @@ handle_buffer (GstTtmlParse * self, GstBuffer * buf)
self->need_segment = FALSE; self->need_segment = FALSE;
} }
subtitle_list = ttml_parse (self->textbuf->str, begin, duration); do {
consumed = ttml_parse (self->textbuf->str, begin, duration, &subtitle_list);
for (subtitle = subtitle_list; subtitle; subtitle = subtitle->next) { if (!consumed) {
GstBuffer *op_buffer = subtitle->data; GST_DEBUG_OBJECT (self, "need more data");
return ret;
}
self->textbuf = g_string_erase (self->textbuf, 0, consumed);
for (iter = subtitle_list; iter; iter = g_list_next (iter)) {
GstBuffer *op_buffer = GST_BUFFER (iter->data);
self->segment.position = GST_BUFFER_PTS (op_buffer); self->segment.position = GST_BUFFER_PTS (op_buffer);
ret = gst_pad_push (self->srcpad, op_buffer); ret = gst_pad_push (self->srcpad, op_buffer);
if (ret != GST_FLOW_OK) if (ret != GST_FLOW_OK) {
GST_DEBUG_OBJECT (self, "flow: %s", gst_flow_get_name (ret)); GST_DEBUG_OBJECT (self, "flow: %s", gst_flow_get_name (ret));
break;
}
} }
g_list_free (subtitle_list); g_list_free (subtitle_list);
} while (TRUE);
return ret; return ret;
} }

View file

@ -1926,9 +1926,11 @@ ttml_find_child (xmlNodePtr parent, const gchar * name)
return child; return child;
} }
#define TTML_END_TAG "</tt>"
GList * guint
ttml_parse (const gchar * input, GstClockTime begin, GstClockTime duration) ttml_parse (const gchar * input, GstClockTime begin, GstClockTime duration,
GList ** parsed)
{ {
xmlDocPtr doc; xmlDocPtr doc;
xmlNodePtr root_node, head_node, body_node; xmlNodePtr root_node, head_node, body_node;
@ -1938,30 +1940,45 @@ ttml_parse (const gchar * input, GstClockTime begin, GstClockTime duration)
gchar *value; gchar *value;
guint cellres_x, cellres_y; guint cellres_x, cellres_y;
TtmlWhitespaceMode doc_whitespace_mode = TTML_WHITESPACE_MODE_DEFAULT; TtmlWhitespaceMode doc_whitespace_mode = TTML_WHITESPACE_MODE_DEFAULT;
guint consumed = 0;
gchar *end_tt;
g_return_val_if_fail (parsed != NULL, 0);
*parsed = NULL;
if (!g_utf8_validate (input, -1, NULL)) { if (!g_utf8_validate (input, -1, NULL)) {
GST_CAT_ERROR (ttmlparse_debug, "Input isn't valid UTF-8."); GST_CAT_ERROR (ttmlparse_debug, "Input isn't valid UTF-8.");
return NULL; return 0;
} }
GST_CAT_LOG (ttmlparse_debug, "Input:\n%s", input); GST_CAT_LOG (ttmlparse_debug, "Input:\n%s", input);
end_tt = g_strrstr (input, TTML_END_TAG);
if (!end_tt) {
GST_CAT_DEBUG (ttmlparse_debug, "Need more data");
return 0;
}
consumed = end_tt - input + strlen (TTML_END_TAG);
styles_table = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, styles_table = g_hash_table_new_full (g_str_hash, g_str_equal, g_free,
(GDestroyNotify) ttml_delete_element); (GDestroyNotify) ttml_delete_element);
regions_table = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, regions_table = g_hash_table_new_full (g_str_hash, g_str_equal, g_free,
(GDestroyNotify) ttml_delete_element); (GDestroyNotify) ttml_delete_element);
/* Parse input. */ /* Parse input. */
doc = xmlReadMemory (input, strlen (input), "any_doc_name", NULL, 0); doc = xmlReadMemory (input, consumed, "any_doc_name", NULL, 0);
if (!doc) { if (!doc) {
GST_CAT_ERROR (ttmlparse_debug, "Failed to parse document."); GST_CAT_ERROR (ttmlparse_debug, "Failed to parse document.");
return NULL; return 0;
} }
root_node = xmlDocGetRootElement (doc); root_node = xmlDocGetRootElement (doc);
if (xmlStrcmp (root_node->name, (const xmlChar *) "tt") != 0) { if (xmlStrcmp (root_node->name, (const xmlChar *) "tt") != 0) {
GST_CAT_ERROR (ttmlparse_debug, "Root element of document is not tt:tt."); GST_CAT_ERROR (ttmlparse_debug, "Root element of document is not tt:tt.");
xmlFreeDoc (doc); xmlFreeDoc (doc);
return NULL; return 0;
} }
if ((value = ttml_get_xml_property (root_node, "cellResolution"))) { if ((value = ttml_get_xml_property (root_node, "cellResolution"))) {
@ -1988,8 +2005,9 @@ ttml_parse (const gchar * input, GstClockTime begin, GstClockTime duration)
if (!(head_node = ttml_find_child (root_node, "head"))) { if (!(head_node = ttml_find_child (root_node, "head"))) {
GST_CAT_ERROR (ttmlparse_debug, "No <head> element found."); GST_CAT_ERROR (ttmlparse_debug, "No <head> element found.");
xmlFreeDoc (doc); xmlFreeDoc (doc);
return NULL; return 0;
} }
ttml_parse_head (head_node, styles_table, regions_table); ttml_parse_head (head_node, styles_table, regions_table);
if ((body_node = ttml_find_child (root_node, "body"))) { if ((body_node = ttml_find_child (root_node, "body"))) {
@ -2030,5 +2048,7 @@ ttml_parse (const gchar * input, GstClockTime begin, GstClockTime duration)
g_hash_table_destroy (styles_table); g_hash_table_destroy (styles_table);
g_hash_table_destroy (regions_table); g_hash_table_destroy (regions_table);
return output_buffers; *parsed = output_buffers;
return consumed;
} }

View file

@ -27,8 +27,8 @@
G_BEGIN_DECLS G_BEGIN_DECLS
GList *ttml_parse (const gchar * file, GstClockTime begin, guint ttml_parse (const gchar * file, GstClockTime begin,
GstClockTime duration); GstClockTime duration, GList **parsed);
G_END_DECLS G_END_DECLS
#endif /* _TTML_PARSE_H_ */ #endif /* _TTML_PARSE_H_ */