mirror of
https://gitlab.freedesktop.org/gstreamer/gstreamer.git
synced 2024-12-19 23:06:49 +00:00
ttmlparse: Collect buffers until a complete XML document is detected
A given buffer could be fragmented, so we might need to collect buffers until the end tag is detected. In addition, a single buffer can contain multiple TTML documents. Fixes: https://gitlab.freedesktop.org/gstreamer/gst-plugins-bad/issues/494
This commit is contained in:
parent
348494fd34
commit
ae76f9ab23
3 changed files with 59 additions and 25 deletions
|
@ -427,11 +427,11 @@ feed_textbuf (GstTtmlParse * self, GstBuffer * buf)
|
|||
input = convert_encoding (self, (const gchar *) data, avail, &consumed);
|
||||
|
||||
if (input && consumed > 0) {
|
||||
if (self->textbuf) {
|
||||
g_string_free (self->textbuf, TRUE);
|
||||
self->textbuf = NULL;
|
||||
}
|
||||
if (!self->textbuf)
|
||||
self->textbuf = g_string_new (input);
|
||||
else
|
||||
self->textbuf = g_string_append (self->textbuf, input);
|
||||
|
||||
gst_adapter_unmap (self->adapter);
|
||||
gst_adapter_flush (self->adapter, consumed);
|
||||
} else {
|
||||
|
@ -446,9 +446,11 @@ handle_buffer (GstTtmlParse * self, GstBuffer * buf)
|
|||
{
|
||||
GstFlowReturn ret = GST_FLOW_OK;
|
||||
GstCaps *caps = NULL;
|
||||
GList *subtitle_list, *subtitle;
|
||||
GList *subtitle_list = NULL;
|
||||
GList *iter;
|
||||
GstClockTime begin = GST_BUFFER_PTS (buf);
|
||||
GstClockTime duration = GST_BUFFER_DURATION (buf);
|
||||
guint consumed;
|
||||
|
||||
if (self->first_buffer) {
|
||||
GstMapInfo map;
|
||||
|
@ -474,19 +476,31 @@ handle_buffer (GstTtmlParse * self, GstBuffer * buf)
|
|||
self->need_segment = FALSE;
|
||||
}
|
||||
|
||||
subtitle_list = ttml_parse (self->textbuf->str, begin, duration);
|
||||
do {
|
||||
consumed = ttml_parse (self->textbuf->str, begin, duration, &subtitle_list);
|
||||
|
||||
for (subtitle = subtitle_list; subtitle; subtitle = subtitle->next) {
|
||||
GstBuffer *op_buffer = subtitle->data;
|
||||
if (!consumed) {
|
||||
GST_DEBUG_OBJECT (self, "need more data");
|
||||
return ret;
|
||||
}
|
||||
|
||||
self->textbuf = g_string_erase (self->textbuf, 0, consumed);
|
||||
|
||||
for (iter = subtitle_list; iter; iter = g_list_next (iter)) {
|
||||
GstBuffer *op_buffer = GST_BUFFER (iter->data);
|
||||
self->segment.position = GST_BUFFER_PTS (op_buffer);
|
||||
|
||||
ret = gst_pad_push (self->srcpad, op_buffer);
|
||||
|
||||
if (ret != GST_FLOW_OK)
|
||||
if (ret != GST_FLOW_OK) {
|
||||
GST_DEBUG_OBJECT (self, "flow: %s", gst_flow_get_name (ret));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
g_list_free (subtitle_list);
|
||||
} while (TRUE);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
|
|
@ -1926,9 +1926,11 @@ ttml_find_child (xmlNodePtr parent, const gchar * name)
|
|||
return child;
|
||||
}
|
||||
|
||||
#define TTML_END_TAG "</tt>"
|
||||
|
||||
GList *
|
||||
ttml_parse (const gchar * input, GstClockTime begin, GstClockTime duration)
|
||||
guint
|
||||
ttml_parse (const gchar * input, GstClockTime begin, GstClockTime duration,
|
||||
GList ** parsed)
|
||||
{
|
||||
xmlDocPtr doc;
|
||||
xmlNodePtr root_node, head_node, body_node;
|
||||
|
@ -1938,30 +1940,45 @@ ttml_parse (const gchar * input, GstClockTime begin, GstClockTime duration)
|
|||
gchar *value;
|
||||
guint cellres_x, cellres_y;
|
||||
TtmlWhitespaceMode doc_whitespace_mode = TTML_WHITESPACE_MODE_DEFAULT;
|
||||
guint consumed = 0;
|
||||
gchar *end_tt;
|
||||
|
||||
g_return_val_if_fail (parsed != NULL, 0);
|
||||
|
||||
*parsed = NULL;
|
||||
if (!g_utf8_validate (input, -1, NULL)) {
|
||||
GST_CAT_ERROR (ttmlparse_debug, "Input isn't valid UTF-8.");
|
||||
return NULL;
|
||||
return 0;
|
||||
}
|
||||
GST_CAT_LOG (ttmlparse_debug, "Input:\n%s", input);
|
||||
|
||||
end_tt = g_strrstr (input, TTML_END_TAG);
|
||||
|
||||
if (!end_tt) {
|
||||
GST_CAT_DEBUG (ttmlparse_debug, "Need more data");
|
||||
return 0;
|
||||
}
|
||||
|
||||
consumed = end_tt - input + strlen (TTML_END_TAG);
|
||||
|
||||
styles_table = g_hash_table_new_full (g_str_hash, g_str_equal, g_free,
|
||||
(GDestroyNotify) ttml_delete_element);
|
||||
regions_table = g_hash_table_new_full (g_str_hash, g_str_equal, g_free,
|
||||
(GDestroyNotify) ttml_delete_element);
|
||||
|
||||
/* Parse input. */
|
||||
doc = xmlReadMemory (input, strlen (input), "any_doc_name", NULL, 0);
|
||||
doc = xmlReadMemory (input, consumed, "any_doc_name", NULL, 0);
|
||||
if (!doc) {
|
||||
GST_CAT_ERROR (ttmlparse_debug, "Failed to parse document.");
|
||||
return NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
root_node = xmlDocGetRootElement (doc);
|
||||
|
||||
if (xmlStrcmp (root_node->name, (const xmlChar *) "tt") != 0) {
|
||||
GST_CAT_ERROR (ttmlparse_debug, "Root element of document is not tt:tt.");
|
||||
xmlFreeDoc (doc);
|
||||
return NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if ((value = ttml_get_xml_property (root_node, "cellResolution"))) {
|
||||
|
@ -1988,8 +2005,9 @@ ttml_parse (const gchar * input, GstClockTime begin, GstClockTime duration)
|
|||
if (!(head_node = ttml_find_child (root_node, "head"))) {
|
||||
GST_CAT_ERROR (ttmlparse_debug, "No <head> element found.");
|
||||
xmlFreeDoc (doc);
|
||||
return NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
ttml_parse_head (head_node, styles_table, regions_table);
|
||||
|
||||
if ((body_node = ttml_find_child (root_node, "body"))) {
|
||||
|
@ -2030,5 +2048,7 @@ ttml_parse (const gchar * input, GstClockTime begin, GstClockTime duration)
|
|||
g_hash_table_destroy (styles_table);
|
||||
g_hash_table_destroy (regions_table);
|
||||
|
||||
return output_buffers;
|
||||
*parsed = output_buffers;
|
||||
|
||||
return consumed;
|
||||
}
|
||||
|
|
|
@ -27,8 +27,8 @@
|
|||
|
||||
G_BEGIN_DECLS
|
||||
|
||||
GList *ttml_parse (const gchar * file, GstClockTime begin,
|
||||
GstClockTime duration);
|
||||
guint ttml_parse (const gchar * file, GstClockTime begin,
|
||||
GstClockTime duration, GList **parsed);
|
||||
|
||||
G_END_DECLS
|
||||
#endif /* _TTML_PARSE_H_ */
|
||||
|
|
Loading…
Reference in a new issue