mirror of
https://gitlab.freedesktop.org/gstreamer/gstreamer.git
synced 2025-01-11 01:45:33 +00:00
subparse: convert from pango-markup to utf8 when downstream requires it
This commit is contained in:
parent
7816cbf9a4
commit
caca46e0e6
3 changed files with 146 additions and 8 deletions
|
@ -179,6 +179,7 @@ gst_sub_parse_init (GstSubParse * subparse)
|
|||
|
||||
subparse->textbuf = g_string_new (NULL);
|
||||
subparse->parser_type = GST_SUB_PARSE_FORMAT_UNKNOWN;
|
||||
subparse->strip_pango_markup = FALSE;
|
||||
subparse->flushing = FALSE;
|
||||
gst_segment_init (&subparse->segment, GST_FORMAT_TIME);
|
||||
subparse->need_segment = TRUE;
|
||||
|
@ -1724,11 +1725,97 @@ feed_textbuf (GstSubParse * self, GstBuffer * buf)
|
|||
g_free (input);
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
xml_text (GMarkupParseContext * context,
|
||||
const gchar * text, gsize text_len, gpointer user_data, GError ** error)
|
||||
{
|
||||
gchar **accum = (gchar **) user_data;
|
||||
gchar *concat;
|
||||
|
||||
if (*accum) {
|
||||
concat = g_strconcat (*accum, text, NULL);
|
||||
g_free (*accum);
|
||||
*accum = concat;
|
||||
} else {
|
||||
*accum = g_strdup (text);
|
||||
}
|
||||
}
|
||||
|
||||
static gchar *
|
||||
strip_pango_markup (gchar * markup, GError ** error)
|
||||
{
|
||||
GMarkupParser parser = { 0, };
|
||||
GMarkupParseContext *context;
|
||||
gchar *accum = NULL;
|
||||
|
||||
parser.text = xml_text;
|
||||
context = g_markup_parse_context_new (&parser, 0, &accum, NULL);
|
||||
|
||||
g_markup_parse_context_parse (context, "<root>", 6, NULL);
|
||||
g_markup_parse_context_parse (context, markup, strlen (markup), error);
|
||||
g_markup_parse_context_parse (context, "</root>", 7, NULL);
|
||||
if (*error)
|
||||
goto error;
|
||||
|
||||
g_markup_parse_context_end_parse (context, error);
|
||||
if (*error)
|
||||
goto error;
|
||||
|
||||
done:
|
||||
g_markup_parse_context_free (context);
|
||||
return accum;
|
||||
|
||||
error:
|
||||
g_free (accum);
|
||||
accum = NULL;
|
||||
goto done;
|
||||
}
|
||||
|
||||
static gboolean
|
||||
gst_sub_parse_negotiate (GstSubParse * self, GstCaps * preferred)
|
||||
{
|
||||
GstCaps *caps;
|
||||
gboolean ret = FALSE;
|
||||
const GstStructure *s1, *s2;
|
||||
|
||||
caps = gst_pad_get_allowed_caps (self->srcpad);
|
||||
|
||||
s1 = gst_caps_get_structure (preferred, 0);
|
||||
|
||||
if (!g_strcmp0 (gst_structure_get_string (s1, "format"), "utf8")) {
|
||||
GstCaps *intersected = gst_caps_intersect (caps, preferred);
|
||||
gst_caps_unref (caps);
|
||||
caps = intersected;
|
||||
}
|
||||
|
||||
caps = gst_caps_fixate (caps);
|
||||
|
||||
if (gst_caps_is_empty (caps)) {
|
||||
goto done;
|
||||
}
|
||||
|
||||
s2 = gst_caps_get_structure (caps, 0);
|
||||
|
||||
self->strip_pango_markup =
|
||||
!g_strcmp0 (gst_structure_get_string (s2, "format"), "utf8")
|
||||
&& !g_strcmp0 (gst_structure_get_string (s1, "format"), "pango-markup");
|
||||
|
||||
if (self->strip_pango_markup) {
|
||||
GST_INFO_OBJECT (self, "We will convert from pango-markup to utf8");
|
||||
}
|
||||
|
||||
ret = gst_pad_set_caps (self->srcpad, caps);
|
||||
|
||||
done:
|
||||
gst_caps_unref (caps);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static GstFlowReturn
|
||||
handle_buffer (GstSubParse * self, GstBuffer * buf)
|
||||
{
|
||||
GstFlowReturn ret = GST_FLOW_OK;
|
||||
GstCaps *caps = NULL;
|
||||
gchar *line, *subtitle;
|
||||
gboolean need_tags = FALSE;
|
||||
|
||||
|
@ -1747,14 +1834,19 @@ handle_buffer (GstSubParse * self, GstBuffer * buf)
|
|||
|
||||
/* make sure we know the format */
|
||||
if (G_UNLIKELY (self->parser_type == GST_SUB_PARSE_FORMAT_UNKNOWN)) {
|
||||
if (!(caps = gst_sub_parse_format_autodetect (self))) {
|
||||
return GST_FLOW_EOS;
|
||||
GstCaps *preferred;
|
||||
|
||||
if (!(preferred = gst_sub_parse_format_autodetect (self))) {
|
||||
return GST_FLOW_NOT_NEGOTIATED;
|
||||
}
|
||||
if (!gst_pad_set_caps (self->srcpad, caps)) {
|
||||
gst_caps_unref (caps);
|
||||
return GST_FLOW_EOS;
|
||||
|
||||
if (!gst_sub_parse_negotiate (self, preferred)) {
|
||||
gst_caps_unref (preferred);
|
||||
return GST_FLOW_NOT_NEGOTIATED;
|
||||
}
|
||||
gst_caps_unref (caps);
|
||||
|
||||
gst_caps_unref (preferred);
|
||||
|
||||
need_tags = TRUE;
|
||||
}
|
||||
|
||||
|
@ -1790,7 +1882,22 @@ handle_buffer (GstSubParse * self, GstBuffer * buf)
|
|||
g_free (line);
|
||||
|
||||
if (subtitle) {
|
||||
guint subtitle_len = strlen (subtitle);
|
||||
guint subtitle_len;
|
||||
|
||||
if (self->strip_pango_markup) {
|
||||
GError *error = NULL;
|
||||
gchar *stripped;
|
||||
|
||||
if ((stripped = strip_pango_markup (subtitle, &error))) {
|
||||
g_free (subtitle);
|
||||
subtitle = stripped;
|
||||
} else {
|
||||
GST_WARNING_OBJECT (self, "Failed to strip pango markup: %s",
|
||||
error->message);
|
||||
}
|
||||
}
|
||||
|
||||
subtitle_len = strlen (subtitle);
|
||||
|
||||
/* +1 for terminating NUL character */
|
||||
buf = gst_buffer_new_and_alloc (subtitle_len + 1);
|
||||
|
@ -1947,6 +2054,7 @@ gst_sub_parse_change_state (GstElement * element, GstStateChange transition)
|
|||
/* format detection will init the parser state */
|
||||
self->offset = 0;
|
||||
self->parser_type = GST_SUB_PARSE_FORMAT_UNKNOWN;
|
||||
self->strip_pango_markup = FALSE;
|
||||
self->valid_utf8 = TRUE;
|
||||
self->first_buffer = TRUE;
|
||||
g_free (self->detected_encoding);
|
||||
|
|
|
@ -99,6 +99,7 @@ struct _GstSubParse {
|
|||
gboolean valid_utf8;
|
||||
gchar *detected_encoding;
|
||||
gchar *encoding;
|
||||
gboolean strip_pango_markup;
|
||||
|
||||
gboolean first_buffer;
|
||||
|
||||
|
|
|
@ -22,6 +22,7 @@
|
|||
#endif
|
||||
|
||||
#include <gst/check/gstcheck.h>
|
||||
#include <gst/check/gstharness.h>
|
||||
|
||||
#include <string.h>
|
||||
|
||||
|
@ -1036,6 +1037,33 @@ GST_START_TEST (test_lrc)
|
|||
|
||||
GST_END_TEST;
|
||||
|
||||
GST_START_TEST (test_raw_conversion)
|
||||
{
|
||||
GstHarness *h;
|
||||
GstBuffer *buffer;
|
||||
GstMapInfo map;
|
||||
|
||||
h = gst_harness_new ("subparse");
|
||||
|
||||
gst_harness_set_src_caps_str (h, "application/x-subtitle");
|
||||
gst_harness_set_sink_caps_str (h, "text/x-raw, format=utf8");
|
||||
|
||||
buffer = buffer_from_static_string (srt_input[5].in);
|
||||
|
||||
buffer = gst_harness_push_and_pull (h, buffer);
|
||||
|
||||
gst_buffer_map (buffer, &map, GST_MAP_READ);
|
||||
fail_unless_equals_int (map.size, 3);
|
||||
fail_unless_equals_string ((gchar *) map.data, "Six");
|
||||
gst_buffer_unmap (buffer, &map);
|
||||
|
||||
gst_clear_buffer (&buffer);
|
||||
|
||||
gst_harness_teardown (h);
|
||||
}
|
||||
|
||||
GST_END_TEST;
|
||||
|
||||
/* TODO:
|
||||
* - add/modify tests so that lines aren't dogfed to the parsers in complete
|
||||
* lines or sets of complete lines, but rather in random chunks
|
||||
|
@ -1071,6 +1099,7 @@ subparse_suite (void)
|
|||
tcase_add_test (tc_chain, test_sami_bad_entities);
|
||||
tcase_add_test (tc_chain, test_sami_comment);
|
||||
tcase_add_test (tc_chain, test_lrc);
|
||||
tcase_add_test (tc_chain, test_raw_conversion);
|
||||
return s;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue