mirror of
https://gitlab.freedesktop.org/gstreamer/gstreamer.git
synced 2024-11-27 12:11:13 +00:00
subparse: convert from pango-markup to utf8 ..
when downstream requires it
This commit is contained in:
parent
7816cbf9a4
commit
caca46e0e6
3 changed files with 146 additions and 8 deletions
|
@ -179,6 +179,7 @@ gst_sub_parse_init (GstSubParse * subparse)
|
||||||
|
|
||||||
subparse->textbuf = g_string_new (NULL);
|
subparse->textbuf = g_string_new (NULL);
|
||||||
subparse->parser_type = GST_SUB_PARSE_FORMAT_UNKNOWN;
|
subparse->parser_type = GST_SUB_PARSE_FORMAT_UNKNOWN;
|
||||||
|
subparse->strip_pango_markup = FALSE;
|
||||||
subparse->flushing = FALSE;
|
subparse->flushing = FALSE;
|
||||||
gst_segment_init (&subparse->segment, GST_FORMAT_TIME);
|
gst_segment_init (&subparse->segment, GST_FORMAT_TIME);
|
||||||
subparse->need_segment = TRUE;
|
subparse->need_segment = TRUE;
|
||||||
|
@ -1724,11 +1725,97 @@ feed_textbuf (GstSubParse * self, GstBuffer * buf)
|
||||||
g_free (input);
|
g_free (input);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static void
|
||||||
|
xml_text (GMarkupParseContext * context,
|
||||||
|
const gchar * text, gsize text_len, gpointer user_data, GError ** error)
|
||||||
|
{
|
||||||
|
gchar **accum = (gchar **) user_data;
|
||||||
|
gchar *concat;
|
||||||
|
|
||||||
|
if (*accum) {
|
||||||
|
concat = g_strconcat (*accum, text, NULL);
|
||||||
|
g_free (*accum);
|
||||||
|
*accum = concat;
|
||||||
|
} else {
|
||||||
|
*accum = g_strdup (text);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static gchar *
|
||||||
|
strip_pango_markup (gchar * markup, GError ** error)
|
||||||
|
{
|
||||||
|
GMarkupParser parser = { 0, };
|
||||||
|
GMarkupParseContext *context;
|
||||||
|
gchar *accum = NULL;
|
||||||
|
|
||||||
|
parser.text = xml_text;
|
||||||
|
context = g_markup_parse_context_new (&parser, 0, &accum, NULL);
|
||||||
|
|
||||||
|
g_markup_parse_context_parse (context, "<root>", 6, NULL);
|
||||||
|
g_markup_parse_context_parse (context, markup, strlen (markup), error);
|
||||||
|
g_markup_parse_context_parse (context, "</root>", 7, NULL);
|
||||||
|
if (*error)
|
||||||
|
goto error;
|
||||||
|
|
||||||
|
g_markup_parse_context_end_parse (context, error);
|
||||||
|
if (*error)
|
||||||
|
goto error;
|
||||||
|
|
||||||
|
done:
|
||||||
|
g_markup_parse_context_free (context);
|
||||||
|
return accum;
|
||||||
|
|
||||||
|
error:
|
||||||
|
g_free (accum);
|
||||||
|
accum = NULL;
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
|
||||||
|
static gboolean
|
||||||
|
gst_sub_parse_negotiate (GstSubParse * self, GstCaps * preferred)
|
||||||
|
{
|
||||||
|
GstCaps *caps;
|
||||||
|
gboolean ret = FALSE;
|
||||||
|
const GstStructure *s1, *s2;
|
||||||
|
|
||||||
|
caps = gst_pad_get_allowed_caps (self->srcpad);
|
||||||
|
|
||||||
|
s1 = gst_caps_get_structure (preferred, 0);
|
||||||
|
|
||||||
|
if (!g_strcmp0 (gst_structure_get_string (s1, "format"), "utf8")) {
|
||||||
|
GstCaps *intersected = gst_caps_intersect (caps, preferred);
|
||||||
|
gst_caps_unref (caps);
|
||||||
|
caps = intersected;
|
||||||
|
}
|
||||||
|
|
||||||
|
caps = gst_caps_fixate (caps);
|
||||||
|
|
||||||
|
if (gst_caps_is_empty (caps)) {
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
|
||||||
|
s2 = gst_caps_get_structure (caps, 0);
|
||||||
|
|
||||||
|
self->strip_pango_markup =
|
||||||
|
!g_strcmp0 (gst_structure_get_string (s2, "format"), "utf8")
|
||||||
|
&& !g_strcmp0 (gst_structure_get_string (s1, "format"), "pango-markup");
|
||||||
|
|
||||||
|
if (self->strip_pango_markup) {
|
||||||
|
GST_INFO_OBJECT (self, "We will convert from pango-markup to utf8");
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = gst_pad_set_caps (self->srcpad, caps);
|
||||||
|
|
||||||
|
done:
|
||||||
|
gst_caps_unref (caps);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
static GstFlowReturn
|
static GstFlowReturn
|
||||||
handle_buffer (GstSubParse * self, GstBuffer * buf)
|
handle_buffer (GstSubParse * self, GstBuffer * buf)
|
||||||
{
|
{
|
||||||
GstFlowReturn ret = GST_FLOW_OK;
|
GstFlowReturn ret = GST_FLOW_OK;
|
||||||
GstCaps *caps = NULL;
|
|
||||||
gchar *line, *subtitle;
|
gchar *line, *subtitle;
|
||||||
gboolean need_tags = FALSE;
|
gboolean need_tags = FALSE;
|
||||||
|
|
||||||
|
@ -1747,14 +1834,19 @@ handle_buffer (GstSubParse * self, GstBuffer * buf)
|
||||||
|
|
||||||
/* make sure we know the format */
|
/* make sure we know the format */
|
||||||
if (G_UNLIKELY (self->parser_type == GST_SUB_PARSE_FORMAT_UNKNOWN)) {
|
if (G_UNLIKELY (self->parser_type == GST_SUB_PARSE_FORMAT_UNKNOWN)) {
|
||||||
if (!(caps = gst_sub_parse_format_autodetect (self))) {
|
GstCaps *preferred;
|
||||||
return GST_FLOW_EOS;
|
|
||||||
|
if (!(preferred = gst_sub_parse_format_autodetect (self))) {
|
||||||
|
return GST_FLOW_NOT_NEGOTIATED;
|
||||||
}
|
}
|
||||||
if (!gst_pad_set_caps (self->srcpad, caps)) {
|
|
||||||
gst_caps_unref (caps);
|
if (!gst_sub_parse_negotiate (self, preferred)) {
|
||||||
return GST_FLOW_EOS;
|
gst_caps_unref (preferred);
|
||||||
|
return GST_FLOW_NOT_NEGOTIATED;
|
||||||
}
|
}
|
||||||
gst_caps_unref (caps);
|
|
||||||
|
gst_caps_unref (preferred);
|
||||||
|
|
||||||
need_tags = TRUE;
|
need_tags = TRUE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1790,7 +1882,22 @@ handle_buffer (GstSubParse * self, GstBuffer * buf)
|
||||||
g_free (line);
|
g_free (line);
|
||||||
|
|
||||||
if (subtitle) {
|
if (subtitle) {
|
||||||
guint subtitle_len = strlen (subtitle);
|
guint subtitle_len;
|
||||||
|
|
||||||
|
if (self->strip_pango_markup) {
|
||||||
|
GError *error = NULL;
|
||||||
|
gchar *stripped;
|
||||||
|
|
||||||
|
if ((stripped = strip_pango_markup (subtitle, &error))) {
|
||||||
|
g_free (subtitle);
|
||||||
|
subtitle = stripped;
|
||||||
|
} else {
|
||||||
|
GST_WARNING_OBJECT (self, "Failed to strip pango markup: %s",
|
||||||
|
error->message);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
subtitle_len = strlen (subtitle);
|
||||||
|
|
||||||
/* +1 for terminating NUL character */
|
/* +1 for terminating NUL character */
|
||||||
buf = gst_buffer_new_and_alloc (subtitle_len + 1);
|
buf = gst_buffer_new_and_alloc (subtitle_len + 1);
|
||||||
|
@ -1947,6 +2054,7 @@ gst_sub_parse_change_state (GstElement * element, GstStateChange transition)
|
||||||
/* format detection will init the parser state */
|
/* format detection will init the parser state */
|
||||||
self->offset = 0;
|
self->offset = 0;
|
||||||
self->parser_type = GST_SUB_PARSE_FORMAT_UNKNOWN;
|
self->parser_type = GST_SUB_PARSE_FORMAT_UNKNOWN;
|
||||||
|
self->strip_pango_markup = FALSE;
|
||||||
self->valid_utf8 = TRUE;
|
self->valid_utf8 = TRUE;
|
||||||
self->first_buffer = TRUE;
|
self->first_buffer = TRUE;
|
||||||
g_free (self->detected_encoding);
|
g_free (self->detected_encoding);
|
||||||
|
|
|
@ -99,6 +99,7 @@ struct _GstSubParse {
|
||||||
gboolean valid_utf8;
|
gboolean valid_utf8;
|
||||||
gchar *detected_encoding;
|
gchar *detected_encoding;
|
||||||
gchar *encoding;
|
gchar *encoding;
|
||||||
|
gboolean strip_pango_markup;
|
||||||
|
|
||||||
gboolean first_buffer;
|
gboolean first_buffer;
|
||||||
|
|
||||||
|
|
|
@ -22,6 +22,7 @@
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include <gst/check/gstcheck.h>
|
#include <gst/check/gstcheck.h>
|
||||||
|
#include <gst/check/gstharness.h>
|
||||||
|
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
|
@ -1036,6 +1037,33 @@ GST_START_TEST (test_lrc)
|
||||||
|
|
||||||
GST_END_TEST;
|
GST_END_TEST;
|
||||||
|
|
||||||
|
GST_START_TEST (test_raw_conversion)
|
||||||
|
{
|
||||||
|
GstHarness *h;
|
||||||
|
GstBuffer *buffer;
|
||||||
|
GstMapInfo map;
|
||||||
|
|
||||||
|
h = gst_harness_new ("subparse");
|
||||||
|
|
||||||
|
gst_harness_set_src_caps_str (h, "application/x-subtitle");
|
||||||
|
gst_harness_set_sink_caps_str (h, "text/x-raw, format=utf8");
|
||||||
|
|
||||||
|
buffer = buffer_from_static_string (srt_input[5].in);
|
||||||
|
|
||||||
|
buffer = gst_harness_push_and_pull (h, buffer);
|
||||||
|
|
||||||
|
gst_buffer_map (buffer, &map, GST_MAP_READ);
|
||||||
|
fail_unless_equals_int (map.size, 3);
|
||||||
|
fail_unless_equals_string ((gchar *) map.data, "Six");
|
||||||
|
gst_buffer_unmap (buffer, &map);
|
||||||
|
|
||||||
|
gst_clear_buffer (&buffer);
|
||||||
|
|
||||||
|
gst_harness_teardown (h);
|
||||||
|
}
|
||||||
|
|
||||||
|
GST_END_TEST;
|
||||||
|
|
||||||
/* TODO:
|
/* TODO:
|
||||||
* - add/modify tests so that lines aren't dogfed to the parsers in complete
|
* - add/modify tests so that lines aren't dogfed to the parsers in complete
|
||||||
* lines or sets of complete lines, but rather in random chunks
|
* lines or sets of complete lines, but rather in random chunks
|
||||||
|
@ -1071,6 +1099,7 @@ subparse_suite (void)
|
||||||
tcase_add_test (tc_chain, test_sami_bad_entities);
|
tcase_add_test (tc_chain, test_sami_bad_entities);
|
||||||
tcase_add_test (tc_chain, test_sami_comment);
|
tcase_add_test (tc_chain, test_sami_comment);
|
||||||
tcase_add_test (tc_chain, test_lrc);
|
tcase_add_test (tc_chain, test_lrc);
|
||||||
|
tcase_add_test (tc_chain, test_raw_conversion);
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue