From 164a3128d5dd7212a1dd6ad84b5ef0e6bc7e3352 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim-Philipp=20M=C3=BCller?= <tim@centricular.net>
Date: Mon, 20 Feb 2006 18:27:06 +0000
Subject: [PATCH] gst/subparse/gstsubparse.c: Set right caps given that we send
 escaped text. Also, honour <i></i>, <b></b> and <u></u>...

Original commit message from CVS:
* gst/subparse/gstsubparse.c: (subrip_unescape_formatting),
(parse_subrip), (gst_sub_parse_format_autodetect):
Set right caps given that we send escaped text. Also,
honour <i></i>, <b></b> and <u></u> markers that can be found
in .srt files (fixes #310202).
---
 ChangeLog                  |  8 ++++++++
 gst/subparse/gstsubparse.c | 41 +++++++++++++++++++++++++++++++++++++-
 2 files changed, 48 insertions(+), 1 deletion(-)
diff --git a/ChangeLog b/ChangeLog
index be366e1982..e31a894e54 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+2006-02-20  Tim-Philipp Müller  <tim at centricular dot net>
+
+	* gst/subparse/gstsubparse.c: (subrip_unescape_formatting),
+	(parse_subrip), (gst_sub_parse_format_autodetect):
+	  Set right caps given that we send escaped text. Also,
+	  honour <i></i>, <b></b> and <u></u> markers that can be found
+	  in .srt files (fixes #310202).
+
 2006-02-20  Tim-Philipp Müller  <tim at centricular dot net>
 
 	* gst-libs/gst/audio/mixerutils.c:
diff --git a/gst/subparse/gstsubparse.c b/gst/subparse/gstsubparse.c
index b62b802c66..67c191bde0 100644
--- a/gst/subparse/gstsubparse.c
+++ b/gst/subparse/gstsubparse.c
@@ -383,6 +383,44 @@ parse_mdvdsub (ParserState * state, const gchar * line)
   return ret;
 }
 
+/* we want to escape text in general, but retain basic markup like
+ * <i></i>, <u></u>, and <b></b>. The easiest and safest way is to
+ * just unescape a white list of allowed markups again after
+ * escaping everything (the text between these simple markers isn't
+ * necessarily escaped, so it seems best to do it like this) */
+static void
+subrip_unescape_formatting (gchar * txt)
+{
+  gchar *pos;
+
+  for (pos = txt; pos != NULL && *pos != '\0'; ++pos) {
+    if (g_ascii_strncasecmp (pos, "&lt;u&gt;", 9) == 0 ||
+        g_ascii_strncasecmp (pos, "&lt;i&gt;", 9) == 0 ||
+        g_ascii_strncasecmp (pos, "&lt;b&gt;", 9) == 0) {
+      pos[0] = '<';
+      pos[1] = g_ascii_tolower (pos[4]);
+      pos[2] = '>';
+      /* move NUL terminator as well */
+      g_memmove (pos + 3, pos + 9, strlen (pos + 9) + 1);
+      pos += 2;
+    }
+  }
+
+  for (pos = txt; pos != NULL && *pos != '\0'; ++pos) {
+    if (g_ascii_strncasecmp (pos, "&lt;/u&gt;", 10) == 0 ||
+        g_ascii_strncasecmp (pos, "&lt;/i&gt;", 10) == 0 ||
+        g_ascii_strncasecmp (pos, "&lt;/b&gt;", 10) == 0) {
+      pos[0] = '<';
+      pos[1] = '/';
+      pos[2] = g_ascii_tolower (pos[5]);
+      pos[3] = '>';
+      /* move NUL terminator as well */
+      g_memmove (pos + 4, pos + 10, strlen (pos + 10) + 1);
+      pos += 3;
+    }
+  }
+}
+
 static gchar *
 parse_subrip (ParserState * state, const gchar * line)
 {
@@ -440,6 +478,7 @@ parse_subrip (ParserState * state, const gchar * line)
         ret = g_markup_escape_text (state->buf->str, state->buf->len);
         g_string_truncate (state->buf, 0);
         state->state = 0;
+        subrip_unescape_formatting (ret);
         return ret;
       }
       return NULL;
@@ -596,7 +635,7 @@ gst_sub_parse_format_autodetect (GstSubParse * self)
       return gst_caps_new_simple ("text/x-pango-markup", NULL);
     case GST_SUB_PARSE_FORMAT_SUBRIP:
       self->parse_line = parse_subrip;
-      return gst_caps_new_simple ("text/plain", NULL);
+      return gst_caps_new_simple ("text/x-pango-markup", NULL);
     case GST_SUB_PARSE_FORMAT_MPSUB:
       self->parse_line = parse_mpsub;
       return gst_caps_new_simple ("text/plain", NULL);