mpegts: Add function to encode UTF8 strings

This can be used to create descriptors with appropriate character maps
2024-11-23 18:21:04 +00:00 · 2013-10-30 10:43:21 +01:00 · 2013-10-30 10:43:21 +01:00 · 4630dfda05
commit 4630dfda05
parent 465dea6f32
2 changed files with 155 additions and 0 deletions
--- a/gst-libs/gst/mpegts/gstmpegts-private.h
+++ b/gst-libs/gst/mpegts/gstmpegts-private.h
@ -32,6 +32,7 @@ GST_DEBUG_CATEGORY_EXTERN (gst_mpegts_debug);
 G_GNUC_INTERNAL void __initialize_descriptors (void);
 G_GNUC_INTERNAL guint32 _calc_crc32 (const guint8 *data, guint datalen);
 G_GNUC_INTERNAL gchar *get_encoding_and_convert (const gchar *text, guint length);
 G_GNUC_INTERNAL guint8 *dvb_text_from_utf8 (const gchar * text, gsize *out_size);
 typedef gpointer (*GstMpegTsParseFunc) (GstMpegTsSection *section);
 G_GNUC_INTERNAL gpointer __common_desc_checks (GstMpegTsSection *section,
--- a/gst-libs/gst/mpegts/gstmpegtsdescriptor.c
+++ b/gst-libs/gst/mpegts/gstmpegtsdescriptor.c
@ -259,6 +259,160 @@ _get_iconv (LocalIconvCode from, LocalIconvCode to)
  return __iconvs[from][to];
 }
 static void
 _encode_control_codes (gchar * text, gsize length, gboolean is_multibyte)
 {
  gsize pos = 0;
  while (pos < length) {
    if (is_multibyte) {
      guint16 code = GST_READ_UINT16_BE (text + pos);
      if (code == 0x000A) {
        text[pos] = 0xE0;
        text[pos + 1] = 0x8A;
      }
      pos += 2;
    } else {
      guint8 code = text[pos];
      if (code == 0x0A)
        text[pos] = 0x8A;
      pos++;
    }
  }
 }
 /**
 * dvb_text_from_utf8:
 * @text: The text to convert. This should be in UTF-8 format
 * @out_size: (out) the byte length of the new text
 *
 * Converts UTF-8 strings to text characters compliant with EN 300 468.
 * The converted text can be used directly in DVB #GstMpegTsDescriptor
 *
 * The function will try different character maps until the string is
 * completely converted.
 *
 * The function tries the default ISO 6937 character map first.
 *
 * If no character map that contains all characters could be found, the
 * string is converted to ISO 6937 with unknown characters set to `?`.
 *
 * Returns: (transfer full) byte array of size @out_size
 */
 guint8 *
 dvb_text_from_utf8 (const gchar * text, gsize * out_size)
 {
  GError *error = NULL;
  gchar *out_text;
  guint8 *out_buffer;
  guint encoding;
  GIConv giconv = (GIConv) - 1;
  /* We test character maps one-by-one. Start with the default */
  encoding = _ICONV_ISO6937;
  giconv = _get_iconv (_ICONV_UTF8, encoding);
  out_text = g_convert_with_iconv (text, -1, giconv, NULL, out_size, &error);
  if (out_text) {
    GST_DEBUG ("Using default ISO6937 encoding");
    goto out;
  }
  g_clear_error (&error);
  for (encoding = _ICONV_ISO8859_1; encoding <= _ICONV_ISO10646_UTF8;
      encoding++) {
    giconv = _get_iconv (_ICONV_UTF8, encoding);
    if (giconv == (GIConv) - 1)
      continue;
    out_text = g_convert_with_iconv (text, -1, giconv, NULL, out_size, &error);
    if (out_text) {
      GST_DEBUG ("Found suitable character map - %s", iconvtablename[encoding]);
      goto out;
    }
    g_clear_error (&error);
  }
  out_text = g_convert_with_fallback (text, -1, iconvtablename[_ICONV_ISO6937],
      iconvtablename[_ICONV_UTF8], "?", NULL, out_size, &error);
 out:
  if (error) {
    GST_WARNING ("Could not convert from utf-8: %s", error->message);
    g_error_free (error);
    if (out_text)
      g_free (out_text);
    return NULL;
  }
  switch (encoding) {
    case _ICONV_ISO6937:
      /* Default encoding contains no selection bytes. */
      _encode_control_codes (out_text, *out_size, FALSE);
      return (guint8 *) out_text;
    case _ICONV_ISO8859_1:
    case _ICONV_ISO8859_2:
    case _ICONV_ISO8859_3:
    case _ICONV_ISO8859_4:
      /* These character sets requires 3 selection bytes */
      _encode_control_codes (out_text, *out_size, FALSE);
      out_buffer = g_malloc (*out_size + 3);
      out_buffer[0] = 0x10;
      out_buffer[1] = 0x00;
      out_buffer[2] = encoding - _ICONV_ISO8859_1 + 1;
      memcpy (out_buffer + 3, out_text, *out_size);
      *out_size += 3;
      g_free (out_text);
      return out_buffer;
    case _ICONV_ISO8859_5:
    case _ICONV_ISO8859_6:
    case _ICONV_ISO8859_7:
    case _ICONV_ISO8859_8:
    case _ICONV_ISO8859_9:
    case _ICONV_ISO8859_10:
    case _ICONV_ISO8859_11:
    case _ICONV_ISO8859_12:
    case _ICONV_ISO8859_13:
    case _ICONV_ISO8859_14:
    case _ICONV_ISO8859_15:
      /* These character sets requires 1 selection byte */
      _encode_control_codes (out_text, *out_size, FALSE);
      out_buffer = g_malloc (*out_size + 1);
      out_buffer[0] = encoding - _ICONV_ISO8859_5 + 1;
      memcpy (out_buffer + 1, out_text, *out_size);
      *out_size += 1;
      g_free (out_text);
      return out_buffer;
    case _ICONV_UCS_2BE:
    case _ICONV_EUC_KR:
    case _ICONV_UTF_16BE:
      /* These character sets requires 1 selection byte */
      _encode_control_codes (out_text, *out_size, TRUE);
      out_buffer = g_malloc (*out_size + 1);
      out_buffer[0] = encoding - _ICONV_UCS_2BE + 0x11;
      memcpy (out_buffer + 1, out_text, *out_size);
      *out_size += 1;
      g_free (out_text);
      return out_buffer;
    case _ICONV_GB2312:
    case _ICONV_ISO10646_UTF8:
      /* These character sets requires 1 selection byte */
      _encode_control_codes (out_text, *out_size, FALSE);
      out_buffer = g_malloc (*out_size + 1);
      out_buffer[0] = encoding - _ICONV_UCS_2BE + 0x11;
      memcpy (out_buffer + 1, out_text, *out_size);
      *out_size += 1;
      g_free (out_text);
      return out_buffer;
    default:
      g_free (out_text);
      return NULL;
  }
 }
 /*
 * @text: The text to convert. It may include pango markup (<b> and </b>)
 * @length: The length of the string -1 if it's nul-terminated