API: add gst_tag_freeform_string_to_utf8() (#405072).

Original commit message from CVS: * docs/libs/gst-plugins-base-libs-sections.txt: * gst-libs/gst/tag/tag.h: * gst-libs/gst/tag/tags.c: (gst_tag_freeform_string_to_utf8): API: add gst_tag_freeform_string_to_utf8() (#405072). * gst-libs/gst/tag/gstid3tag.c: (gst_tag_extract_id3v1_string): Use gst_tag_freeform_string_to_utf8() here.
2025-01-30 02:58:24 +00:00 · 2007-04-12 12:19:20 +00:00 · 2007-04-12 12:19:20 +00:00 · a208469078
commit a208469078
parent 8a6b8cfb37
5 changed files with 113 additions and 58 deletions
--- a/10
+++ b/10
@ -1,3 +1,13 @@
+2007-04-12  Tim-Philipp Müller  <tim at centricular dot net>
+
+	* docs/libs/gst-plugins-base-libs-sections.txt:
+	* gst-libs/gst/tag/tag.h:
+	* gst-libs/gst/tag/tags.c: (gst_tag_freeform_string_to_utf8):
+	  API: add gst_tag_freeform_string_to_utf8() (#405072).
+
+	* gst-libs/gst/tag/gstid3tag.c: (gst_tag_extract_id3v1_string):
+	  Use gst_tag_freeform_string_to_utf8() here.
+
 2007-04-12  Wim Taymans  <wim@fluendo.com>

 	* gst/gdp/gstgdppay.c: (gst_gdp_pay_chain),
--- a/docs/libs/gst-plugins-base-libs-sections.txt
+++ b/docs/libs/gst-plugins-base-libs-sections.txt
@ -960,6 +960,7 @@ GST_TAG_CMML_CLIP
 GST_TAG_CMML_HEAD
 GST_TAG_CMML_STREAM
 gst_tag_register_musicbrainz_tags
+gst_tag_freeform_string_to_utf8
 gst_tag_parse_extended_comment
 GstTagImageType
 <SUBSECTION Standard>
--- a/gst-libs/gst/tag/gstid3tag.c
+++ b/gst-libs/gst/tag/gstid3tag.c
@ -320,66 +320,14 @@ static void
 gst_tag_extract_id3v1_string (GstTagList * list, const gchar * tag,
    const gchar * start, const guint size)
 {
-  const gchar *env;
-  gsize bytes_read;
+  const gchar *env_vars[] = { "GST_ID3V1_TAG_ENCODING",
+    "GST_ID3_TAG_ENCODING", "GST_TAG_ENCODING", NULL
+  };
  gchar *utf8;

-  /* Should we try the charsets specified
-   * via environment variables FIRST ? */
-  if (g_utf8_validate (start, size, NULL)) {
-    utf8 = g_strndup (start, size);
-    goto beach;
-  }
+  utf8 = gst_tag_freeform_string_to_utf8 (start, size, env_vars);

-  env = g_getenv ("GST_ID3V1_TAG_ENCODING");
-  if (!env || *env == '\0')
-    env = g_getenv ("GST_ID3_TAG_ENCODING");
-  if (!env || *env == '\0')
-    env = g_getenv ("GST_TAG_ENCODING");
-
-  /* Try charsets specified via the environment */
-  if (env && *env != '\0') {
-    gchar **c, **csets;
-
-    csets = g_strsplit (env, G_SEARCHPATH_SEPARATOR_S, -1);
-
-    for (c = csets; c && *c; ++c) {
-      if ((utf8 =
-              g_convert (start, size, "UTF-8", *c, &bytes_read, NULL, NULL))) {
-        if (bytes_read == size) {
-          g_strfreev (csets);
-          goto beach;
-        }
-        g_free (utf8);
-        utf8 = NULL;
-      }
-    }
-  }
-  /* Try current locale (if not UTF-8) */
-  if (!g_get_charset (&env)) {
-    if ((utf8 = g_locale_to_utf8 (start, size, &bytes_read, NULL, NULL))) {
-      if (bytes_read == size) {
-        goto beach;
-      }
-      g_free (utf8);
-      utf8 = NULL;
-    }
-  }
-
-  /* Try ISO-8859-1 */
-  utf8 =
-      g_convert (start, size, "UTF-8", "ISO-8859-1", &bytes_read, NULL, NULL);
-  if (utf8 != NULL && bytes_read == size) {
-    goto beach;
-  }
-
-  g_free (utf8);
-  return;
-
-beach:
-
-  g_strchomp (utf8);
-  if (utf8 && utf8[0] != '\0') {
+  if (utf8 && *utf8 != '\0') {
    gst_tag_list_add (list, GST_TAG_MERGE_REPLACE, tag, utf8, NULL);
  }

--- a/gst-libs/gst/tag/tag.h
+++ b/gst-libs/gst/tag/tag.h
@ -210,8 +210,12 @@ gboolean                gst_tag_parse_extended_comment (const gchar  * ext_comme
                                                        gchar       ** value,
                                                        gboolean       fail_if_no_key);

+gchar                 * gst_tag_freeform_string_to_utf8 (const gchar  * data,
+                                                         gint           size,
+                                                         const gchar ** env_vars);
+
 /* FIXME 0.11: replace with a more general gst_tag_library_init() */
-void gst_tag_register_musicbrainz_tags (void);
+void                    gst_tag_register_musicbrainz_tags (void);

 G_END_DECLS

--- a/gst-libs/gst/tag/tags.c
+++ b/gst-libs/gst/tag/tags.c
@ -222,3 +222,95 @@ gst_tag_parse_extended_comment (const gchar * ext_comment, gchar ** key,

  return TRUE;
 }
+
+/**
+ * gst_tag_freeform_string_to_utf8:
+ * @data: string data
+ * @size: length of string data, or -1 if the string is NUL-terminated
+ * @env_vars: a NULL-terminated string array of environment variable names,
+ *            or NULL
+ *
+ * Convenience function to read a string with unknown character encoding. If
+ * the string is already valid UTF-8, a copy of it is used. Otherwise the first
+ * non-empty environment variable named in @env_vars is read as a list of
+ * character encodings to try, then the current locale (unless it is UTF-8),
+ * and finally ISO-8859-1 (which will almost always succeed). Trailing
+ * whitespace is stripped from the result in every case.
+ *
+ * Returns: a newly-allocated string in UTF-8 encoding, or NULL if the
+ *          conversion failed or the result is empty
+ *
+ * Since: 0.10.13
+ */
+gchar *
+gst_tag_freeform_string_to_utf8 (const gchar * data, gint size,
+    const gchar ** env_vars)
+{
+  const gchar *env = NULL;
+  gsize bytes_read;
+  gchar *utf8 = NULL;
+
+  g_return_val_if_fail (data != NULL, NULL);
+
+  if (size < 0)
+    size = strlen (data);
+
+  /* Should we try the charsets specified
+   * via environment variables FIRST ? */
+  if (g_utf8_validate (data, size, NULL)) {
+    utf8 = g_strndup (data, size);
+    goto beach;                 /* chomp + empty check apply here too */
+  }
+
+  while ((env == NULL || *env == '\0') && env_vars && *env_vars != NULL) {
+    env = g_getenv (*env_vars);
+    ++env_vars;
+  }
+
+  /* Try charsets specified via the environment */
+  if (env != NULL && *env != '\0') {
+    gchar **c, **csets;
+
+    csets = g_strsplit (env, G_SEARCHPATH_SEPARATOR_S, -1);
+
+    for (c = csets; c && *c; ++c) {
+      if ((utf8 = g_convert (data, size, "UTF-8", *c, &bytes_read, NULL, NULL))) {
+        if (bytes_read == size) {
+          g_strfreev (csets);
+          goto beach;
+        }
+        g_free (utf8);
+        utf8 = NULL;
+      }
+    }
+
+    g_strfreev (csets);
+  }
+
+  /* Try current locale (if not UTF-8) */
+  if (!g_get_charset (&env)) {
+    if ((utf8 = g_locale_to_utf8 (data, size, &bytes_read, NULL, NULL))) {
+      if (bytes_read == size)
+        goto beach;
+      g_free (utf8);
+      utf8 = NULL;
+    }
+  }
+
+  /* Try ISO-8859-1 */
+  utf8 = g_convert (data, size, "UTF-8", "ISO-8859-1", &bytes_read, NULL, NULL);
+  if (utf8 != NULL && bytes_read == size)
+    goto beach;
+
+  g_free (utf8);
+  return NULL;
+
+beach:
+  /* every jump here carries a non-NULL utf8; strip padding, reject empty */
+  g_strchomp (utf8);
+  if (utf8 && utf8[0] != '\0')
+    return utf8;
+
+  g_free (utf8);
+  return NULL;
+}